数据归一化

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
# Demo input: the integers 0..35 laid out row by row in a 6x6 DataFrame.
data = pd.DataFrame(np.arange(36).reshape(6, 6))
data
    0   1   2   3   4   5
0   0   1   2   3   4   5
1   6   7   8   9  10  11
2  12  13  14  15  16  17
3  18  19  20  21  22  23
4  24  25  26  27  28  29
5  30  31  32  33  34  35
# Min-max scaling with scikit-learn: learn each column's min/max from `data`,
# then rescale every column into the [0, 1] interval.
sclar = MinMaxScaler(feature_range=(0, 1))
sclar.fit(data)
result = sclar.transform(data)
result
array([[0. , 0. , 0. , 0. , 0. , 0. ],
       [0.2, 0.2, 0.2, 0.2, 0.2, 0.2],
       [0.4, 0.4, 0.4, 0.4, 0.4, 0.4],
       [0.6, 0.6, 0.6, 0.6, 0.6, 0.6],
       [0.8, 0.8, 0.8, 0.8, 0.8, 0.8],
       [1. , 1. , 1. , 1. , 1. , 1. ]])
# Map the scaled values back to the original value range with the fitted scaler.
# NOTE: despite its name, `sclar_` holds the restored data array, not a scaler.
sclar_ = sclar.inverse_transform(result)
sclar_
array([[ 0.,  1.,  2.,  3.,  4.,  5.],
       [ 6.,  7.,  8.,  9., 10., 11.],
       [12., 13., 14., 15., 16., 17.],
       [18., 19., 20., 21., 22., 23.],
       [24., 25., 26., 27., 28., 29.],
       [30., 31., 32., 33., 34., 35.]])

不依赖 sklearn，使用 NumPy / pandas 手动实现数据归一化

# Rebuild the same 6x6 demo frame (values 0..35, row-major) for the manual version.
data = pd.DataFrame(np.arange(36).reshape(6, 6))
data
    0   1   2   3   4   5
0   0   1   2   3   4   5
1   6   7   8   9  10  11
2  12  13  14  15  16  17
3  18  19  20  21  22  23
4  24  25  26  27  28  29
5  30  31  32  33  34  35
# Manual column-wise min-max scaling to [0, 1]:
#     x_nor = (x - min) / (max - min), computed per column (axis=0).
col_min = data.min(axis=0)
col_range = data.max(axis=0) - col_min
# A constant column has a zero range, and dividing by it would produce NaN
# (0 / 0). Divide by 1 instead so such a column scales to 0.0, matching how
# scikit-learn's MinMaxScaler handles zero-range features.
x_nor = (data - col_min) / col_range.replace(0, 1)
x_nor
     0    1    2    3    4    5
0  0.0  0.0  0.0  0.0  0.0  0.0
1  0.2  0.2  0.2  0.2  0.2  0.2
2  0.4  0.4  0.4  0.4  0.4  0.4
3  0.6  0.6  0.6  0.6  0.6  0.6
4  0.8  0.8  0.8  0.8  0.8  0.8
5  1.0  1.0  1.0  1.0  1.0  1.0
# Undo the min-max scaling: multiply each column by its (max - min) range,
# then add the column minimum back.
x_nor_inverse = (
    x_nor
    .mul(data.max(axis=0) - data.min(axis=0), axis="columns")
    .add(data.min(axis=0), axis="columns")
)
x_nor_inverse
      0     1     2     3     4     5
0   0.0   1.0   2.0   3.0   4.0   5.0
1   6.0   7.0   8.0   9.0  10.0  11.0
2  12.0  13.0  14.0  15.0  16.0  17.0
3  18.0  19.0  20.0  21.0  22.0  23.0
4  24.0  25.0  26.0  27.0  28.0  29.0
5  30.0  31.0  32.0  33.0  34.0  35.0

数据标准化

# Same 6x6 demo frame (values 0..35, row-major), used here for standardization.
data = pd.DataFrame(np.arange(36).reshape(6, 6))
data
    0   1   2   3   4   5
0   0   1   2   3   4   5
1   6   7   8   9  10  11
2  12  13  14  15  16  17
3  18  19  20  21  22  23
4  24  25  26  27  28  29
5  30  31  32  33  34  35
from sklearn.preprocessing import StandardScaler
# Standardization with scikit-learn: learn each column's mean and standard
# deviation from `data`, then rescale the columns with those statistics.
std = StandardScaler()
std.fit(data)
result1 = std.transform(data)
result1
array([[-1.46385011, -1.46385011, -1.46385011, -1.46385011, -1.46385011,
        -1.46385011],
       [-0.87831007, -0.87831007, -0.87831007, -0.87831007, -0.87831007,
        -0.87831007],
       [-0.29277002, -0.29277002, -0.29277002, -0.29277002, -0.29277002,
        -0.29277002],
       [ 0.29277002,  0.29277002,  0.29277002,  0.29277002,  0.29277002,
         0.29277002],
       [ 0.87831007,  0.87831007,  0.87831007,  0.87831007,  0.87831007,
         0.87831007],
       [ 1.46385011,  1.46385011,  1.46385011,  1.46385011,  1.46385011,
         1.46385011]])
# Sanity check: the mean over all standardized entries is zero up to
# floating-point rounding error.
result1.mean()
-3.700743415417188e-17
# Sanity check: the variance over all standardized entries is one up to
# floating-point rounding error.
result1.var()
1.0000000000000002
# Map the standardized values back to the original scale with the fitted scaler.
std.inverse_transform(result1)
array([[ 0.,  1.,  2.,  3.,  4.,  5.],
       [ 6.,  7.,  8.,  9., 10., 11.],
       [12., 13., 14., 15., 16., 17.],
       [18., 19., 20., 21., 22., 23.],
       [24., 25., 26., 27., 28., 29.],
       [30., 31., 32., 33., 34., 35.]])