from sklearn.preprocessing import StandardScaler, MinMaxScaler
import sklearn
import numpy as np
np.random.seed(10)
print(sklearn.__version__)
print(np.__version__)
0.19.1
1.14.2
rows = 5
columns = 2
# 5 samples with 2 features each, drawn uniformly from [0, 1)
data = np.random.rand(rows, columns)
def py_standardisation(X):
    """Standardise each feature to zero mean and unit variance."""
    X = X.copy()  # work on a copy so the caller's array is not modified
    rows, features = X.shape
    for f in range(features):
        X[:, f] = (X[:, f] - X[:, f].mean()) / X[:, f].std()
    return X
def skl_standardisation(X):
    scaler = StandardScaler()
    return scaler.fit_transform(X)
# np.allclose compares the two results element-wise within a floating-point tolerance
print(np.allclose(skl_standardisation(data), py_standardisation(data)))
print()
print(skl_standardisation(data))
True

[[ 1.3365329  -1.08044519]
 [ 0.7564286   1.1806552 ]
 [ 0.18698985 -0.44674652]
 [-1.07897872  1.21707505]
 [-1.20097264 -0.87053854]]
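The per-feature loop in py_standardisation can also be expressed with NumPy broadcasting. The sketch below is an assumed equivalent rather than part of the original code; vectorised_standardisation is a hypothetical name.
def vectorised_standardisation(X):
    # column-wise mean and population standard deviation (np.std defaults to
    # ddof=0, which is also what StandardScaler uses)
    return (X - X.mean(axis=0)) / X.std(axis=0)

print(np.allclose(vectorised_standardisation(data), skl_standardisation(data)))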
def py_mean_normalisation(X):
    """Centre each feature on its mean and scale it by its range (max - min)."""
    X = X.copy()  # work on a copy so the caller's array is not modified
    rows, features = X.shape
    for f in range(features):
        X[:, f] = (X[:, f] - X[:, f].mean()) / (X[:, f].max() - X[:, f].min())
    return X
print(py_mean_normalisation(data))
[[ 0.52671132 -0.4702658 ]
 [ 0.29809929  0.51388239]
 [ 0.07369042 -0.19444726]
 [-0.42521236  0.5297342 ]
 [-0.47328868 -0.37890353]]
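scikit-learn's preprocessing module has no dedicated mean-normalisation scaler, but the same transform can be wrapped in a FunctionTransformer. This is only a sketch; mean_normalise is a hypothetical helper, not part of the original code.
from sklearn.preprocessing import FunctionTransformer

def mean_normalise(X):
    # centre on the column mean, scale by the column range
    return (X - X.mean(axis=0)) / (X.max(axis=0) - X.min(axis=0))

mean_normaliser = FunctionTransformer(mean_normalise)
print(np.allclose(mean_normaliser.fit_transform(data), py_mean_normalisation(data)))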
def py_min_max_scaling(X):
    """Rescale each feature to the [0, 1] range."""
    X = X.copy()  # work on a copy so the caller's array is not modified
    rows, features = X.shape
    for f in range(features):
        X[:, f] = (X[:, f] - X[:, f].min()) / (X[:, f].max() - X[:, f].min())
    return X
def skl_min_max_scaling(X):
    min_max_scaler = MinMaxScaler()
    return min_max_scaler.fit_transform(X)
print(np.allclose(skl_min_max_scaling(data), py_min_max_scaling(data)))
print()
print(skl_min_max_scaling(data))
True

[[1.         0.        ]
 [0.77138797 0.98414819]
 [0.5469791  0.27581854]
 [0.04807631 1.        ]
 [0.         0.09136227]]
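MinMaxScaler also accepts a feature_range argument for target intervals other than the default [0, 1]. A small sketch; the (-1, 1) range below is just an illustrative choice.
# rescale each feature into [-1, 1] instead of the default [0, 1]
symmetric_scaler = MinMaxScaler(feature_range=(-1, 1))
print(symmetric_scaler.fit_transform(data))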