from sklearn import preprocessing
import numpy as np
# Sample 3x3 matrix used throughout the preprocessing examples.
data = np.array([
    [2.2, 5.9, -1.8],
    [5.4, -3.2, -5.1],
    [-1.9, 4.2, 3.2],
])

# Binarization: entries greater than the threshold map to 1, all others to 0.
binarizer = preprocessing.Binarizer(threshold=1.5)
bindata = binarizer.transform(data)
print('Binarized data:\n\n', bindata)
Binarized data: [[ 1. 1. 0.] [ 1. 0. 0.] [ 0. 1. 1.]]
# Column-wise (axis=0) mean and standard deviation of the raw data.
mean_before = np.mean(data, axis=0)
std_before = np.std(data, axis=0)
print('Mean (before)= ', mean_before)
print('Standard Deviation (before)= ', std_before)
Mean (before)= [ 1.9 2.3 -1.23333333] Standard Deviation (before)= [ 2.98775278 3.95052739 3.41207008]
# Standardize the data, then re-check the column-wise statistics: the
# recorded output shows means of ~0 and standard deviations of 1.
scaled_data = preprocessing.scale(data)
mean_after = np.mean(scaled_data, axis=0)
std_after = np.std(scaled_data, axis=0)
print('Mean (after)= ', mean_after)
print('Standard Deviation (after)= ', std_after)
Mean (after)= [ 0.00000000e+00 0.00000000e+00 7.40148683e-17] Standard Deviation (after)= [ 1. 1. 1.]
# Echo the array: the recorded output matches the values originally assigned,
# i.e. scaling produced a new array and left `data` unchanged.
data
array([[ 2.2, 5.9, -1.8], [ 5.4, -3.2, -5.1], [-1.9, 4.2, 3.2]])
# Min-max scaling: rescale the data into the range given by feature_range.
# In the recorded output every column spans exactly 0 to 1.
minmax_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
minmax_scaler.fit(data)
data_minmax = minmax_scaler.transform(data)
print('MinMaxScaler applied on the data:\n', data_minmax)
MinMaxScaler applied on the data: [[ 0.56164384 1. 0.39759036] [ 1. 0. 0. ] [ 0. 0.81318681 1. ]]
-- Normalization: bringing the values of each feature vector onto a common scale
# Echo the array again; the recorded output still shows the original values.
data
array([[ 2.2, 5.9, -1.8], [ 5.4, -3.2, -5.1], [-1.9, 4.2, 3.2]])
# Normalize the data under the L1 norm and under the L2 norm.
data_l1, data_l2 = (
    preprocessing.normalize(data, norm=norm_kind)
    for norm_kind in ('l1', 'l2')
)
print('L1-normalized data:\n', data_l1)
print('\nL2-normalized data:\n', data_l2)
L1-normalized data: [[ 0.22222222 0.5959596 -0.18181818] [ 0.39416058 -0.23357664 -0.37226277] [-0.20430108 0.4516129 0.34408602]] L2-normalized data: [[ 0.3359268 0.90089461 -0.2748492 ] [ 0.6676851 -0.39566524 -0.63059148] [-0.33858465 0.74845029 0.57024784]]
# Sanity check: the absolute values of the second L1-normalized row sum to ~1.
0.39416058+0.23357664+0.37226277
0.9999999900000001
# Sanity check: the squared values of the first L2-normalized row sum to ~1.
0.3359268**2+0.90089461**2+(-0.2748492)**2
0.9999999960259321