Pandas を用いた演算

In [1]:
# データフレーム操作に関するライブラリをインポートする
import pandas as pd

まずは Pandas の基本操作から

In [2]:
# Build the 20 x 10 sample table used throughout this notebook.
# Each key is a row label (a-t); each list holds that row's values for
# columns A-J, in order.
df = pd.DataFrame.from_dict(
    {
        "a": [100, -20, -60, -20, 100, 300, 580, 940, 1380, 1900],
        "b": [92, -24, -60, -16, 108, 312, 596, 960, 1404, 1928],
        "c": [84, -28, -60, -12, 116, 324, 612, 980, 1428, 1956],
        "d": [77, -31, -59, -7, 125, 337, 629, 1001, 1453, 1985],
        "e": [70, -34, -58, -2, 134, 350, 646, 1022, 1478, 2014],
        "f": [62, -38, -58, 2, 142, 362, 662, 1042, 1502, 2042],
        "g": [56, -40, -56, 8, 152, 376, 680, 1064, 1528, 2072],
        "h": [49, -43, -55, 13, 161, 389, 697, 1085, 1553, 2101],
        "i": [42, -46, -54, 18, 170, 402, 714, 1106, 1578, 2130],
        "j": [36, -48, -52, 24, 180, 416, 732, 1128, 1604, 2160],
        "k": [30, -50, -50, 30, 190, 430, 750, 1150, 1630, 2190],
        "l": [24, -52, -48, 36, 200, 444, 768, 1172, 1656, 2220],
        "m": [18, -54, -46, 42, 210, 458, 786, 1194, 1682, 2250],
        "n": [13, -55, -43, 49, 221, 473, 805, 1217, 1709, 2281],
        "o": [8, -56, -40, 56, 232, 488, 824, 1240, 1736, 2312],
        "p": [2, -58, -38, 62, 242, 502, 842, 1262, 1762, 2342],
        "q": [-2, -58, -34, 70, 254, 518, 862, 1286, 1790, 2374],
        "r": [-7, -59, -31, 77, 265, 533, 881, 1309, 1817, 2405],
        "s": [-12, -60, -28, 84, 276, 548, 900, 1332, 1844, 2436],
        "t": [-16, -60, -24, 92, 288, 564, 920, 1356, 1872, 2468],
    },
    orient="index",  # dict keys become the row index
    columns=list("ABCDEFGHIJ"),
)
In [3]:
df # Display the whole DataFrame to check its contents
Out[3]:
A B C D E F G H I J
a 100 -20 -60 -20 100 300 580 940 1380 1900
b 92 -24 -60 -16 108 312 596 960 1404 1928
c 84 -28 -60 -12 116 324 612 980 1428 1956
d 77 -31 -59 -7 125 337 629 1001 1453 1985
e 70 -34 -58 -2 134 350 646 1022 1478 2014
f 62 -38 -58 2 142 362 662 1042 1502 2042
g 56 -40 -56 8 152 376 680 1064 1528 2072
h 49 -43 -55 13 161 389 697 1085 1553 2101
i 42 -46 -54 18 170 402 714 1106 1578 2130
j 36 -48 -52 24 180 416 732 1128 1604 2160
k 30 -50 -50 30 190 430 750 1150 1630 2190
l 24 -52 -48 36 200 444 768 1172 1656 2220
m 18 -54 -46 42 210 458 786 1194 1682 2250
n 13 -55 -43 49 221 473 805 1217 1709 2281
o 8 -56 -40 56 232 488 824 1240 1736 2312
p 2 -58 -38 62 242 502 842 1262 1762 2342
q -2 -58 -34 70 254 518 862 1286 1790 2374
r -7 -59 -31 77 265 533 881 1309 1817 2405
s -12 -60 -28 84 276 548 900 1332 1844 2436
t -16 -60 -24 92 288 564 920 1356 1872 2468
In [4]:
df.head() # Show only the first few rows (five here) instead of the whole table
Out[4]:
A B C D E F G H I J
a 100 -20 -60 -20 100 300 580 940 1380 1900
b 92 -24 -60 -16 108 312 596 960 1404 1928
c 84 -28 -60 -12 116 324 612 980 1428 1956
d 77 -31 -59 -7 125 337 629 1001 1453 1985
e 70 -34 -58 -2 134 350 646 1022 1478 2014
In [5]:
# Select by integer position with slices (the end position is exclusive):
# rows 10-19 (labels k-t) and columns 5-8 (labels F-I).
df.iloc[10:20, 5:9]
Out[5]:
F G H I
k 430 750 1150 1630
l 444 768 1172 1656
m 458 786 1194 1682
n 473 805 1217 1709
o 488 824 1240 1736
p 502 842 1262 1762
q 518 862 1286 1790
r 533 881 1309 1817
s 548 900 1332 1844
t 564 920 1356 1872
In [6]:
# A bare ":" keeps every row; "5:" keeps the columns from position 5 (F)
# through the last one.
df.iloc[:, 5:]
Out[6]:
F G H I J
a 300 580 940 1380 1900
b 312 596 960 1404 1928
c 324 612 980 1428 1956
d 337 629 1001 1453 1985
e 350 646 1022 1478 2014
f 362 662 1042 1502 2042
g 376 680 1064 1528 2072
h 389 697 1085 1553 2101
i 402 714 1106 1578 2130
j 416 732 1128 1604 2160
k 430 750 1150 1630 2190
l 444 768 1172 1656 2220
m 458 786 1194 1682 2250
n 473 805 1217 1709 2281
o 488 824 1240 1736 2312
p 502 842 1262 1762 2342
q 518 862 1286 1790 2374
r 533 881 1309 1817 2405
s 548 900 1332 1844 2436
t 564 920 1356 1872 2468
In [7]:
# Lists of integer positions pick individual rows (b, d, f, h, j)
# and individual columns (C, E, G, I).
df.iloc[[1, 3, 5, 7, 9], [2, 4, 6, 8]]
Out[7]:
C E G I
b -60 108 596 1404
d -59 125 629 1453
f -58 142 662 1502
h -55 161 697 1553
j -52 180 732 1604
In [8]:
df.T # Transpose: the rows become columns and the columns become rows
Out[8]:
a b c d e f g h i j k l m n o p q r s t
A 100 92 84 77 70 62 56 49 42 36 30 24 18 13 8 2 -2 -7 -12 -16
B -20 -24 -28 -31 -34 -38 -40 -43 -46 -48 -50 -52 -54 -55 -56 -58 -58 -59 -60 -60
C -60 -60 -60 -59 -58 -58 -56 -55 -54 -52 -50 -48 -46 -43 -40 -38 -34 -31 -28 -24
D -20 -16 -12 -7 -2 2 8 13 18 24 30 36 42 49 56 62 70 77 84 92
E 100 108 116 125 134 142 152 161 170 180 190 200 210 221 232 242 254 265 276 288
F 300 312 324 337 350 362 376 389 402 416 430 444 458 473 488 502 518 533 548 564
G 580 596 612 629 646 662 680 697 714 732 750 768 786 805 824 842 862 881 900 920
H 940 960 980 1001 1022 1042 1064 1085 1106 1128 1150 1172 1194 1217 1240 1262 1286 1309 1332 1356
I 1380 1404 1428 1453 1478 1502 1528 1553 1578 1604 1630 1656 1682 1709 1736 1762 1790 1817 1844 1872
J 1900 1928 1956 1985 2014 2042 2072 2101 2130 2160 2190 2220 2250 2281 2312 2342 2374 2405 2436 2468
In [9]:
# Select the rows and columns by position first, then transpose the result.
df.iloc[[1, 3, 5, 7, 9], [2, 4, 6, 8]].T
Out[9]:
b d f h j
C -60 -59 -58 -55 -52
E 108 125 142 161 180
G 596 629 662 697 732
I 1404 1453 1502 1553 1604
In [10]:
# Transpose first, then select by position. The positions now refer to the
# transposed frame (rows B, D, F, H, J and columns c, e, g, i), so the result
# differs from selecting first and transposing afterwards.
df.T.iloc[[1, 3, 5, 7, 9], [2, 4, 6, 8]]
Out[10]:
c e g i
B -28 -34 -40 -46
D -12 -2 8 18
F 324 350 376 402
H 980 1022 1064 1106
J 1956 2014 2072 2130

簡単な統計量

In [11]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df.describe()
Out[11]:
A B C D E F G H I J
count 20.000000 20.000000 20.000000 20.000000 20.000000 20.000000 20.000000 20.000000 20.000000 20.000000
mean 36.300000 -45.700000 -47.700000 30.300000 188.300000 426.300000 744.300000 1142.300000 1620.300000 2178.300000
std 36.220233 12.794324 11.639768 35.032466 58.643526 82.285191 105.936972 129.593332 153.252149 176.912437
min -16.000000 -60.000000 -60.000000 -20.000000 100.000000 300.000000 580.000000 940.000000 1380.000000 1900.000000
25% 6.500000 -56.500000 -58.000000 1.000000 140.000000 359.000000 658.000000 1037.000000 1496.000000 2035.000000
50% 33.000000 -49.000000 -51.000000 27.000000 185.000000 423.000000 741.000000 1139.000000 1617.000000 2175.000000
75% 64.000000 -37.000000 -39.500000 57.500000 234.500000 491.500000 828.500000 1245.500000 1742.500000 2319.500000
max 100.000000 -20.000000 -24.000000 92.000000 288.000000 564.000000 920.000000 1356.000000 1872.000000 2468.000000
In [12]:
# Summary statistics for each row of df: describe() works column by column,
# so it is applied to the transpose.
df.T.describe()
Out[12]:
a b c d e f g h i j k l m n o p q r s t
count 10.000000 10.00000 10.000000 10.00000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000 10.000000
mean 520.000000 530.00000 540.000000 551.00000 562.000000 572.000000 584.000000 595.000000 606.000000 618.000000 630.000000 642.000000 654.000000 667.000000 680.000000 692.000000 706.000000 719.000000 732.000000 746.000000
std 678.626063 689.45389 700.323735 711.23367 722.181879 733.166648 744.186357 755.239476 766.324561 777.440244 788.585231 799.758297 810.958281 822.184083 833.434661 844.709023 856.006231 867.325391 878.665655 890.026217
min -60.000000 -60.00000 -60.000000 -59.00000 -58.000000 -58.000000 -56.000000 -55.000000 -54.000000 -52.000000 -50.000000 -52.000000 -54.000000 -55.000000 -56.000000 -58.000000 -58.000000 -59.000000 -60.000000 -60.000000
25% 10.000000 11.00000 12.000000 14.00000 16.000000 17.000000 20.000000 22.000000 24.000000 27.000000 30.000000 27.000000 24.000000 22.000000 20.000000 17.000000 16.000000 14.000000 12.000000 11.000000
50% 200.000000 210.00000 220.000000 231.00000 242.000000 252.000000 264.000000 275.000000 286.000000 298.000000 310.000000 322.000000 334.000000 347.000000 360.000000 372.000000 386.000000 399.000000 412.000000 426.000000
75% 850.000000 869.00000 888.000000 908.00000 928.000000 947.000000 968.000000 988.000000 1008.000000 1029.000000 1050.000000 1071.000000 1092.000000 1114.000000 1136.000000 1157.000000 1180.000000 1202.000000 1224.000000 1247.000000
max 1900.000000 1928.00000 1956.000000 1985.00000 2014.000000 2042.000000 2072.000000 2101.000000 2130.000000 2160.000000 2190.000000 2220.000000 2250.000000 2281.000000 2312.000000 2342.000000 2374.000000 2405.000000 2436.000000 2468.000000

行列の正規化(標準化)

正規化 (normalize) とは、異なる基準のデータを一定の基準にしたがって変形し利用しやすくすることです。

In [13]:
def standardize(values):
    """Return ``values`` shifted by their mean and divided by their standard deviation."""
    centered = values - values.mean()
    return centered / values.std()


# Normalization usually means rescaling the values so that the mean is 0 and
# the variance (and therefore the standard deviation) is 1.
# axis=0 hands one column at a time to the helper; use axis=1 to normalize
# row by row instead.
df.apply(standardize, axis=0)
Out[13]:
A B C D E F G H I J
a 1.758686 2.008703 -1.056722 -1.435811 -1.505708 -1.534906 -1.550922 -1.561037 -1.568004 -1.573095
b 1.537815 1.696065 -1.056722 -1.321631 -1.369290 -1.389071 -1.399889 -1.406708 -1.411399 -1.414824
c 1.316943 1.383426 -1.056722 -1.207451 -1.232873 -1.243237 -1.248856 -1.252379 -1.254795 -1.256554
d 1.123681 1.148947 -0.970810 -1.064727 -1.079403 -1.085250 -1.088383 -1.090334 -1.091665 -1.092631
e 0.930419 0.914468 -0.884897 -0.922002 -0.925933 -0.927263 -0.927910 -0.928289 -0.928535 -0.928708
f 0.709548 0.601829 -0.884897 -0.807822 -0.789516 -0.781429 -0.776877 -0.773960 -0.771930 -0.770438
g 0.543895 0.445510 -0.713073 -0.636552 -0.618994 -0.611289 -0.606965 -0.604198 -0.602275 -0.600862
h 0.350633 0.211031 -0.627160 -0.493828 -0.465525 -0.453301 -0.446492 -0.442152 -0.439146 -0.436939
i 0.157371 -0.023448 -0.541248 -0.351103 -0.312055 -0.295314 -0.286019 -0.280107 -0.276016 -0.273016
j -0.008283 -0.179767 -0.369423 -0.179833 -0.141533 -0.125174 -0.116107 -0.110345 -0.106361 -0.103441
k -0.173936 -0.336087 -0.197598 -0.008563 0.028989 0.044966 0.053806 0.059417 0.063294 0.066134
l -0.339589 -0.492406 -0.025774 0.162706 0.199511 0.215106 0.223718 0.229178 0.232949 0.235710
m -0.505242 -0.648725 0.146051 0.333976 0.370032 0.385246 0.393630 0.398940 0.402604 0.405285
n -0.643287 -0.726885 0.403788 0.533791 0.557606 0.567538 0.572982 0.576419 0.578785 0.580513
o -0.781331 -0.805044 0.661525 0.733605 0.745180 0.749831 0.752334 0.753897 0.754965 0.755741
p -0.946985 -0.961364 0.833350 0.904875 0.915702 0.919971 0.922246 0.923659 0.924620 0.925317
q -1.057420 -0.961364 1.176999 1.133235 1.120328 1.114417 1.111038 1.108853 1.107325 1.106197
r -1.195464 -1.039523 1.434736 1.333049 1.307902 1.296710 1.290390 1.286332 1.283506 1.281425
s -1.333509 -1.117683 1.692474 1.532864 1.495476 1.479002 1.469742 1.463810 1.459686 1.456653
t -1.443944 -1.117683 2.036123 1.761223 1.700102 1.673448 1.658533 1.649005 1.642391 1.637533
In [14]:
def min_max_scale(values):
    """Map ``values`` linearly so that the minimum becomes 0 and the maximum becomes 1."""
    lowest = values.min()
    return (values - lowest) / (values.max() - lowest)


# Normalization can also mean rescaling so that the maximum is 1 and the
# minimum is 0.
# axis=0 hands one column at a time to the helper; use axis=1 to normalize
# row by row instead.
df.apply(min_max_scale, axis=0)
Out[14]:
A B C D E F G H I J
a 1.000000 1.000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
b 0.931034 0.900 0.000000 0.035714 0.042553 0.045455 0.047059 0.048077 0.048780 0.049296
c 0.862069 0.800 0.000000 0.071429 0.085106 0.090909 0.094118 0.096154 0.097561 0.098592
d 0.801724 0.725 0.027778 0.116071 0.132979 0.140152 0.144118 0.146635 0.148374 0.149648
e 0.741379 0.650 0.055556 0.160714 0.180851 0.189394 0.194118 0.197115 0.199187 0.200704
f 0.672414 0.550 0.055556 0.196429 0.223404 0.234848 0.241176 0.245192 0.247967 0.250000
g 0.620690 0.500 0.111111 0.250000 0.276596 0.287879 0.294118 0.298077 0.300813 0.302817
h 0.560345 0.425 0.138889 0.294643 0.324468 0.337121 0.344118 0.348558 0.351626 0.353873
i 0.500000 0.350 0.166667 0.339286 0.372340 0.386364 0.394118 0.399038 0.402439 0.404930
j 0.448276 0.300 0.222222 0.392857 0.425532 0.439394 0.447059 0.451923 0.455285 0.457746
k 0.396552 0.250 0.277778 0.446429 0.478723 0.492424 0.500000 0.504808 0.508130 0.510563
l 0.344828 0.200 0.333333 0.500000 0.531915 0.545455 0.552941 0.557692 0.560976 0.563380
m 0.293103 0.150 0.388889 0.553571 0.585106 0.598485 0.605882 0.610577 0.613821 0.616197
n 0.250000 0.125 0.472222 0.616071 0.643617 0.655303 0.661765 0.665865 0.668699 0.670775
o 0.206897 0.100 0.555556 0.678571 0.702128 0.712121 0.717647 0.721154 0.723577 0.725352
p 0.155172 0.050 0.611111 0.732143 0.755319 0.765152 0.770588 0.774038 0.776423 0.778169
q 0.120690 0.050 0.722222 0.803571 0.819149 0.825758 0.829412 0.831731 0.833333 0.834507
r 0.077586 0.025 0.805556 0.866071 0.877660 0.882576 0.885294 0.887019 0.888211 0.889085
s 0.034483 0.000 0.888889 0.928571 0.936170 0.939394 0.941176 0.942308 0.943089 0.943662
t 0.000000 0.000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000 1.000000
In [15]:
def to_proportions(values):
    """Divide ``values`` by their total so that the results add up to 1."""
    return values / values.sum()


# Normalization can also mean rescaling so that the values sum to 1.
# axis=0 hands one column at a time to the helper; use axis=1 to normalize
# row by row instead.
df.apply(to_proportions, axis=0)
Out[15]:
A B C D E F G H I J
a 0.137741 0.021882 0.062893 -0.033003 0.026553 0.035186 0.038963 0.041145 0.042585 0.043612
b 0.126722 0.026258 0.062893 -0.026403 0.028678 0.036594 0.040038 0.042020 0.043325 0.044255
c 0.115702 0.030635 0.062893 -0.019802 0.030802 0.038001 0.041112 0.042896 0.044066 0.044897
d 0.106061 0.033917 0.061845 -0.011551 0.033192 0.039526 0.042254 0.043815 0.044837 0.045563
e 0.096419 0.037199 0.060797 -0.003300 0.035582 0.041051 0.043396 0.044734 0.045609 0.046229
f 0.085399 0.041575 0.060797 0.003300 0.037706 0.042458 0.044471 0.045610 0.046349 0.046871
g 0.077135 0.043764 0.058700 0.013201 0.040361 0.044100 0.045681 0.046573 0.047152 0.047560
h 0.067493 0.047046 0.057652 0.021452 0.042751 0.045625 0.046823 0.047492 0.047923 0.048226
i 0.057851 0.050328 0.056604 0.029703 0.045141 0.047150 0.047965 0.048411 0.048695 0.048891
j 0.049587 0.052516 0.054507 0.039604 0.047796 0.048792 0.049174 0.049374 0.049497 0.049580
k 0.041322 0.054705 0.052411 0.049505 0.050451 0.050434 0.050383 0.050337 0.050299 0.050269
l 0.033058 0.056893 0.050314 0.059406 0.053107 0.052076 0.051592 0.051300 0.051102 0.050957
m 0.024793 0.059081 0.048218 0.069307 0.055762 0.053718 0.052801 0.052263 0.051904 0.051646
n 0.017906 0.060175 0.045073 0.080858 0.058683 0.055477 0.054078 0.053270 0.052737 0.052357
o 0.011019 0.061269 0.041929 0.092409 0.061604 0.057237 0.055354 0.054276 0.053570 0.053069
p 0.002755 0.063457 0.039832 0.102310 0.064259 0.058879 0.056563 0.055239 0.054373 0.053758
q -0.002755 0.063457 0.035639 0.115512 0.067446 0.060755 0.057907 0.056290 0.055237 0.054492
r -0.009642 0.064551 0.032495 0.127063 0.070366 0.062515 0.059183 0.057297 0.056070 0.055204
s -0.016529 0.065646 0.029350 0.138614 0.073287 0.064274 0.060459 0.058303 0.056903 0.055915
t -0.022039 0.065646 0.025157 0.151815 0.076474 0.066151 0.061803 0.059354 0.057767 0.056650

相関行列

相関行列とは、各変数間の相関係数を並べた行列です。X と Y の相関係数は Y と X の相関係数に等しいため必ず対称行列になり、自分自身との相関である対角成分はすべて 1 になります。

In [16]:
# Correlation matrix between the columns A-J.
df.corr()
Out[16]:
A B C D E F G H I J
A 1.000000 0.987887 -0.940014 -0.985441 -0.990788 -0.992696 -0.993658 -0.994233 -0.994615 -0.994886
B 0.987887 1.000000 -0.875691 -0.947122 -0.957773 -0.961951 -0.964172 -0.965549 -0.966485 -0.967162
C -0.940014 -0.875691 1.000000 0.984327 0.977551 0.974303 0.972412 0.971176 0.970307 0.969662
D -0.985441 -0.947122 0.984327 1.000000 0.999387 0.998755 0.998309 0.997991 0.997755 0.997574
E -0.990788 -0.957773 0.977551 0.999387 1.000000 0.999889 0.999732 0.999597 0.999488 0.999399
F -0.992696 -0.961951 0.974303 0.998755 0.999889 1.000000 0.999966 0.999909 0.999854 0.999805
G -0.993658 -0.964172 0.972412 0.998309 0.999732 0.999966 1.000000 0.999986 0.999961 0.999934
H -0.994233 -0.965549 0.971176 0.997991 0.999597 0.999909 0.999986 1.000000 0.999993 0.999980
I -0.994615 -0.966485 0.970307 0.997755 0.999488 0.999854 0.999961 0.999993 1.000000 0.999996
J -0.994886 -0.967162 0.969662 0.997574 0.999399 0.999805 0.999934 0.999980 0.999996 1.000000
In [17]:
# Correlation matrix between the rows a-t: corr() compares columns,
# so it is applied to the transpose.
df.T.corr()
Out[17]:
a b c d e f g h i j k l m n o p q r s t
a 1.000000 0.999969 0.999878 0.999734 0.999541 0.999305 0.999028 0.998715 0.998370 0.997995 0.997594 0.997168 0.996722 0.996256 0.995773 0.995276 0.994764 0.994241 0.993708 0.993165
b 0.999969 1.000000 0.999970 0.999885 0.999750 0.999569 0.999346 0.999086 0.998791 0.998465 0.998112 0.997733 0.997332 0.996910 0.996471 0.996014 0.995543 0.995060 0.994565 0.994059
c 0.999878 0.999970 1.000000 0.999972 0.999892 0.999765 0.999594 0.999385 0.999139 0.998862 0.998555 0.998221 0.997864 0.997485 0.997086 0.996670 0.996239 0.995793 0.995335 0.994866
d 0.999734 0.999885 0.999972 1.000000 0.999974 0.999899 0.999779 0.999618 0.999421 0.999189 0.998927 0.998637 0.998323 0.997985 0.997627 0.997250 0.996857 0.996448 0.996026 0.995593
e 0.999541 0.999750 0.999892 0.999974 1.000000 0.999975 0.999905 0.999792 0.999640 0.999454 0.999236 0.998988 0.998715 0.998417 0.998098 0.997759 0.997403 0.997030 0.996644 0.996244
f 0.999305 0.999569 0.999765 0.999899 0.999975 1.000000 0.999977 0.999910 0.999804 0.999661 0.999485 0.999279 0.999045 0.998787 0.998506 0.998204 0.997883 0.997546 0.997193 0.996826
g 0.999028 0.999346 0.999594 0.999779 0.999905 0.999977 1.000000 0.999978 0.999915 0.999815 0.999680 0.999514 0.999319 0.999099 0.998854 0.998588 0.998302 0.997999 0.997679 0.997345
h 0.998715 0.999086 0.999385 0.999618 0.999792 0.999910 0.999978 1.000000 0.999979 0.999920 0.999825 0.999698 0.999541 0.999357 0.999148 0.998917 0.998665 0.998394 0.998107 0.997804
i 0.998370 0.998791 0.999139 0.999421 0.999640 0.999804 0.999915 0.999979 1.000000 0.999981 0.999925 0.999835 0.999715 0.999566 0.999392 0.999195 0.998976 0.998737 0.998481 0.998208
j 0.997995 0.998465 0.998862 0.999189 0.999454 0.999661 0.999815 0.999920 0.999981 1.000000 0.999982 0.999929 0.999844 0.999730 0.999590 0.999425 0.999238 0.999031 0.998804 0.998561
k 0.997594 0.998112 0.998555 0.998927 0.999236 0.999485 0.999680 0.999825 0.999925 0.999982 1.000000 0.999983 0.999933 0.999853 0.999745 0.999612 0.999456 0.999279 0.999082 0.998867
l 0.997168 0.997733 0.998221 0.998637 0.998988 0.999279 0.999514 0.999698 0.999835 0.999929 0.999983 1.000000 0.999984 0.999936 0.999861 0.999759 0.999633 0.999485 0.999317 0.999130
m 0.996722 0.997332 0.997864 0.998323 0.998715 0.999045 0.999319 0.999541 0.999715 0.999844 0.999933 0.999984 1.000000 0.999985 0.999940 0.999868 0.999771 0.999652 0.999512 0.999352
n 0.996256 0.996910 0.997485 0.997985 0.998417 0.998787 0.999099 0.999357 0.999566 0.999730 0.999853 0.999936 0.999985 1.000000 0.999985 0.999943 0.999875 0.999783 0.999670 0.999537
o 0.995773 0.996471 0.997086 0.997627 0.998098 0.998506 0.998854 0.999148 0.999392 0.999590 0.999745 0.999861 0.999940 0.999985 1.000000 0.999986 0.999946 0.999881 0.999795 0.999687
p 0.995276 0.996014 0.996670 0.997250 0.997759 0.998204 0.998588 0.998917 0.999195 0.999425 0.999612 0.999759 0.999868 0.999943 0.999986 1.000000 0.999987 0.999949 0.999888 0.999805
q 0.994764 0.995543 0.996239 0.996857 0.997403 0.997883 0.998302 0.998665 0.998976 0.999238 0.999456 0.999633 0.999771 0.999875 0.999946 0.999987 1.000000 0.999988 0.999951 0.999893
r 0.994241 0.995060 0.995793 0.996448 0.997030 0.997546 0.997999 0.998394 0.998737 0.999031 0.999279 0.999485 0.999652 0.999783 0.999881 0.999949 0.999988 1.000000 0.999988 0.999954
s 0.993708 0.994565 0.995335 0.996026 0.996644 0.997193 0.997679 0.998107 0.998481 0.998804 0.999082 0.999317 0.999512 0.999670 0.999795 0.999888 0.999951 0.999988 1.000000 0.999989
t 0.993165 0.994059 0.994866 0.995593 0.996244 0.996826 0.997345 0.997804 0.998208 0.998561 0.998867 0.999130 0.999352 0.999537 0.999687 0.999805 0.999893 0.999954 0.999989 1.000000

ある条件でレコードをクラス分けする

In [18]:
df['D'] < 30 # The condition: a boolean Series that is True where D is below 30
Out[18]:
a     True
b     True
c     True
d     True
e     True
f     True
g     True
h     True
i     True
j     True
k    False
l    False
m    False
n    False
o    False
p    False
q    False
r    False
s    False
t    False
Name: D, dtype: bool
In [19]:
df[df['D'] < 30] # Boolean indexing: keep only the rows that satisfy the condition
Out[19]:
A B C D E F G H I J
a 100 -20 -60 -20 100 300 580 940 1380 1900
b 92 -24 -60 -16 108 312 596 960 1404 1928
c 84 -28 -60 -12 116 324 612 980 1428 1956
d 77 -31 -59 -7 125 337 629 1001 1453 1985
e 70 -34 -58 -2 134 350 646 1022 1478 2014
f 62 -38 -58 2 142 362 662 1042 1502 2042
g 56 -40 -56 8 152 376 680 1064 1528 2072
h 49 -43 -55 13 161 389 697 1085 1553 2101
i 42 -46 -54 18 170 402 714 1106 1578 2130
j 36 -48 -52 24 180 416 732 1128 1604 2160
In [20]:
# Add a column named 'class': 1 where D is below 30, otherwise 0.
# The comparison is evaluated on the whole column at once, and casting the
# boolean result to int64 turns True/False into 1/0.
# NOTE: this modifies df in place, so every cell run after this one sees the
# extra column.
df['class'] = (df['D'] < 30).astype('int64')
In [21]:
df # Check the contents again: the new 'class' column now appears on the right
Out[21]:
A B C D E F G H I J class
a 100 -20 -60 -20 100 300 580 940 1380 1900 1
b 92 -24 -60 -16 108 312 596 960 1404 1928 1
c 84 -28 -60 -12 116 324 612 980 1428 1956 1
d 77 -31 -59 -7 125 337 629 1001 1453 1985 1
e 70 -34 -58 -2 134 350 646 1022 1478 2014 1
f 62 -38 -58 2 142 362 662 1042 1502 2042 1
g 56 -40 -56 8 152 376 680 1064 1528 2072 1
h 49 -43 -55 13 161 389 697 1085 1553 2101 1
i 42 -46 -54 18 170 402 714 1106 1578 2130 1
j 36 -48 -52 24 180 416 732 1128 1604 2160 1
k 30 -50 -50 30 190 430 750 1150 1630 2190 0
l 24 -52 -48 36 200 444 768 1172 1656 2220 0
m 18 -54 -46 42 210 458 786 1194 1682 2250 0
n 13 -55 -43 49 221 473 805 1217 1709 2281 0
o 8 -56 -40 56 232 488 824 1240 1736 2312 0
p 2 -58 -38 62 242 502 842 1262 1762 2342 0
q -2 -58 -34 70 254 518 862 1286 1790 2374 0
r -7 -59 -31 77 265 533 881 1309 1817 2405 0
s -12 -60 -28 84 276 548 900 1332 1844 2436 0
t -16 -60 -24 92 288 564 920 1356 1872 2468 0