# 数値計算やデータフレーム操作に関するライブラリをインポートする
import numpy as np
import pandas as pd
# URL によるリソースへのアクセスを提供するライブラリをインポートする。
# import urllib # Python 2 の場合
import urllib.request # Python 3 の場合
「**ニューヨークの大気状態観測値**」のデータを読み込んでみましょう。 (詳細)
# Specify the resource on the web.
url = 'https://raw.githubusercontent.com/maskot1977/ipython_notebook/master/toydata/airquality.txt'
# Download the resource from the given URL and save it under a local file name.
# urllib.urlretrieve(url, 'airquality.txt') # for Python 2
urllib.request.urlretrieve(url, 'airquality.txt') # for Python 3
('airquality.txt', <http.client.HTTPMessage at 0x10652fcc0>)
# Load the downloaded file: fields are tab-separated and the first column
# is used as the row index.
df1 = pd.read_csv('airquality.txt', sep='\t', index_col=0)
# Show the loaded data to check it.
df1
Ozone | Solar.R | Wind | Temp | Month | Day | |
---|---|---|---|---|---|---|
1 | 41 | 190 | 7.4 | 67 | 5 | 1 |
2 | 36 | 118 | 8.0 | 72 | 5 | 2 |
3 | 12 | 149 | 12.6 | 74 | 5 | 3 |
4 | 18 | 313 | 11.5 | 62 | 5 | 4 |
5 | NaN | NaN | 14.3 | 56 | 5 | 5 |
6 | 28 | NaN | 14.9 | 66 | 5 | 6 |
7 | 23 | 299 | 8.6 | 65 | 5 | 7 |
8 | 19 | 99 | 13.8 | 59 | 5 | 8 |
9 | 8 | 19 | 20.1 | 61 | 5 | 9 |
10 | NaN | 194 | 8.6 | 69 | 5 | 10 |
11 | 7 | NaN | 6.9 | 74 | 5 | 11 |
12 | 16 | 256 | 9.7 | 69 | 5 | 12 |
13 | 11 | 290 | 9.2 | 66 | 5 | 13 |
14 | 14 | 274 | 10.9 | 68 | 5 | 14 |
15 | 18 | 65 | 13.2 | 58 | 5 | 15 |
16 | 14 | 334 | 11.5 | 64 | 5 | 16 |
17 | 34 | 307 | 12.0 | 66 | 5 | 17 |
18 | 6 | 78 | 18.4 | 57 | 5 | 18 |
19 | 30 | 322 | 11.5 | 68 | 5 | 19 |
20 | 11 | 44 | 9.7 | 62 | 5 | 20 |
21 | 1 | 8 | 9.7 | 59 | 5 | 21 |
22 | 11 | 320 | 16.6 | 73 | 5 | 22 |
23 | 4 | 25 | 9.7 | 61 | 5 | 23 |
24 | 32 | 92 | 12.0 | 61 | 5 | 24 |
25 | NaN | 66 | 16.6 | 57 | 5 | 25 |
26 | NaN | 266 | 14.9 | 58 | 5 | 26 |
27 | NaN | NaN | 8.0 | 57 | 5 | 27 |
28 | 23 | 13 | 12.0 | 67 | 5 | 28 |
29 | 45 | 252 | 14.9 | 81 | 5 | 29 |
30 | 115 | 223 | 5.7 | 79 | 5 | 30 |
... | ... | ... | ... | ... | ... | ... |
124 | 96 | 167 | 6.9 | 91 | 9 | 1 |
125 | 78 | 197 | 5.1 | 92 | 9 | 2 |
126 | 73 | 183 | 2.8 | 93 | 9 | 3 |
127 | 91 | 189 | 4.6 | 93 | 9 | 4 |
128 | 47 | 95 | 7.4 | 87 | 9 | 5 |
129 | 32 | 92 | 15.5 | 84 | 9 | 6 |
130 | 20 | 252 | 10.9 | 80 | 9 | 7 |
131 | 23 | 220 | 10.3 | 78 | 9 | 8 |
132 | 21 | 230 | 10.9 | 75 | 9 | 9 |
133 | 24 | 259 | 9.7 | 73 | 9 | 10 |
134 | 44 | 236 | 14.9 | 81 | 9 | 11 |
135 | 21 | 259 | 15.5 | 76 | 9 | 12 |
136 | 28 | 238 | 6.3 | 77 | 9 | 13 |
137 | 9 | 24 | 10.9 | 71 | 9 | 14 |
138 | 13 | 112 | 11.5 | 71 | 9 | 15 |
139 | 46 | 237 | 6.9 | 78 | 9 | 16 |
140 | 18 | 224 | 13.8 | 67 | 9 | 17 |
141 | 13 | 27 | 10.3 | 76 | 9 | 18 |
142 | 24 | 238 | 10.3 | 68 | 9 | 19 |
143 | 16 | 201 | 8.0 | 82 | 9 | 20 |
144 | 13 | 238 | 12.6 | 64 | 9 | 21 |
145 | 23 | 14 | 9.2 | 71 | 9 | 22 |
146 | 36 | 139 | 10.3 | 81 | 9 | 23 |
147 | 7 | 49 | 10.3 | 69 | 9 | 24 |
148 | 14 | 20 | 16.6 | 63 | 9 | 25 |
149 | 30 | 193 | 6.9 | 70 | 9 | 26 |
150 | NaN | 145 | 13.2 | 77 | 9 | 27 |
151 | 14 | 191 | 14.3 | 75 | 9 | 28 |
152 | 18 | 131 | 8.0 | 76 | 9 | 29 |
153 | 20 | 223 | 11.5 | 68 | 9 | 30 |
153 rows × 6 columns
うまく読み込めました。
次に、「**好きなアイスクリームアンケート**」のデータを読み込んでみましょう。 (詳細)
# Specify the resource on the web.
url = 'https://raw.githubusercontent.com/maskot1977/ipython_notebook/master/toydata/icecream_chosa.txt'
# Download the resource from the given URL and save it under a local file name.
# urllib.urlretrieve(url, 'icecream_chosa.txt') # for Python 2
urllib.request.urlretrieve(url, 'icecream_chosa.txt') # for Python 3
('icecream_chosa.txt', <http.client.HTTPMessage at 0x1065b8668>)
# Load the data (first attempt): tab as the field separator, first column
# as the row index. The result displayed for this cell has 0 columns,
# i.e. the lines were not split into fields.
df2 = pd.read_csv('icecream_chosa.txt', sep='\t', index_col=0)
df2
ID gender age birth_order frequency vanilla strawberry milk_tea macadamia_nuts cookie chocolate melon coffee almond rum_raisin mint banana caramel walnut cassis chocolate_chips orange green_tea marron chocolate_mint adzuki-bean |
---|
1 2 20 2 5 7 7 8 3 9 9 6 8 3 2 1 7 9 3 3 9 7 8 8 1 7 |
2 2 21 2 1 7 8 9 9 9 9 2 7 7 9 5 4 7 8 9 9 4 7 7 8 9 |
3 2 21 3 2 7 4 3 3 6 4 7 7 6 3 3 4 6 3 3 6 7 3 7 3 3 |
4 2 21 4 3 9 6 6 5 8 6 8 9 6 5 4 4 9 5 4 8 9 1 2 1 3 |
5 2 21 4 2 9 5 7 5 6 8 4 4 4 6 9 6 6 6 5 9 6 6 6 9 1 |
6 2 21 2 5 5 7 5 5 5 7 5 8 5 8 9 5 9 5 8 8 9 9 5 9 1 |
7 2 21 2 2 9 7 6 3 7 9 4 6 5 2 2 6 7 5 5 9 9 3 5 2 3 |
8 2 20 2 4 7 7 6 8 7 6 5 8 7 5 6 5 6 7 9 5 6 8 5 6 8 |
9 2 21 2 4 7 7 4 8 7 7 4 7 6 3 5 5 7 7 6 8 6 3 3 7 3 |
10 2 22 4 4 5 5 9 5 8 8 5 6 3 3 3 8 7 2 8 8 6 6 2 1 1 |
11 2 22 2 3 8 1 8 9 7 9 3 9 4 7 4 4 9 7 6 6 7 8 6 4 5 |
12 2 21 4 3 9 4 7 5 8 4 5 7 5 6 9 4 4 5 8 9 5 6 4 9 7 |
13 2 21 4 4 7 7 6 8 8 9 6 7 7 6 6 5 9 7 5 9 7 7 6 8 8 |
14 2 20 4 2 9 6 4 6 9 9 3 6 8 9 4 6 7 8 4 9 9 8 9 7 8 |
15 2 20 2 4 8 7 5 5 5 9 4 5 4 3 9 4 4 6 5 8 6 9 4 9 5 |
16 2 21 4 2 7 4 7 4 7 3 3 7 5 3 6 6 6 6 6 4 6 8 5 7 7 |
17 2 20 2 2 6 4 2 2 7 6 4 5 5 8 3 4 6 6 5 7 5 9 6 9 6 |
18 2 20 3 2 8 7 5 5 6 8 3 5 6 3 7 3 5 4 5 7 9 8 4 9 4 |
19 2 21 4 6 5 9 8 5 7 5 5 5 9 4 7 6 6 7 7 8 7 9 5 8 5 |
20 2 20 4 2 6 6 6 8 8 6 1 5 7 8 4 5 7 7 7 7 6 7 7 8 8 |
21 2 21 4 3 9 9 6 6 8 7 7 9 6 6 3 7 6 5 9 9 9 9 7 4 8 |
22 2 21 1 4 7 9 9 7 9 8 3 1 6 1 3 8 8 8 5 8 6 7 8 4 8 |
23 2 21 2 4 8 8 9 8 8 8 6 6 6 9 4 5 8 8 4 9 8 9 5 5 8 |
24 2 20 4 4 7 7 6 5 4 7 4 5 5 8 9 9 9 3 7 7 3 5 9 9 9 |
25 2 20 1 3 7 6 7 9 9 8 4 6 8 9 3 6 8 8 5 8 4 8 9 3 9 |
26 2 21 4 4 7 5 8 7 9 9 5 9 4 4 1 5 4 4 8 9 8 8 6 1 7 |
27 2 20 2 5 7 9 7 7 8 5 3 4 8 9 1 2 8 8 9 8 9 6 3 1 7 |
28 2 21 2 4 7 6 1 7 8 8 4 4 4 6 7 3 6 6 4 8 4 8 7 8 7 |
29 2 21 3 4 6 7 7 8 8 9 8 9 9 6 6 5 9 8 7 9 7 7 5 5 3 |
30 2 21 4 4 7 7 7 7 9 9 6 9 9 9 9 9 9 5 5 9 6 9 7 9 6 |
... |
51 1 19 2 6 7 8 6 3 5 6 6 6 3 8 6 4 5 2 3 8 5 8 3 8 2 |
52 1 21 1 2 9 4 5 3 7 7 1 5 3 4 4 8 6 2 4 9 8 7 5 5 9 |
53 1 20 1 5 9 8 7 6 9 8 6 9 5 1 1 7 9 7 6 9 5 3 9 1 9 |
54 1 21 4 5 8 8 7 5 7 7 8 4 4 8 4 5 4 4 4 7 6 3 4 6 5 |
55 1 19 4 4 7 7 8 7 6 9 5 9 5 7 6 5 6 5 9 9 7 9 7 8 9 |
56 1 20 2 2 5 7 5 5 5 9 5 5 5 5 8 4 6 5 5 9 9 8 5 9 8 |
57 1 22 1 3 9 9 9 9 9 6 6 9 8 9 1 7 8 6 6 8 7 9 7 1 9 |
58 1 19 1 3 9 3 5 7 6 3 2 4 5 8 1 5 1 6 3 8 2 7 1 1 3 |
59 1 20 2 3 7 7 7 4 6 6 7 7 4 6 6 7 8 3 6 7 7 8 6 3 6 |
60 1 20 4 8 7 8 8 5 7 4 4 4 3 9 7 5 6 4 5 5 8 7 6 5 6 |
61 1 18 2 4 6 4 3 5 9 6 4 3 4 5 3 3 4 3 5 8 5 1 3 5 1 |
62 1 25 2 3 7 8 6 5 9 8 7 7 5 4 4 7 5 5 5 9 7 6 6 4 5 |
63 1 19 2 2 6 6 7 7 7 9 5 6 7 5 5 6 8 7 5 9 5 6 6 8 5 |
64 1 21 2 4 8 7 9 6 9 9 6 9 3 7 6 8 4 5 9 9 3 6 5 9 1 |
65 1 21 2 6 7 8 5 4 6 5 5 5 5 9 5 4 7 7 5 6 4 8 5 4 6 |
66 1 20 2 1 7 7 7 7 8 6 5 9 5 8 5 7 4 6 5 7 7 9 5 5 6 |
67 1 22 2 5 7 7 5 4 8 8 5 5 4 4 3 5 3 3 4 8 7 8 5 3 4 |
68 1 21 2 5 5 5 6 7 7 9 6 6 9 6 8 5 5 9 6 9 6 9 7 8 5 |
69 1 21 3 1 8 7 5 5 7 7 6 5 5 6 5 6 5 4 5 7 8 5 5 5 5 |
70 1 21 2 6 7 7 9 8 9 5 8 8 5 6 5 9 9 8 6 9 8 9 7 7 7 |
71 1 22 4 4 7 7 6 6 8 9 6 5 5 5 4 5 7 5 5 9 7 3 7 6 8 |
72 1 24 1 2 6 7 5 6 4 8 8 7 6 8 4 4 2 6 8 4 7 8 3 2 6 |
73 1 20 2 2 6 6 5 8 8 7 3 6 7 7 8 6 7 8 7 8 5 8 8 8 7 |
74 1 20 2 5 8 8 4 8 9 7 3 6 6 3 8 5 4 8 8 9 8 9 6 9 4 |
75 1 21 2 6 4 6 2 3 6 6 8 5 4 2 2 3 2 3 2 6 8 1 1 2 2 |
76 1 20 2 4 5 6 4 9 6 7 4 3 6 5 4 5 5 5 5 7 7 4 4 4 4 |
77 1 20 3 6 7 5 6 6 8 9 4 4 4 5 4 3 9 5 5 9 7 6 5 6 7 |
78 1 23 1 5 4 8 8 9 9 9 4 8 9 1 9 6 8 9 5 9 9 9 4 9 9 |
79 1 23 4 7 9 7 7 5 4 6 7 7 4 5 8 9 7 4 5 8 8 5 5 8 4 |
80 1 22 2 2 8 5 5 2 5 4 7 1 5 5 3 4 5 5 5 5 7 1 1 5 5 |
80 rows × 0 columns
上の方法では、「好きなアイスクリームアンケート」のデータがうまく読み込めていません。原因は、実際のデータ区切り文字が「 」(空白)なのに、データの読み込み時に「sep='\t'」(データ区切り文字はタブ)と指定したからです。では改めて、データ区切り文字に空白を指定して読み込んでみましょう。
# Load the data again, this time with a single space as the field separator
# and the first column (ID) as the row index.
# NOTE(review): the displayed result ends with an all-NaN "Unnamed: 26"
# column, presumably because each line has a trailing space; sep=r'\s+'
# would likely avoid it -- confirm against the raw file.
df2 = pd.read_csv('icecream_chosa.txt', sep=' ', index_col=0)
df2
gender | age | birth_order | frequency | vanilla | strawberry | milk_tea | macadamia_nuts | cookie | chocolate | ... | caramel | walnut | cassis | chocolate_chips | orange | green_tea | marron | chocolate_mint | adzuki-bean | Unnamed: 26 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ID | |||||||||||||||||||||
1 | 2 | 20 | 2 | 5 | 7 | 7 | 8 | 3 | 9 | 9 | ... | 9 | 3 | 3 | 9 | 7 | 8 | 8 | 1 | 7 | NaN |
2 | 2 | 21 | 2 | 1 | 7 | 8 | 9 | 9 | 9 | 9 | ... | 7 | 8 | 9 | 9 | 4 | 7 | 7 | 8 | 9 | NaN |
3 | 2 | 21 | 3 | 2 | 7 | 4 | 3 | 3 | 6 | 4 | ... | 6 | 3 | 3 | 6 | 7 | 3 | 7 | 3 | 3 | NaN |
4 | 2 | 21 | 4 | 3 | 9 | 6 | 6 | 5 | 8 | 6 | ... | 9 | 5 | 4 | 8 | 9 | 1 | 2 | 1 | 3 | NaN |
5 | 2 | 21 | 4 | 2 | 9 | 5 | 7 | 5 | 6 | 8 | ... | 6 | 6 | 5 | 9 | 6 | 6 | 6 | 9 | 1 | NaN |
6 | 2 | 21 | 2 | 5 | 5 | 7 | 5 | 5 | 5 | 7 | ... | 9 | 5 | 8 | 8 | 9 | 9 | 5 | 9 | 1 | NaN |
7 | 2 | 21 | 2 | 2 | 9 | 7 | 6 | 3 | 7 | 9 | ... | 7 | 5 | 5 | 9 | 9 | 3 | 5 | 2 | 3 | NaN |
8 | 2 | 20 | 2 | 4 | 7 | 7 | 6 | 8 | 7 | 6 | ... | 6 | 7 | 9 | 5 | 6 | 8 | 5 | 6 | 8 | NaN |
9 | 2 | 21 | 2 | 4 | 7 | 7 | 4 | 8 | 7 | 7 | ... | 7 | 7 | 6 | 8 | 6 | 3 | 3 | 7 | 3 | NaN |
10 | 2 | 22 | 4 | 4 | 5 | 5 | 9 | 5 | 8 | 8 | ... | 7 | 2 | 8 | 8 | 6 | 6 | 2 | 1 | 1 | NaN |
11 | 2 | 22 | 2 | 3 | 8 | 1 | 8 | 9 | 7 | 9 | ... | 9 | 7 | 6 | 6 | 7 | 8 | 6 | 4 | 5 | NaN |
12 | 2 | 21 | 4 | 3 | 9 | 4 | 7 | 5 | 8 | 4 | ... | 4 | 5 | 8 | 9 | 5 | 6 | 4 | 9 | 7 | NaN |
13 | 2 | 21 | 4 | 4 | 7 | 7 | 6 | 8 | 8 | 9 | ... | 9 | 7 | 5 | 9 | 7 | 7 | 6 | 8 | 8 | NaN |
14 | 2 | 20 | 4 | 2 | 9 | 6 | 4 | 6 | 9 | 9 | ... | 7 | 8 | 4 | 9 | 9 | 8 | 9 | 7 | 8 | NaN |
15 | 2 | 20 | 2 | 4 | 8 | 7 | 5 | 5 | 5 | 9 | ... | 4 | 6 | 5 | 8 | 6 | 9 | 4 | 9 | 5 | NaN |
16 | 2 | 21 | 4 | 2 | 7 | 4 | 7 | 4 | 7 | 3 | ... | 6 | 6 | 6 | 4 | 6 | 8 | 5 | 7 | 7 | NaN |
17 | 2 | 20 | 2 | 2 | 6 | 4 | 2 | 2 | 7 | 6 | ... | 6 | 6 | 5 | 7 | 5 | 9 | 6 | 9 | 6 | NaN |
18 | 2 | 20 | 3 | 2 | 8 | 7 | 5 | 5 | 6 | 8 | ... | 5 | 4 | 5 | 7 | 9 | 8 | 4 | 9 | 4 | NaN |
19 | 2 | 21 | 4 | 6 | 5 | 9 | 8 | 5 | 7 | 5 | ... | 6 | 7 | 7 | 8 | 7 | 9 | 5 | 8 | 5 | NaN |
20 | 2 | 20 | 4 | 2 | 6 | 6 | 6 | 8 | 8 | 6 | ... | 7 | 7 | 7 | 7 | 6 | 7 | 7 | 8 | 8 | NaN |
21 | 2 | 21 | 4 | 3 | 9 | 9 | 6 | 6 | 8 | 7 | ... | 6 | 5 | 9 | 9 | 9 | 9 | 7 | 4 | 8 | NaN |
22 | 2 | 21 | 1 | 4 | 7 | 9 | 9 | 7 | 9 | 8 | ... | 8 | 8 | 5 | 8 | 6 | 7 | 8 | 4 | 8 | NaN |
23 | 2 | 21 | 2 | 4 | 8 | 8 | 9 | 8 | 8 | 8 | ... | 8 | 8 | 4 | 9 | 8 | 9 | 5 | 5 | 8 | NaN |
24 | 2 | 20 | 4 | 4 | 7 | 7 | 6 | 5 | 4 | 7 | ... | 9 | 3 | 7 | 7 | 3 | 5 | 9 | 9 | 9 | NaN |
25 | 2 | 20 | 1 | 3 | 7 | 6 | 7 | 9 | 9 | 8 | ... | 8 | 8 | 5 | 8 | 4 | 8 | 9 | 3 | 9 | NaN |
26 | 2 | 21 | 4 | 4 | 7 | 5 | 8 | 7 | 9 | 9 | ... | 4 | 4 | 8 | 9 | 8 | 8 | 6 | 1 | 7 | NaN |
27 | 2 | 20 | 2 | 5 | 7 | 9 | 7 | 7 | 8 | 5 | ... | 8 | 8 | 9 | 8 | 9 | 6 | 3 | 1 | 7 | NaN |
28 | 2 | 21 | 2 | 4 | 7 | 6 | 1 | 7 | 8 | 8 | ... | 6 | 6 | 4 | 8 | 4 | 8 | 7 | 8 | 7 | NaN |
29 | 2 | 21 | 3 | 4 | 6 | 7 | 7 | 8 | 8 | 9 | ... | 9 | 8 | 7 | 9 | 7 | 7 | 5 | 5 | 3 | NaN |
30 | 2 | 21 | 4 | 4 | 7 | 7 | 7 | 7 | 9 | 9 | ... | 9 | 5 | 5 | 9 | 6 | 9 | 7 | 9 | 6 | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
51 | 1 | 19 | 2 | 6 | 7 | 8 | 6 | 3 | 5 | 6 | ... | 5 | 2 | 3 | 8 | 5 | 8 | 3 | 8 | 2 | NaN |
52 | 1 | 21 | 1 | 2 | 9 | 4 | 5 | 3 | 7 | 7 | ... | 6 | 2 | 4 | 9 | 8 | 7 | 5 | 5 | 9 | NaN |
53 | 1 | 20 | 1 | 5 | 9 | 8 | 7 | 6 | 9 | 8 | ... | 9 | 7 | 6 | 9 | 5 | 3 | 9 | 1 | 9 | NaN |
54 | 1 | 21 | 4 | 5 | 8 | 8 | 7 | 5 | 7 | 7 | ... | 4 | 4 | 4 | 7 | 6 | 3 | 4 | 6 | 5 | NaN |
55 | 1 | 19 | 4 | 4 | 7 | 7 | 8 | 7 | 6 | 9 | ... | 6 | 5 | 9 | 9 | 7 | 9 | 7 | 8 | 9 | NaN |
56 | 1 | 20 | 2 | 2 | 5 | 7 | 5 | 5 | 5 | 9 | ... | 6 | 5 | 5 | 9 | 9 | 8 | 5 | 9 | 8 | NaN |
57 | 1 | 22 | 1 | 3 | 9 | 9 | 9 | 9 | 9 | 6 | ... | 8 | 6 | 6 | 8 | 7 | 9 | 7 | 1 | 9 | NaN |
58 | 1 | 19 | 1 | 3 | 9 | 3 | 5 | 7 | 6 | 3 | ... | 1 | 6 | 3 | 8 | 2 | 7 | 1 | 1 | 3 | NaN |
59 | 1 | 20 | 2 | 3 | 7 | 7 | 7 | 4 | 6 | 6 | ... | 8 | 3 | 6 | 7 | 7 | 8 | 6 | 3 | 6 | NaN |
60 | 1 | 20 | 4 | 8 | 7 | 8 | 8 | 5 | 7 | 4 | ... | 6 | 4 | 5 | 5 | 8 | 7 | 6 | 5 | 6 | NaN |
61 | 1 | 18 | 2 | 4 | 6 | 4 | 3 | 5 | 9 | 6 | ... | 4 | 3 | 5 | 8 | 5 | 1 | 3 | 5 | 1 | NaN |
62 | 1 | 25 | 2 | 3 | 7 | 8 | 6 | 5 | 9 | 8 | ... | 5 | 5 | 5 | 9 | 7 | 6 | 6 | 4 | 5 | NaN |
63 | 1 | 19 | 2 | 2 | 6 | 6 | 7 | 7 | 7 | 9 | ... | 8 | 7 | 5 | 9 | 5 | 6 | 6 | 8 | 5 | NaN |
64 | 1 | 21 | 2 | 4 | 8 | 7 | 9 | 6 | 9 | 9 | ... | 4 | 5 | 9 | 9 | 3 | 6 | 5 | 9 | 1 | NaN |
65 | 1 | 21 | 2 | 6 | 7 | 8 | 5 | 4 | 6 | 5 | ... | 7 | 7 | 5 | 6 | 4 | 8 | 5 | 4 | 6 | NaN |
66 | 1 | 20 | 2 | 1 | 7 | 7 | 7 | 7 | 8 | 6 | ... | 4 | 6 | 5 | 7 | 7 | 9 | 5 | 5 | 6 | NaN |
67 | 1 | 22 | 2 | 5 | 7 | 7 | 5 | 4 | 8 | 8 | ... | 3 | 3 | 4 | 8 | 7 | 8 | 5 | 3 | 4 | NaN |
68 | 1 | 21 | 2 | 5 | 5 | 5 | 6 | 7 | 7 | 9 | ... | 5 | 9 | 6 | 9 | 6 | 9 | 7 | 8 | 5 | NaN |
69 | 1 | 21 | 3 | 1 | 8 | 7 | 5 | 5 | 7 | 7 | ... | 5 | 4 | 5 | 7 | 8 | 5 | 5 | 5 | 5 | NaN |
70 | 1 | 21 | 2 | 6 | 7 | 7 | 9 | 8 | 9 | 5 | ... | 9 | 8 | 6 | 9 | 8 | 9 | 7 | 7 | 7 | NaN |
71 | 1 | 22 | 4 | 4 | 7 | 7 | 6 | 6 | 8 | 9 | ... | 7 | 5 | 5 | 9 | 7 | 3 | 7 | 6 | 8 | NaN |
72 | 1 | 24 | 1 | 2 | 6 | 7 | 5 | 6 | 4 | 8 | ... | 2 | 6 | 8 | 4 | 7 | 8 | 3 | 2 | 6 | NaN |
73 | 1 | 20 | 2 | 2 | 6 | 6 | 5 | 8 | 8 | 7 | ... | 7 | 8 | 7 | 8 | 5 | 8 | 8 | 8 | 7 | NaN |
74 | 1 | 20 | 2 | 5 | 8 | 8 | 4 | 8 | 9 | 7 | ... | 4 | 8 | 8 | 9 | 8 | 9 | 6 | 9 | 4 | NaN |
75 | 1 | 21 | 2 | 6 | 4 | 6 | 2 | 3 | 6 | 6 | ... | 2 | 3 | 2 | 6 | 8 | 1 | 1 | 2 | 2 | NaN |
76 | 1 | 20 | 2 | 4 | 5 | 6 | 4 | 9 | 6 | 7 | ... | 5 | 5 | 5 | 7 | 7 | 4 | 4 | 4 | 4 | NaN |
77 | 1 | 20 | 3 | 6 | 7 | 5 | 6 | 6 | 8 | 9 | ... | 9 | 5 | 5 | 9 | 7 | 6 | 5 | 6 | 7 | NaN |
78 | 1 | 23 | 1 | 5 | 4 | 8 | 8 | 9 | 9 | 9 | ... | 8 | 9 | 5 | 9 | 9 | 9 | 4 | 9 | 9 | NaN |
79 | 1 | 23 | 4 | 7 | 9 | 7 | 7 | 5 | 4 | 6 | ... | 7 | 4 | 5 | 8 | 8 | 5 | 5 | 8 | 4 | NaN |
80 | 1 | 22 | 2 | 2 | 8 | 5 | 5 | 2 | 5 | 4 | ... | 5 | 5 | 5 | 5 | 7 | 1 | 1 | 5 | 5 | NaN |
80 rows × 26 columns
おおむねうまく読み込めました。ただし、末尾に値がすべて NaN の「Unnamed: 26」列が余分にできています(おそらく各行末に余分な空白があるためです)。
次に、「**ワインの品質**」のデータを読み込んでみましょう。 (詳細)
# Specify the resource on the web. HTTPS is used so the dataset cannot be
# read or altered in transit, as it could be over plain HTTP.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
# Download the resource from the given URL and save it under a local file name.
# urllib.urlretrieve(url, 'winequality-red.csv') # for Python 2
urllib.request.urlretrieve(url, 'winequality-red.csv') # for Python 3
('winequality-red.csv', <http.client.HTTPMessage at 0x10655de80>)
# Load the data (first attempt): tab as the field separator, first column
# as the row index. The result displayed for this cell has 0 columns,
# i.e. the lines were not split into fields.
df3 = pd.read_csv('winequality-red.csv', sep='\t', index_col=0)
df3
fixed acidity;"volatile acidity";"citric acid";"residual sugar";"chlorides";"free sulfur dioxide";"total sulfur dioxide";"density";"pH";"sulphates";"alcohol";"quality" |
---|
7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5 |
7.8;0.88;0;2.6;0.098;25;67;0.9968;3.2;0.68;9.8;5 |
7.8;0.76;0.04;2.3;0.092;15;54;0.997;3.26;0.65;9.8;5 |
11.2;0.28;0.56;1.9;0.075;17;60;0.998;3.16;0.58;9.8;6 |
7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5 |
7.4;0.66;0;1.8;0.075;13;40;0.9978;3.51;0.56;9.4;5 |
7.9;0.6;0.06;1.6;0.069;15;59;0.9964;3.3;0.46;9.4;5 |
7.3;0.65;0;1.2;0.065;15;21;0.9946;3.39;0.47;10;7 |
7.8;0.58;0.02;2;0.073;9;18;0.9968;3.36;0.57;9.5;7 |
7.5;0.5;0.36;6.1;0.071;17;102;0.9978;3.35;0.8;10.5;5 |
6.7;0.58;0.08;1.8;0.097;15;65;0.9959;3.28;0.54;9.2;5 |
7.5;0.5;0.36;6.1;0.071;17;102;0.9978;3.35;0.8;10.5;5 |
5.6;0.615;0;1.6;0.089;16;59;0.9943;3.58;0.52;9.9;5 |
7.8;0.61;0.29;1.6;0.114;9;29;0.9974;3.26;1.56;9.1;5 |
8.9;0.62;0.18;3.8;0.176;52;145;0.9986;3.16;0.88;9.2;5 |
8.9;0.62;0.19;3.9;0.17;51;148;0.9986;3.17;0.93;9.2;5 |
8.5;0.28;0.56;1.8;0.092;35;103;0.9969;3.3;0.75;10.5;7 |
8.1;0.56;0.28;1.7;0.368;16;56;0.9968;3.11;1.28;9.3;5 |
7.4;0.59;0.08;4.4;0.086;6;29;0.9974;3.38;0.5;9;4 |
7.9;0.32;0.51;1.8;0.341;17;56;0.9969;3.04;1.08;9.2;6 |
8.9;0.22;0.48;1.8;0.077;29;60;0.9968;3.39;0.53;9.4;6 |
7.6;0.39;0.31;2.3;0.082;23;71;0.9982;3.52;0.65;9.7;5 |
7.9;0.43;0.21;1.6;0.106;10;37;0.9966;3.17;0.91;9.5;5 |
8.5;0.49;0.11;2.3;0.084;9;67;0.9968;3.17;0.53;9.4;5 |
6.9;0.4;0.14;2.4;0.085;21;40;0.9968;3.43;0.63;9.7;6 |
6.3;0.39;0.16;1.4;0.08;11;23;0.9955;3.34;0.56;9.3;5 |
7.6;0.41;0.24;1.8;0.08;4;11;0.9962;3.28;0.59;9.5;5 |
7.9;0.43;0.21;1.6;0.106;10;37;0.9966;3.17;0.91;9.5;5 |
7.1;0.71;0;1.9;0.08;14;35;0.9972;3.47;0.55;9.4;5 |
7.8;0.645;0;2;0.082;8;16;0.9964;3.38;0.59;9.8;6 |
... |
6.2;0.51;0.14;1.9;0.056;15;34;0.99396;3.48;0.57;11.5;6 |
6.4;0.36;0.53;2.2;0.23;19;35;0.9934;3.37;0.93;12.4;6 |
6.4;0.38;0.14;2.2;0.038;15;25;0.99514;3.44;0.65;11.1;6 |
7.3;0.69;0.32;2.2;0.069;35;104;0.99632;3.33;0.51;9.5;5 |
6;0.58;0.2;2.4;0.075;15;50;0.99467;3.58;0.67;12.5;6 |
5.6;0.31;0.78;13.9;0.074;23;92;0.99677;3.39;0.48;10.5;6 |
7.5;0.52;0.4;2.2;0.06;12;20;0.99474;3.26;0.64;11.8;6 |
8;0.3;0.63;1.6;0.081;16;29;0.99588;3.3;0.78;10.8;6 |
6.2;0.7;0.15;5.1;0.076;13;27;0.99622;3.54;0.6;11.9;6 |
6.8;0.67;0.15;1.8;0.118;13;20;0.9954;3.42;0.67;11.3;6 |
6.2;0.56;0.09;1.7;0.053;24;32;0.99402;3.54;0.6;11.3;5 |
7.4;0.35;0.33;2.4;0.068;9;26;0.9947;3.36;0.6;11.9;6 |
6.2;0.56;0.09;1.7;0.053;24;32;0.99402;3.54;0.6;11.3;5 |
6.1;0.715;0.1;2.6;0.053;13;27;0.99362;3.57;0.5;11.9;5 |
6.2;0.46;0.29;2.1;0.074;32;98;0.99578;3.33;0.62;9.8;5 |
6.7;0.32;0.44;2.4;0.061;24;34;0.99484;3.29;0.8;11.6;7 |
7.2;0.39;0.44;2.6;0.066;22;48;0.99494;3.3;0.84;11.5;6 |
7.5;0.31;0.41;2.4;0.065;34;60;0.99492;3.34;0.85;11.4;6 |
5.8;0.61;0.11;1.8;0.066;18;28;0.99483;3.55;0.66;10.9;6 |
7.2;0.66;0.33;2.5;0.068;34;102;0.99414;3.27;0.78;12.8;6 |
6.6;0.725;0.2;7.8;0.073;29;79;0.9977;3.29;0.54;9.2;5 |
6.3;0.55;0.15;1.8;0.077;26;35;0.99314;3.32;0.82;11.6;6 |
5.4;0.74;0.09;1.7;0.089;16;26;0.99402;3.67;0.56;11.6;6 |
6.3;0.51;0.13;2.3;0.076;29;40;0.99574;3.42;0.75;11;6 |
6.8;0.62;0.08;1.9;0.068;28;38;0.99651;3.42;0.82;9.5;6 |
6.2;0.6;0.08;2;0.09;32;44;0.9949;3.45;0.58;10.5;5 |
5.9;0.55;0.1;2.2;0.062;39;51;0.99512;3.52;0.76;11.2;6 |
6.3;0.51;0.13;2.3;0.076;29;40;0.99574;3.42;0.75;11;6 |
5.9;0.645;0.12;2;0.075;32;44;0.99547;3.57;0.71;10.2;5 |
6;0.31;0.47;3.6;0.067;18;42;0.99549;3.39;0.66;11;6 |
1599 rows × 0 columns
上の方法では、「ワインの品質」のデータがうまく読み込めていません。原因は、実際のデータ区切り文字が「;」(セミコロン)なのに、データの読み込み時に「sep='\t'」(データ区切り文字はタブ)と指定したからです。では改めて、データ区切り文字にセミコロンを指定して読み込んでみましょう。
# Load the data again with a semicolon as the field separator.
# index_col=0 is still passed, so the first column ("fixed acidity" in the
# displayed result) becomes the row index instead of a data column.
df3 = pd.read_csv('winequality-red.csv', sep=';', index_col=0)
df3
volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | |
---|---|---|---|---|---|---|---|---|---|---|---|
fixed acidity | |||||||||||
7.4 | 0.700 | 0.00 | 1.9 | 0.076 | 11 | 34 | 0.99780 | 3.51 | 0.56 | 9.4 | 5 |
7.8 | 0.880 | 0.00 | 2.6 | 0.098 | 25 | 67 | 0.99680 | 3.20 | 0.68 | 9.8 | 5 |
7.8 | 0.760 | 0.04 | 2.3 | 0.092 | 15 | 54 | 0.99700 | 3.26 | 0.65 | 9.8 | 5 |
11.2 | 0.280 | 0.56 | 1.9 | 0.075 | 17 | 60 | 0.99800 | 3.16 | 0.58 | 9.8 | 6 |
7.4 | 0.700 | 0.00 | 1.9 | 0.076 | 11 | 34 | 0.99780 | 3.51 | 0.56 | 9.4 | 5 |
7.4 | 0.660 | 0.00 | 1.8 | 0.075 | 13 | 40 | 0.99780 | 3.51 | 0.56 | 9.4 | 5 |
7.9 | 0.600 | 0.06 | 1.6 | 0.069 | 15 | 59 | 0.99640 | 3.30 | 0.46 | 9.4 | 5 |
7.3 | 0.650 | 0.00 | 1.2 | 0.065 | 15 | 21 | 0.99460 | 3.39 | 0.47 | 10.0 | 7 |
7.8 | 0.580 | 0.02 | 2.0 | 0.073 | 9 | 18 | 0.99680 | 3.36 | 0.57 | 9.5 | 7 |
7.5 | 0.500 | 0.36 | 6.1 | 0.071 | 17 | 102 | 0.99780 | 3.35 | 0.80 | 10.5 | 5 |
6.7 | 0.580 | 0.08 | 1.8 | 0.097 | 15 | 65 | 0.99590 | 3.28 | 0.54 | 9.2 | 5 |
7.5 | 0.500 | 0.36 | 6.1 | 0.071 | 17 | 102 | 0.99780 | 3.35 | 0.80 | 10.5 | 5 |
5.6 | 0.615 | 0.00 | 1.6 | 0.089 | 16 | 59 | 0.99430 | 3.58 | 0.52 | 9.9 | 5 |
7.8 | 0.610 | 0.29 | 1.6 | 0.114 | 9 | 29 | 0.99740 | 3.26 | 1.56 | 9.1 | 5 |
8.9 | 0.620 | 0.18 | 3.8 | 0.176 | 52 | 145 | 0.99860 | 3.16 | 0.88 | 9.2 | 5 |
8.9 | 0.620 | 0.19 | 3.9 | 0.170 | 51 | 148 | 0.99860 | 3.17 | 0.93 | 9.2 | 5 |
8.5 | 0.280 | 0.56 | 1.8 | 0.092 | 35 | 103 | 0.99690 | 3.30 | 0.75 | 10.5 | 7 |
8.1 | 0.560 | 0.28 | 1.7 | 0.368 | 16 | 56 | 0.99680 | 3.11 | 1.28 | 9.3 | 5 |
7.4 | 0.590 | 0.08 | 4.4 | 0.086 | 6 | 29 | 0.99740 | 3.38 | 0.50 | 9.0 | 4 |
7.9 | 0.320 | 0.51 | 1.8 | 0.341 | 17 | 56 | 0.99690 | 3.04 | 1.08 | 9.2 | 6 |
8.9 | 0.220 | 0.48 | 1.8 | 0.077 | 29 | 60 | 0.99680 | 3.39 | 0.53 | 9.4 | 6 |
7.6 | 0.390 | 0.31 | 2.3 | 0.082 | 23 | 71 | 0.99820 | 3.52 | 0.65 | 9.7 | 5 |
7.9 | 0.430 | 0.21 | 1.6 | 0.106 | 10 | 37 | 0.99660 | 3.17 | 0.91 | 9.5 | 5 |
8.5 | 0.490 | 0.11 | 2.3 | 0.084 | 9 | 67 | 0.99680 | 3.17 | 0.53 | 9.4 | 5 |
6.9 | 0.400 | 0.14 | 2.4 | 0.085 | 21 | 40 | 0.99680 | 3.43 | 0.63 | 9.7 | 6 |
6.3 | 0.390 | 0.16 | 1.4 | 0.080 | 11 | 23 | 0.99550 | 3.34 | 0.56 | 9.3 | 5 |
7.6 | 0.410 | 0.24 | 1.8 | 0.080 | 4 | 11 | 0.99620 | 3.28 | 0.59 | 9.5 | 5 |
7.9 | 0.430 | 0.21 | 1.6 | 0.106 | 10 | 37 | 0.99660 | 3.17 | 0.91 | 9.5 | 5 |
7.1 | 0.710 | 0.00 | 1.9 | 0.080 | 14 | 35 | 0.99720 | 3.47 | 0.55 | 9.4 | 5 |
7.8 | 0.645 | 0.00 | 2.0 | 0.082 | 8 | 16 | 0.99640 | 3.38 | 0.59 | 9.8 | 6 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
6.2 | 0.510 | 0.14 | 1.9 | 0.056 | 15 | 34 | 0.99396 | 3.48 | 0.57 | 11.5 | 6 |
6.4 | 0.360 | 0.53 | 2.2 | 0.230 | 19 | 35 | 0.99340 | 3.37 | 0.93 | 12.4 | 6 |
6.4 | 0.380 | 0.14 | 2.2 | 0.038 | 15 | 25 | 0.99514 | 3.44 | 0.65 | 11.1 | 6 |
7.3 | 0.690 | 0.32 | 2.2 | 0.069 | 35 | 104 | 0.99632 | 3.33 | 0.51 | 9.5 | 5 |
6.0 | 0.580 | 0.20 | 2.4 | 0.075 | 15 | 50 | 0.99467 | 3.58 | 0.67 | 12.5 | 6 |
5.6 | 0.310 | 0.78 | 13.9 | 0.074 | 23 | 92 | 0.99677 | 3.39 | 0.48 | 10.5 | 6 |
7.5 | 0.520 | 0.40 | 2.2 | 0.060 | 12 | 20 | 0.99474 | 3.26 | 0.64 | 11.8 | 6 |
8.0 | 0.300 | 0.63 | 1.6 | 0.081 | 16 | 29 | 0.99588 | 3.30 | 0.78 | 10.8 | 6 |
6.2 | 0.700 | 0.15 | 5.1 | 0.076 | 13 | 27 | 0.99622 | 3.54 | 0.60 | 11.9 | 6 |
6.8 | 0.670 | 0.15 | 1.8 | 0.118 | 13 | 20 | 0.99540 | 3.42 | 0.67 | 11.3 | 6 |
6.2 | 0.560 | 0.09 | 1.7 | 0.053 | 24 | 32 | 0.99402 | 3.54 | 0.60 | 11.3 | 5 |
7.4 | 0.350 | 0.33 | 2.4 | 0.068 | 9 | 26 | 0.99470 | 3.36 | 0.60 | 11.9 | 6 |
6.2 | 0.560 | 0.09 | 1.7 | 0.053 | 24 | 32 | 0.99402 | 3.54 | 0.60 | 11.3 | 5 |
6.1 | 0.715 | 0.10 | 2.6 | 0.053 | 13 | 27 | 0.99362 | 3.57 | 0.50 | 11.9 | 5 |
6.2 | 0.460 | 0.29 | 2.1 | 0.074 | 32 | 98 | 0.99578 | 3.33 | 0.62 | 9.8 | 5 |
6.7 | 0.320 | 0.44 | 2.4 | 0.061 | 24 | 34 | 0.99484 | 3.29 | 0.80 | 11.6 | 7 |
7.2 | 0.390 | 0.44 | 2.6 | 0.066 | 22 | 48 | 0.99494 | 3.30 | 0.84 | 11.5 | 6 |
7.5 | 0.310 | 0.41 | 2.4 | 0.065 | 34 | 60 | 0.99492 | 3.34 | 0.85 | 11.4 | 6 |
5.8 | 0.610 | 0.11 | 1.8 | 0.066 | 18 | 28 | 0.99483 | 3.55 | 0.66 | 10.9 | 6 |
7.2 | 0.660 | 0.33 | 2.5 | 0.068 | 34 | 102 | 0.99414 | 3.27 | 0.78 | 12.8 | 6 |
6.6 | 0.725 | 0.20 | 7.8 | 0.073 | 29 | 79 | 0.99770 | 3.29 | 0.54 | 9.2 | 5 |
6.3 | 0.550 | 0.15 | 1.8 | 0.077 | 26 | 35 | 0.99314 | 3.32 | 0.82 | 11.6 | 6 |
5.4 | 0.740 | 0.09 | 1.7 | 0.089 | 16 | 26 | 0.99402 | 3.67 | 0.56 | 11.6 | 6 |
6.3 | 0.510 | 0.13 | 2.3 | 0.076 | 29 | 40 | 0.99574 | 3.42 | 0.75 | 11.0 | 6 |
6.8 | 0.620 | 0.08 | 1.9 | 0.068 | 28 | 38 | 0.99651 | 3.42 | 0.82 | 9.5 | 6 |
6.2 | 0.600 | 0.08 | 2.0 | 0.090 | 32 | 44 | 0.99490 | 3.45 | 0.58 | 10.5 | 5 |
5.9 | 0.550 | 0.10 | 2.2 | 0.062 | 39 | 51 | 0.99512 | 3.52 | 0.76 | 11.2 | 6 |
6.3 | 0.510 | 0.13 | 2.3 | 0.076 | 29 | 40 | 0.99574 | 3.42 | 0.75 | 11.0 | 6 |
5.9 | 0.645 | 0.12 | 2.0 | 0.075 | 32 | 44 | 0.99547 | 3.57 | 0.71 | 10.2 | 5 |
6.0 | 0.310 | 0.47 | 3.6 | 0.067 | 18 | 42 | 0.99549 | 3.39 | 0.66 | 11.0 | 6 |
1599 rows × 11 columns
うまく読み込めたように見えるかもしれませんが、不十分です。第1列目(いちばん左)のデータが、インデックス番号として取り扱われています。このデータにはインデックス番号が指定されていませんので、次のようにして読み込みましょう。
# Load the data with a semicolon separator and no index_col, so every column
# is kept as data and pandas assigns a default integer index (0, 1, 2, ...).
df3 = pd.read_csv('winequality-red.csv', sep=';')
df3
fixed acidity | volatile acidity | citric acid | residual sugar | chlorides | free sulfur dioxide | total sulfur dioxide | density | pH | sulphates | alcohol | quality | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 7.4 | 0.700 | 0.00 | 1.9 | 0.076 | 11 | 34 | 0.99780 | 3.51 | 0.56 | 9.4 | 5 |
1 | 7.8 | 0.880 | 0.00 | 2.6 | 0.098 | 25 | 67 | 0.99680 | 3.20 | 0.68 | 9.8 | 5 |
2 | 7.8 | 0.760 | 0.04 | 2.3 | 0.092 | 15 | 54 | 0.99700 | 3.26 | 0.65 | 9.8 | 5 |
3 | 11.2 | 0.280 | 0.56 | 1.9 | 0.075 | 17 | 60 | 0.99800 | 3.16 | 0.58 | 9.8 | 6 |
4 | 7.4 | 0.700 | 0.00 | 1.9 | 0.076 | 11 | 34 | 0.99780 | 3.51 | 0.56 | 9.4 | 5 |
5 | 7.4 | 0.660 | 0.00 | 1.8 | 0.075 | 13 | 40 | 0.99780 | 3.51 | 0.56 | 9.4 | 5 |
6 | 7.9 | 0.600 | 0.06 | 1.6 | 0.069 | 15 | 59 | 0.99640 | 3.30 | 0.46 | 9.4 | 5 |
7 | 7.3 | 0.650 | 0.00 | 1.2 | 0.065 | 15 | 21 | 0.99460 | 3.39 | 0.47 | 10.0 | 7 |
8 | 7.8 | 0.580 | 0.02 | 2.0 | 0.073 | 9 | 18 | 0.99680 | 3.36 | 0.57 | 9.5 | 7 |
9 | 7.5 | 0.500 | 0.36 | 6.1 | 0.071 | 17 | 102 | 0.99780 | 3.35 | 0.80 | 10.5 | 5 |
10 | 6.7 | 0.580 | 0.08 | 1.8 | 0.097 | 15 | 65 | 0.99590 | 3.28 | 0.54 | 9.2 | 5 |
11 | 7.5 | 0.500 | 0.36 | 6.1 | 0.071 | 17 | 102 | 0.99780 | 3.35 | 0.80 | 10.5 | 5 |
12 | 5.6 | 0.615 | 0.00 | 1.6 | 0.089 | 16 | 59 | 0.99430 | 3.58 | 0.52 | 9.9 | 5 |
13 | 7.8 | 0.610 | 0.29 | 1.6 | 0.114 | 9 | 29 | 0.99740 | 3.26 | 1.56 | 9.1 | 5 |
14 | 8.9 | 0.620 | 0.18 | 3.8 | 0.176 | 52 | 145 | 0.99860 | 3.16 | 0.88 | 9.2 | 5 |
15 | 8.9 | 0.620 | 0.19 | 3.9 | 0.170 | 51 | 148 | 0.99860 | 3.17 | 0.93 | 9.2 | 5 |
16 | 8.5 | 0.280 | 0.56 | 1.8 | 0.092 | 35 | 103 | 0.99690 | 3.30 | 0.75 | 10.5 | 7 |
17 | 8.1 | 0.560 | 0.28 | 1.7 | 0.368 | 16 | 56 | 0.99680 | 3.11 | 1.28 | 9.3 | 5 |
18 | 7.4 | 0.590 | 0.08 | 4.4 | 0.086 | 6 | 29 | 0.99740 | 3.38 | 0.50 | 9.0 | 4 |
19 | 7.9 | 0.320 | 0.51 | 1.8 | 0.341 | 17 | 56 | 0.99690 | 3.04 | 1.08 | 9.2 | 6 |
20 | 8.9 | 0.220 | 0.48 | 1.8 | 0.077 | 29 | 60 | 0.99680 | 3.39 | 0.53 | 9.4 | 6 |
21 | 7.6 | 0.390 | 0.31 | 2.3 | 0.082 | 23 | 71 | 0.99820 | 3.52 | 0.65 | 9.7 | 5 |
22 | 7.9 | 0.430 | 0.21 | 1.6 | 0.106 | 10 | 37 | 0.99660 | 3.17 | 0.91 | 9.5 | 5 |
23 | 8.5 | 0.490 | 0.11 | 2.3 | 0.084 | 9 | 67 | 0.99680 | 3.17 | 0.53 | 9.4 | 5 |
24 | 6.9 | 0.400 | 0.14 | 2.4 | 0.085 | 21 | 40 | 0.99680 | 3.43 | 0.63 | 9.7 | 6 |
25 | 6.3 | 0.390 | 0.16 | 1.4 | 0.080 | 11 | 23 | 0.99550 | 3.34 | 0.56 | 9.3 | 5 |
26 | 7.6 | 0.410 | 0.24 | 1.8 | 0.080 | 4 | 11 | 0.99620 | 3.28 | 0.59 | 9.5 | 5 |
27 | 7.9 | 0.430 | 0.21 | 1.6 | 0.106 | 10 | 37 | 0.99660 | 3.17 | 0.91 | 9.5 | 5 |
28 | 7.1 | 0.710 | 0.00 | 1.9 | 0.080 | 14 | 35 | 0.99720 | 3.47 | 0.55 | 9.4 | 5 |
29 | 7.8 | 0.645 | 0.00 | 2.0 | 0.082 | 8 | 16 | 0.99640 | 3.38 | 0.59 | 9.8 | 6 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
1569 | 6.2 | 0.510 | 0.14 | 1.9 | 0.056 | 15 | 34 | 0.99396 | 3.48 | 0.57 | 11.5 | 6 |
1570 | 6.4 | 0.360 | 0.53 | 2.2 | 0.230 | 19 | 35 | 0.99340 | 3.37 | 0.93 | 12.4 | 6 |
1571 | 6.4 | 0.380 | 0.14 | 2.2 | 0.038 | 15 | 25 | 0.99514 | 3.44 | 0.65 | 11.1 | 6 |
1572 | 7.3 | 0.690 | 0.32 | 2.2 | 0.069 | 35 | 104 | 0.99632 | 3.33 | 0.51 | 9.5 | 5 |
1573 | 6.0 | 0.580 | 0.20 | 2.4 | 0.075 | 15 | 50 | 0.99467 | 3.58 | 0.67 | 12.5 | 6 |
1574 | 5.6 | 0.310 | 0.78 | 13.9 | 0.074 | 23 | 92 | 0.99677 | 3.39 | 0.48 | 10.5 | 6 |
1575 | 7.5 | 0.520 | 0.40 | 2.2 | 0.060 | 12 | 20 | 0.99474 | 3.26 | 0.64 | 11.8 | 6 |
1576 | 8.0 | 0.300 | 0.63 | 1.6 | 0.081 | 16 | 29 | 0.99588 | 3.30 | 0.78 | 10.8 | 6 |
1577 | 6.2 | 0.700 | 0.15 | 5.1 | 0.076 | 13 | 27 | 0.99622 | 3.54 | 0.60 | 11.9 | 6 |
1578 | 6.8 | 0.670 | 0.15 | 1.8 | 0.118 | 13 | 20 | 0.99540 | 3.42 | 0.67 | 11.3 | 6 |
1579 | 6.2 | 0.560 | 0.09 | 1.7 | 0.053 | 24 | 32 | 0.99402 | 3.54 | 0.60 | 11.3 | 5 |
1580 | 7.4 | 0.350 | 0.33 | 2.4 | 0.068 | 9 | 26 | 0.99470 | 3.36 | 0.60 | 11.9 | 6 |
1581 | 6.2 | 0.560 | 0.09 | 1.7 | 0.053 | 24 | 32 | 0.99402 | 3.54 | 0.60 | 11.3 | 5 |
1582 | 6.1 | 0.715 | 0.10 | 2.6 | 0.053 | 13 | 27 | 0.99362 | 3.57 | 0.50 | 11.9 | 5 |
1583 | 6.2 | 0.460 | 0.29 | 2.1 | 0.074 | 32 | 98 | 0.99578 | 3.33 | 0.62 | 9.8 | 5 |
1584 | 6.7 | 0.320 | 0.44 | 2.4 | 0.061 | 24 | 34 | 0.99484 | 3.29 | 0.80 | 11.6 | 7 |
1585 | 7.2 | 0.390 | 0.44 | 2.6 | 0.066 | 22 | 48 | 0.99494 | 3.30 | 0.84 | 11.5 | 6 |
1586 | 7.5 | 0.310 | 0.41 | 2.4 | 0.065 | 34 | 60 | 0.99492 | 3.34 | 0.85 | 11.4 | 6 |
1587 | 5.8 | 0.610 | 0.11 | 1.8 | 0.066 | 18 | 28 | 0.99483 | 3.55 | 0.66 | 10.9 | 6 |
1588 | 7.2 | 0.660 | 0.33 | 2.5 | 0.068 | 34 | 102 | 0.99414 | 3.27 | 0.78 | 12.8 | 6 |
1589 | 6.6 | 0.725 | 0.20 | 7.8 | 0.073 | 29 | 79 | 0.99770 | 3.29 | 0.54 | 9.2 | 5 |
1590 | 6.3 | 0.550 | 0.15 | 1.8 | 0.077 | 26 | 35 | 0.99314 | 3.32 | 0.82 | 11.6 | 6 |
1591 | 5.4 | 0.740 | 0.09 | 1.7 | 0.089 | 16 | 26 | 0.99402 | 3.67 | 0.56 | 11.6 | 6 |
1592 | 6.3 | 0.510 | 0.13 | 2.3 | 0.076 | 29 | 40 | 0.99574 | 3.42 | 0.75 | 11.0 | 6 |
1593 | 6.8 | 0.620 | 0.08 | 1.9 | 0.068 | 28 | 38 | 0.99651 | 3.42 | 0.82 | 9.5 | 6 |
1594 | 6.2 | 0.600 | 0.08 | 2.0 | 0.090 | 32 | 44 | 0.99490 | 3.45 | 0.58 | 10.5 | 5 |
1595 | 5.9 | 0.550 | 0.10 | 2.2 | 0.062 | 39 | 51 | 0.99512 | 3.52 | 0.76 | 11.2 | 6 |
1596 | 6.3 | 0.510 | 0.13 | 2.3 | 0.076 | 29 | 40 | 0.99574 | 3.42 | 0.75 | 11.0 | 6 |
1597 | 5.9 | 0.645 | 0.12 | 2.0 | 0.075 | 32 | 44 | 0.99547 | 3.57 | 0.71 | 10.2 | 5 |
1598 | 6.0 | 0.310 | 0.47 | 3.6 | 0.067 | 18 | 42 | 0.99549 | 3.39 | 0.66 | 11.0 | 6 |
1599 rows × 12 columns
うまく読み込めました。
次は、「**あわびのデータ**」を読み込んでみましょう。 (詳細)
# Specify the resource on the web. HTTPS is used so the dataset cannot be
# read or altered in transit, as it could be over plain HTTP.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data'
# Download the resource from the given URL and save it under a local file name.
# urllib.urlretrieve(url, 'abalone.data') # for Python 2
urllib.request.urlretrieve(url, 'abalone.data') # for Python 3
('abalone.data', <http.client.HTTPMessage at 0x10655a780>)
# Load the data (first attempt): tab as the field separator, first column
# as the row index. The result displayed for this cell has 0 columns,
# i.e. the lines were not split into fields.
df4 = pd.read_csv('abalone.data', sep='\t', index_col=0)
df4
M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15 |
---|
M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7 |
F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9 |
M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10 |
I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7 |
I,0.425,0.3,0.095,0.3515,0.141,0.0775,0.12,8 |
F,0.53,0.415,0.15,0.7775,0.237,0.1415,0.33,20 |
F,0.545,0.425,0.125,0.768,0.294,0.1495,0.26,16 |
M,0.475,0.37,0.125,0.5095,0.2165,0.1125,0.165,9 |
F,0.55,0.44,0.15,0.8945,0.3145,0.151,0.32,19 |
F,0.525,0.38,0.14,0.6065,0.194,0.1475,0.21,14 |
M,0.43,0.35,0.11,0.406,0.1675,0.081,0.135,10 |
M,0.49,0.38,0.135,0.5415,0.2175,0.095,0.19,11 |
F,0.535,0.405,0.145,0.6845,0.2725,0.171,0.205,10 |
F,0.47,0.355,0.1,0.4755,0.1675,0.0805,0.185,10 |
M,0.5,0.4,0.13,0.6645,0.258,0.133,0.24,12 |
I,0.355,0.28,0.085,0.2905,0.095,0.0395,0.115,7 |
F,0.44,0.34,0.1,0.451,0.188,0.087,0.13,10 |
M,0.365,0.295,0.08,0.2555,0.097,0.043,0.1,7 |
M,0.45,0.32,0.1,0.381,0.1705,0.075,0.115,9 |
M,0.355,0.28,0.095,0.2455,0.0955,0.062,0.075,11 |
I,0.38,0.275,0.1,0.2255,0.08,0.049,0.085,10 |
F,0.565,0.44,0.155,0.9395,0.4275,0.214,0.27,12 |
F,0.55,0.415,0.135,0.7635,0.318,0.21,0.2,9 |
F,0.615,0.48,0.165,1.1615,0.513,0.301,0.305,10 |
F,0.56,0.44,0.14,0.9285,0.3825,0.188,0.3,11 |
F,0.58,0.45,0.185,0.9955,0.3945,0.272,0.285,11 |
M,0.59,0.445,0.14,0.931,0.356,0.234,0.28,12 |
M,0.605,0.475,0.18,0.9365,0.394,0.219,0.295,15 |
M,0.575,0.425,0.14,0.8635,0.393,0.227,0.2,11 |
M,0.58,0.47,0.165,0.9975,0.3935,0.242,0.33,10 |
... |
M,0.695,0.55,0.195,1.6645,0.727,0.36,0.445,11 |
M,0.77,0.605,0.175,2.0505,0.8005,0.526,0.355,11 |
I,0.28,0.215,0.07,0.124,0.063,0.0215,0.03,6 |
I,0.33,0.23,0.08,0.14,0.0565,0.0365,0.046,7 |
I,0.35,0.25,0.075,0.1695,0.0835,0.0355,0.041,6 |
I,0.37,0.28,0.09,0.218,0.0995,0.0545,0.0615,7 |
I,0.43,0.315,0.115,0.384,0.1885,0.0715,0.11,8 |
I,0.435,0.33,0.095,0.393,0.219,0.075,0.0885,6 |
I,0.44,0.35,0.11,0.3805,0.1575,0.0895,0.115,6 |
M,0.475,0.37,0.11,0.4895,0.2185,0.107,0.146,8 |
M,0.475,0.36,0.14,0.5135,0.241,0.1045,0.155,8 |
I,0.48,0.355,0.11,0.4495,0.201,0.089,0.14,8 |
F,0.56,0.44,0.135,0.8025,0.35,0.1615,0.259,9 |
F,0.585,0.475,0.165,1.053,0.458,0.217,0.3,11 |
F,0.585,0.455,0.17,0.9945,0.4255,0.263,0.2845,11 |
M,0.385,0.255,0.1,0.3175,0.137,0.068,0.092,8 |
I,0.39,0.31,0.085,0.344,0.181,0.0695,0.079,7 |
I,0.39,0.29,0.1,0.2845,0.1255,0.0635,0.081,7 |
I,0.405,0.3,0.085,0.3035,0.15,0.0505,0.088,7 |
I,0.475,0.365,0.115,0.499,0.232,0.0885,0.156,10 |
M,0.5,0.38,0.125,0.577,0.269,0.1265,0.1535,9 |
F,0.515,0.4,0.125,0.615,0.2865,0.123,0.1765,8 |
M,0.52,0.385,0.165,0.791,0.375,0.18,0.1815,10 |
M,0.55,0.43,0.13,0.8395,0.3155,0.1955,0.2405,10 |
M,0.56,0.43,0.155,0.8675,0.4,0.172,0.229,8 |
F,0.565,0.45,0.165,0.887,0.37,0.239,0.249,11 |
M,0.59,0.44,0.135,0.966,0.439,0.2145,0.2605,10 |
M,0.6,0.475,0.205,1.176,0.5255,0.2875,0.308,9 |
F,0.625,0.485,0.15,1.0945,0.531,0.261,0.296,10 |
M,0.71,0.555,0.195,1.9485,0.9455,0.3765,0.495,12 |
4176 rows × 0 columns
上の方法では、「あわびのデータ」がうまく読み込めていません。原因は、実際のデータ区切り文字が「,」(コンマ)なのに、データの読み込み時に「sep='\t'」(データ区切り文字はタブ)と指定したからです。では改めて、データ区切り文字にコンマを指定して読み込んでみましょう。
# Load the data again with a comma as the field separator; the first column
# is still used as the row index.
# NOTE(review): in the displayed result the first record's values appear as
# the column names and the M/F/I column is the index, so the file presumably
# has neither a header row nor an index column -- header=None without
# index_col looks like the intended final form; confirm.
df4 = pd.read_csv('abalone.data', sep=',', index_col=0)
df4
0.455 | 0.365 | 0.095 | 0.514 | 0.2245 | 0.101 | 0.15 | 15 | |
---|---|---|---|---|---|---|---|---|
M | ||||||||
M | 0.350 | 0.265 | 0.090 | 0.2255 | 0.0995 | 0.0485 | 0.0700 | 7 |
F | 0.530 | 0.420 | 0.135 | 0.6770 | 0.2565 | 0.1415 | 0.2100 | 9 |
M | 0.440 | 0.365 | 0.125 | 0.5160 | 0.2155 | 0.1140 | 0.1550 | 10 |
I | 0.330 | 0.255 | 0.080 | 0.2050 | 0.0895 | 0.0395 | 0.0550 | 7 |
I | 0.425 | 0.300 | 0.095 | 0.3515 | 0.1410 | 0.0775 | 0.1200 | 8 |
F | 0.530 | 0.415 | 0.150 | 0.7775 | 0.2370 | 0.1415 | 0.3300 | 20 |
F | 0.545 | 0.425 | 0.125 | 0.7680 | 0.2940 | 0.1495 | 0.2600 | 16 |
M | 0.475 | 0.370 | 0.125 | 0.5095 | 0.2165 | 0.1125 | 0.1650 | 9 |
F | 0.550 | 0.440 | 0.150 | 0.8945 | 0.3145 | 0.1510 | 0.3200 | 19 |
F | 0.525 | 0.380 | 0.140 | 0.6065 | 0.1940 | 0.1475 | 0.2100 | 14 |
M | 0.430 | 0.350 | 0.110 | 0.4060 | 0.1675 | 0.0810 | 0.1350 | 10 |
M | 0.490 | 0.380 | 0.135 | 0.5415 | 0.2175 | 0.0950 | 0.1900 | 11 |
F | 0.535 | 0.405 | 0.145 | 0.6845 | 0.2725 | 0.1710 | 0.2050 | 10 |
F | 0.470 | 0.355 | 0.100 | 0.4755 | 0.1675 | 0.0805 | 0.1850 | 10 |
M | 0.500 | 0.400 | 0.130 | 0.6645 | 0.2580 | 0.1330 | 0.2400 | 12 |
I | 0.355 | 0.280 | 0.085 | 0.2905 | 0.0950 | 0.0395 | 0.1150 | 7 |
F | 0.440 | 0.340 | 0.100 | 0.4510 | 0.1880 | 0.0870 | 0.1300 | 10 |
M | 0.365 | 0.295 | 0.080 | 0.2555 | 0.0970 | 0.0430 | 0.1000 | 7 |
M | 0.450 | 0.320 | 0.100 | 0.3810 | 0.1705 | 0.0750 | 0.1150 | 9 |
M | 0.355 | 0.280 | 0.095 | 0.2455 | 0.0955 | 0.0620 | 0.0750 | 11 |
I | 0.380 | 0.275 | 0.100 | 0.2255 | 0.0800 | 0.0490 | 0.0850 | 10 |
F | 0.565 | 0.440 | 0.155 | 0.9395 | 0.4275 | 0.2140 | 0.2700 | 12 |
F | 0.550 | 0.415 | 0.135 | 0.7635 | 0.3180 | 0.2100 | 0.2000 | 9 |
F | 0.615 | 0.480 | 0.165 | 1.1615 | 0.5130 | 0.3010 | 0.3050 | 10 |
F | 0.560 | 0.440 | 0.140 | 0.9285 | 0.3825 | 0.1880 | 0.3000 | 11 |
F | 0.580 | 0.450 | 0.185 | 0.9955 | 0.3945 | 0.2720 | 0.2850 | 11 |
M | 0.590 | 0.445 | 0.140 | 0.9310 | 0.3560 | 0.2340 | 0.2800 | 12 |
M | 0.605 | 0.475 | 0.180 | 0.9365 | 0.3940 | 0.2190 | 0.2950 | 15 |
M | 0.575 | 0.425 | 0.140 | 0.8635 | 0.3930 | 0.2270 | 0.2000 | 11 |
M | 0.580 | 0.470 | 0.165 | 0.9975 | 0.3935 | 0.2420 | 0.3300 | 10 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
M | 0.695 | 0.550 | 0.195 | 1.6645 | 0.7270 | 0.3600 | 0.4450 | 11 |
M | 0.770 | 0.605 | 0.175 | 2.0505 | 0.8005 | 0.5260 | 0.3550 | 11 |
I | 0.280 | 0.215 | 0.070 | 0.1240 | 0.0630 | 0.0215 | 0.0300 | 6 |
I | 0.330 | 0.230 | 0.080 | 0.1400 | 0.0565 | 0.0365 | 0.0460 | 7 |
I | 0.350 | 0.250 | 0.075 | 0.1695 | 0.0835 | 0.0355 | 0.0410 | 6 |
I | 0.370 | 0.280 | 0.090 | 0.2180 | 0.0995 | 0.0545 | 0.0615 | 7 |
I | 0.430 | 0.315 | 0.115 | 0.3840 | 0.1885 | 0.0715 | 0.1100 | 8 |
I | 0.435 | 0.330 | 0.095 | 0.3930 | 0.2190 | 0.0750 | 0.0885 | 6 |
I | 0.440 | 0.350 | 0.110 | 0.3805 | 0.1575 | 0.0895 | 0.1150 | 6 |
M | 0.475 | 0.370 | 0.110 | 0.4895 | 0.2185 | 0.1070 | 0.1460 | 8 |
M | 0.475 | 0.360 | 0.140 | 0.5135 | 0.2410 | 0.1045 | 0.1550 | 8 |
I | 0.480 | 0.355 | 0.110 | 0.4495 | 0.2010 | 0.0890 | 0.1400 | 8 |
F | 0.560 | 0.440 | 0.135 | 0.8025 | 0.3500 | 0.1615 | 0.2590 | 9 |
F | 0.585 | 0.475 | 0.165 | 1.0530 | 0.4580 | 0.2170 | 0.3000 | 11 |
F | 0.585 | 0.455 | 0.170 | 0.9945 | 0.4255 | 0.2630 | 0.2845 | 11 |
M | 0.385 | 0.255 | 0.100 | 0.3175 | 0.1370 | 0.0680 | 0.0920 | 8 |
I | 0.390 | 0.310 | 0.085 | 0.3440 | 0.1810 | 0.0695 | 0.0790 | 7 |
I | 0.390 | 0.290 | 0.100 | 0.2845 | 0.1255 | 0.0635 | 0.0810 | 7 |
I | 0.405 | 0.300 | 0.085 | 0.3035 | 0.1500 | 0.0505 | 0.0880 | 7 |
I | 0.475 | 0.365 | 0.115 | 0.4990 | 0.2320 | 0.0885 | 0.1560 | 10 |
M | 0.500 | 0.380 | 0.125 | 0.5770 | 0.2690 | 0.1265 | 0.1535 | 9 |
F | 0.515 | 0.400 | 0.125 | 0.6150 | 0.2865 | 0.1230 | 0.1765 | 8 |
M | 0.520 | 0.385 | 0.165 | 0.7910 | 0.3750 | 0.1800 | 0.1815 | 10 |
M | 0.550 | 0.430 | 0.130 | 0.8395 | 0.3155 | 0.1955 | 0.2405 | 10 |
M | 0.560 | 0.430 | 0.155 | 0.8675 | 0.4000 | 0.1720 | 0.2290 | 8 |
F | 0.565 | 0.450 | 0.165 | 0.8870 | 0.3700 | 0.2390 | 0.2490 | 11 |
M | 0.590 | 0.440 | 0.135 | 0.9660 | 0.4390 | 0.2145 | 0.2605 | 10 |
M | 0.600 | 0.475 | 0.205 | 1.1760 | 0.5255 | 0.2875 | 0.3080 | 9 |
F | 0.625 | 0.485 | 0.150 | 1.0945 | 0.5310 | 0.2610 | 0.2960 | 10 |
M | 0.710 | 0.555 | 0.195 | 1.9485 | 0.9455 | 0.3765 | 0.4950 | 12 |
4176 rows × 8 columns
うまく読み込めたように見えるかもしれませんが、不十分です。「index_col=0」と指定したため、第1列目(いちばん左)のデータが、インデックス(行ラベル)として取り扱われています。このデータにはインデックス用の列が含まれていませんので、「index_col」を指定せずに、次のようにして読み込みましょう。
# Load the data as comma-separated text, this time without index_col.
# No header option is given, so the first line of the file is taken as
# the column names.
df4 = pd.read_csv('abalone.data', sep=',')
# Display the loaded DataFrame to check the result.
df4
| | M | 0.455 | 0.365 | 0.095 | 0.514 | 0.2245 | 0.101 | 0.15 | 15 |
---|---|---|---|---|---|---|---|---|---|
0 | M | 0.350 | 0.265 | 0.090 | 0.2255 | 0.0995 | 0.0485 | 0.0700 | 7 |
1 | F | 0.530 | 0.420 | 0.135 | 0.6770 | 0.2565 | 0.1415 | 0.2100 | 9 |
2 | M | 0.440 | 0.365 | 0.125 | 0.5160 | 0.2155 | 0.1140 | 0.1550 | 10 |
3 | I | 0.330 | 0.255 | 0.080 | 0.2050 | 0.0895 | 0.0395 | 0.0550 | 7 |
4 | I | 0.425 | 0.300 | 0.095 | 0.3515 | 0.1410 | 0.0775 | 0.1200 | 8 |
5 | F | 0.530 | 0.415 | 0.150 | 0.7775 | 0.2370 | 0.1415 | 0.3300 | 20 |
6 | F | 0.545 | 0.425 | 0.125 | 0.7680 | 0.2940 | 0.1495 | 0.2600 | 16 |
7 | M | 0.475 | 0.370 | 0.125 | 0.5095 | 0.2165 | 0.1125 | 0.1650 | 9 |
8 | F | 0.550 | 0.440 | 0.150 | 0.8945 | 0.3145 | 0.1510 | 0.3200 | 19 |
9 | F | 0.525 | 0.380 | 0.140 | 0.6065 | 0.1940 | 0.1475 | 0.2100 | 14 |
10 | M | 0.430 | 0.350 | 0.110 | 0.4060 | 0.1675 | 0.0810 | 0.1350 | 10 |
11 | M | 0.490 | 0.380 | 0.135 | 0.5415 | 0.2175 | 0.0950 | 0.1900 | 11 |
12 | F | 0.535 | 0.405 | 0.145 | 0.6845 | 0.2725 | 0.1710 | 0.2050 | 10 |
13 | F | 0.470 | 0.355 | 0.100 | 0.4755 | 0.1675 | 0.0805 | 0.1850 | 10 |
14 | M | 0.500 | 0.400 | 0.130 | 0.6645 | 0.2580 | 0.1330 | 0.2400 | 12 |
15 | I | 0.355 | 0.280 | 0.085 | 0.2905 | 0.0950 | 0.0395 | 0.1150 | 7 |
16 | F | 0.440 | 0.340 | 0.100 | 0.4510 | 0.1880 | 0.0870 | 0.1300 | 10 |
17 | M | 0.365 | 0.295 | 0.080 | 0.2555 | 0.0970 | 0.0430 | 0.1000 | 7 |
18 | M | 0.450 | 0.320 | 0.100 | 0.3810 | 0.1705 | 0.0750 | 0.1150 | 9 |
19 | M | 0.355 | 0.280 | 0.095 | 0.2455 | 0.0955 | 0.0620 | 0.0750 | 11 |
20 | I | 0.380 | 0.275 | 0.100 | 0.2255 | 0.0800 | 0.0490 | 0.0850 | 10 |
21 | F | 0.565 | 0.440 | 0.155 | 0.9395 | 0.4275 | 0.2140 | 0.2700 | 12 |
22 | F | 0.550 | 0.415 | 0.135 | 0.7635 | 0.3180 | 0.2100 | 0.2000 | 9 |
23 | F | 0.615 | 0.480 | 0.165 | 1.1615 | 0.5130 | 0.3010 | 0.3050 | 10 |
24 | F | 0.560 | 0.440 | 0.140 | 0.9285 | 0.3825 | 0.1880 | 0.3000 | 11 |
25 | F | 0.580 | 0.450 | 0.185 | 0.9955 | 0.3945 | 0.2720 | 0.2850 | 11 |
26 | M | 0.590 | 0.445 | 0.140 | 0.9310 | 0.3560 | 0.2340 | 0.2800 | 12 |
27 | M | 0.605 | 0.475 | 0.180 | 0.9365 | 0.3940 | 0.2190 | 0.2950 | 15 |
28 | M | 0.575 | 0.425 | 0.140 | 0.8635 | 0.3930 | 0.2270 | 0.2000 | 11 |
29 | M | 0.580 | 0.470 | 0.165 | 0.9975 | 0.3935 | 0.2420 | 0.3300 | 10 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4146 | M | 0.695 | 0.550 | 0.195 | 1.6645 | 0.7270 | 0.3600 | 0.4450 | 11 |
4147 | M | 0.770 | 0.605 | 0.175 | 2.0505 | 0.8005 | 0.5260 | 0.3550 | 11 |
4148 | I | 0.280 | 0.215 | 0.070 | 0.1240 | 0.0630 | 0.0215 | 0.0300 | 6 |
4149 | I | 0.330 | 0.230 | 0.080 | 0.1400 | 0.0565 | 0.0365 | 0.0460 | 7 |
4150 | I | 0.350 | 0.250 | 0.075 | 0.1695 | 0.0835 | 0.0355 | 0.0410 | 6 |
4151 | I | 0.370 | 0.280 | 0.090 | 0.2180 | 0.0995 | 0.0545 | 0.0615 | 7 |
4152 | I | 0.430 | 0.315 | 0.115 | 0.3840 | 0.1885 | 0.0715 | 0.1100 | 8 |
4153 | I | 0.435 | 0.330 | 0.095 | 0.3930 | 0.2190 | 0.0750 | 0.0885 | 6 |
4154 | I | 0.440 | 0.350 | 0.110 | 0.3805 | 0.1575 | 0.0895 | 0.1150 | 6 |
4155 | M | 0.475 | 0.370 | 0.110 | 0.4895 | 0.2185 | 0.1070 | 0.1460 | 8 |
4156 | M | 0.475 | 0.360 | 0.140 | 0.5135 | 0.2410 | 0.1045 | 0.1550 | 8 |
4157 | I | 0.480 | 0.355 | 0.110 | 0.4495 | 0.2010 | 0.0890 | 0.1400 | 8 |
4158 | F | 0.560 | 0.440 | 0.135 | 0.8025 | 0.3500 | 0.1615 | 0.2590 | 9 |
4159 | F | 0.585 | 0.475 | 0.165 | 1.0530 | 0.4580 | 0.2170 | 0.3000 | 11 |
4160 | F | 0.585 | 0.455 | 0.170 | 0.9945 | 0.4255 | 0.2630 | 0.2845 | 11 |
4161 | M | 0.385 | 0.255 | 0.100 | 0.3175 | 0.1370 | 0.0680 | 0.0920 | 8 |
4162 | I | 0.390 | 0.310 | 0.085 | 0.3440 | 0.1810 | 0.0695 | 0.0790 | 7 |
4163 | I | 0.390 | 0.290 | 0.100 | 0.2845 | 0.1255 | 0.0635 | 0.0810 | 7 |
4164 | I | 0.405 | 0.300 | 0.085 | 0.3035 | 0.1500 | 0.0505 | 0.0880 | 7 |
4165 | I | 0.475 | 0.365 | 0.115 | 0.4990 | 0.2320 | 0.0885 | 0.1560 | 10 |
4166 | M | 0.500 | 0.380 | 0.125 | 0.5770 | 0.2690 | 0.1265 | 0.1535 | 9 |
4167 | F | 0.515 | 0.400 | 0.125 | 0.6150 | 0.2865 | 0.1230 | 0.1765 | 8 |
4168 | M | 0.520 | 0.385 | 0.165 | 0.7910 | 0.3750 | 0.1800 | 0.1815 | 10 |
4169 | M | 0.550 | 0.430 | 0.130 | 0.8395 | 0.3155 | 0.1955 | 0.2405 | 10 |
4170 | M | 0.560 | 0.430 | 0.155 | 0.8675 | 0.4000 | 0.1720 | 0.2290 | 8 |
4171 | F | 0.565 | 0.450 | 0.165 | 0.8870 | 0.3700 | 0.2390 | 0.2490 | 11 |
4172 | M | 0.590 | 0.440 | 0.135 | 0.9660 | 0.4390 | 0.2145 | 0.2605 | 10 |
4173 | M | 0.600 | 0.475 | 0.205 | 1.1760 | 0.5255 | 0.2875 | 0.3080 | 9 |
4174 | F | 0.625 | 0.485 | 0.150 | 1.0945 | 0.5310 | 0.2610 | 0.2960 | 10 |
4175 | M | 0.710 | 0.555 | 0.195 | 1.9485 | 0.9455 | 0.3765 | 0.4950 | 12 |
4176 rows × 9 columns
うまく読み込めたように見えるかもしれませんが、まだ不十分です。第1行目(いちばん上)のデータが、ヘッダ行(列名)として取り扱われており、その分だけデータの行数が1行少なくなっています(4176行)。このデータにはヘッダ行が含まれていませんので、「header=None」を指定して、次のようにして読み込みましょう。
# Load the data as comma-separated text; header=None tells pandas the file
# has no header row, so the first line is read as data and the columns are
# labelled with integers.
df4 = pd.read_csv('abalone.data', sep=',', header=None)
# Display the loaded DataFrame to check the result.
df4
| | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 |
---|---|---|---|---|---|---|---|---|---|
0 | M | 0.455 | 0.365 | 0.095 | 0.5140 | 0.2245 | 0.1010 | 0.1500 | 15 |
1 | M | 0.350 | 0.265 | 0.090 | 0.2255 | 0.0995 | 0.0485 | 0.0700 | 7 |
2 | F | 0.530 | 0.420 | 0.135 | 0.6770 | 0.2565 | 0.1415 | 0.2100 | 9 |
3 | M | 0.440 | 0.365 | 0.125 | 0.5160 | 0.2155 | 0.1140 | 0.1550 | 10 |
4 | I | 0.330 | 0.255 | 0.080 | 0.2050 | 0.0895 | 0.0395 | 0.0550 | 7 |
5 | I | 0.425 | 0.300 | 0.095 | 0.3515 | 0.1410 | 0.0775 | 0.1200 | 8 |
6 | F | 0.530 | 0.415 | 0.150 | 0.7775 | 0.2370 | 0.1415 | 0.3300 | 20 |
7 | F | 0.545 | 0.425 | 0.125 | 0.7680 | 0.2940 | 0.1495 | 0.2600 | 16 |
8 | M | 0.475 | 0.370 | 0.125 | 0.5095 | 0.2165 | 0.1125 | 0.1650 | 9 |
9 | F | 0.550 | 0.440 | 0.150 | 0.8945 | 0.3145 | 0.1510 | 0.3200 | 19 |
10 | F | 0.525 | 0.380 | 0.140 | 0.6065 | 0.1940 | 0.1475 | 0.2100 | 14 |
11 | M | 0.430 | 0.350 | 0.110 | 0.4060 | 0.1675 | 0.0810 | 0.1350 | 10 |
12 | M | 0.490 | 0.380 | 0.135 | 0.5415 | 0.2175 | 0.0950 | 0.1900 | 11 |
13 | F | 0.535 | 0.405 | 0.145 | 0.6845 | 0.2725 | 0.1710 | 0.2050 | 10 |
14 | F | 0.470 | 0.355 | 0.100 | 0.4755 | 0.1675 | 0.0805 | 0.1850 | 10 |
15 | M | 0.500 | 0.400 | 0.130 | 0.6645 | 0.2580 | 0.1330 | 0.2400 | 12 |
16 | I | 0.355 | 0.280 | 0.085 | 0.2905 | 0.0950 | 0.0395 | 0.1150 | 7 |
17 | F | 0.440 | 0.340 | 0.100 | 0.4510 | 0.1880 | 0.0870 | 0.1300 | 10 |
18 | M | 0.365 | 0.295 | 0.080 | 0.2555 | 0.0970 | 0.0430 | 0.1000 | 7 |
19 | M | 0.450 | 0.320 | 0.100 | 0.3810 | 0.1705 | 0.0750 | 0.1150 | 9 |
20 | M | 0.355 | 0.280 | 0.095 | 0.2455 | 0.0955 | 0.0620 | 0.0750 | 11 |
21 | I | 0.380 | 0.275 | 0.100 | 0.2255 | 0.0800 | 0.0490 | 0.0850 | 10 |
22 | F | 0.565 | 0.440 | 0.155 | 0.9395 | 0.4275 | 0.2140 | 0.2700 | 12 |
23 | F | 0.550 | 0.415 | 0.135 | 0.7635 | 0.3180 | 0.2100 | 0.2000 | 9 |
24 | F | 0.615 | 0.480 | 0.165 | 1.1615 | 0.5130 | 0.3010 | 0.3050 | 10 |
25 | F | 0.560 | 0.440 | 0.140 | 0.9285 | 0.3825 | 0.1880 | 0.3000 | 11 |
26 | F | 0.580 | 0.450 | 0.185 | 0.9955 | 0.3945 | 0.2720 | 0.2850 | 11 |
27 | M | 0.590 | 0.445 | 0.140 | 0.9310 | 0.3560 | 0.2340 | 0.2800 | 12 |
28 | M | 0.605 | 0.475 | 0.180 | 0.9365 | 0.3940 | 0.2190 | 0.2950 | 15 |
29 | M | 0.575 | 0.425 | 0.140 | 0.8635 | 0.3930 | 0.2270 | 0.2000 | 11 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4147 | M | 0.695 | 0.550 | 0.195 | 1.6645 | 0.7270 | 0.3600 | 0.4450 | 11 |
4148 | M | 0.770 | 0.605 | 0.175 | 2.0505 | 0.8005 | 0.5260 | 0.3550 | 11 |
4149 | I | 0.280 | 0.215 | 0.070 | 0.1240 | 0.0630 | 0.0215 | 0.0300 | 6 |
4150 | I | 0.330 | 0.230 | 0.080 | 0.1400 | 0.0565 | 0.0365 | 0.0460 | 7 |
4151 | I | 0.350 | 0.250 | 0.075 | 0.1695 | 0.0835 | 0.0355 | 0.0410 | 6 |
4152 | I | 0.370 | 0.280 | 0.090 | 0.2180 | 0.0995 | 0.0545 | 0.0615 | 7 |
4153 | I | 0.430 | 0.315 | 0.115 | 0.3840 | 0.1885 | 0.0715 | 0.1100 | 8 |
4154 | I | 0.435 | 0.330 | 0.095 | 0.3930 | 0.2190 | 0.0750 | 0.0885 | 6 |
4155 | I | 0.440 | 0.350 | 0.110 | 0.3805 | 0.1575 | 0.0895 | 0.1150 | 6 |
4156 | M | 0.475 | 0.370 | 0.110 | 0.4895 | 0.2185 | 0.1070 | 0.1460 | 8 |
4157 | M | 0.475 | 0.360 | 0.140 | 0.5135 | 0.2410 | 0.1045 | 0.1550 | 8 |
4158 | I | 0.480 | 0.355 | 0.110 | 0.4495 | 0.2010 | 0.0890 | 0.1400 | 8 |
4159 | F | 0.560 | 0.440 | 0.135 | 0.8025 | 0.3500 | 0.1615 | 0.2590 | 9 |
4160 | F | 0.585 | 0.475 | 0.165 | 1.0530 | 0.4580 | 0.2170 | 0.3000 | 11 |
4161 | F | 0.585 | 0.455 | 0.170 | 0.9945 | 0.4255 | 0.2630 | 0.2845 | 11 |
4162 | M | 0.385 | 0.255 | 0.100 | 0.3175 | 0.1370 | 0.0680 | 0.0920 | 8 |
4163 | I | 0.390 | 0.310 | 0.085 | 0.3440 | 0.1810 | 0.0695 | 0.0790 | 7 |
4164 | I | 0.390 | 0.290 | 0.100 | 0.2845 | 0.1255 | 0.0635 | 0.0810 | 7 |
4165 | I | 0.405 | 0.300 | 0.085 | 0.3035 | 0.1500 | 0.0505 | 0.0880 | 7 |
4166 | I | 0.475 | 0.365 | 0.115 | 0.4990 | 0.2320 | 0.0885 | 0.1560 | 10 |
4167 | M | 0.500 | 0.380 | 0.125 | 0.5770 | 0.2690 | 0.1265 | 0.1535 | 9 |
4168 | F | 0.515 | 0.400 | 0.125 | 0.6150 | 0.2865 | 0.1230 | 0.1765 | 8 |
4169 | M | 0.520 | 0.385 | 0.165 | 0.7910 | 0.3750 | 0.1800 | 0.1815 | 10 |
4170 | M | 0.550 | 0.430 | 0.130 | 0.8395 | 0.3155 | 0.1955 | 0.2405 | 10 |
4171 | M | 0.560 | 0.430 | 0.155 | 0.8675 | 0.4000 | 0.1720 | 0.2290 | 8 |
4172 | F | 0.565 | 0.450 | 0.165 | 0.8870 | 0.3700 | 0.2390 | 0.2490 | 11 |
4173 | M | 0.590 | 0.440 | 0.135 | 0.9660 | 0.4390 | 0.2145 | 0.2605 | 10 |
4174 | M | 0.600 | 0.475 | 0.205 | 1.1760 | 0.5255 | 0.2875 | 0.3080 | 9 |
4175 | F | 0.625 | 0.485 | 0.150 | 1.0945 | 0.5310 | 0.2610 | 0.2960 | 10 |
4176 | M | 0.710 | 0.555 | 0.195 | 1.9485 | 0.9455 | 0.3765 | 0.4950 | 12 |
4177 rows × 9 columns
これで、うまくデータを読み込めました。