# リスト3.5.1:CSVファイルの読み込み
import os
import pandas as pd
base_url = (
"https://raw.githubusercontent.com/practical-jupyter/sample-data/master/anime/"
)
anime_csv = os.path.join(base_url, "anime.csv")
df = pd.read_csv(anime_csv)
df.head()
anime_id | name | genre | type | episodes | rating | members | |
---|---|---|---|---|---|---|---|
0 | 32281 | Kimi no Na wa. | Drama, Romance, School, Supernatural | Movie | 1 | 9.37 | 200630 |
1 | 5114 | Fullmetal Alchemist: Brotherhood | Action, Adventure, Drama, Fantasy, Magic, Mili... | TV | 64 | 9.26 | 793665 |
2 | 28977 | Gintama° | Action, Comedy, Historical, Parody, Samurai, S... | TV | 51 | 9.25 | 114262 |
3 | 9253 | Steins;Gate | Sci-Fi, Thriller | TV | 24 | 9.17 | 673572 |
4 | 9969 | Gintama' | Action, Comedy, Historical, Parody, Samurai, S... | TV | 51 | 9.16 | 151266 |
# リスト3.5.2:インデックス列を番号で指定
# インデックスにする列を番号で指定
df = pd.read_csv(anime_csv, index_col=0)
df.head()
name | genre | type | episodes | rating | members | |
---|---|---|---|---|---|---|
anime_id | ||||||
32281 | Kimi no Na wa. | Drama, Romance, School, Supernatural | Movie | 1 | 9.37 | 200630 |
5114 | Fullmetal Alchemist: Brotherhood | Action, Adventure, Drama, Fantasy, Magic, Mili... | TV | 64 | 9.26 | 793665 |
28977 | Gintama° | Action, Comedy, Historical, Parody, Samurai, S... | TV | 51 | 9.25 | 114262 |
9253 | Steins;Gate | Sci-Fi, Thriller | TV | 24 | 9.17 | 673572 |
9969 | Gintama' | Action, Comedy, Historical, Parody, Samurai, S... | TV | 51 | 9.16 | 151266 |
# リスト3.5.3:インデックス列を列名で指定
# インデックスにする列を列名で指定
df = pd.read_csv(anime_csv, index_col="anime_id")
df.head()
name | genre | type | episodes | rating | members | |
---|---|---|---|---|---|---|
anime_id | ||||||
32281 | Kimi no Na wa. | Drama, Romance, School, Supernatural | Movie | 1 | 9.37 | 200630 |
5114 | Fullmetal Alchemist: Brotherhood | Action, Adventure, Drama, Fantasy, Magic, Mili... | TV | 64 | 9.26 | 793665 |
28977 | Gintama° | Action, Comedy, Historical, Parody, Samurai, S... | TV | 51 | 9.25 | 114262 |
9253 | Steins;Gate | Sci-Fi, Thriller | TV | 24 | 9.17 | 673572 |
9969 | Gintama' | Action, Comedy, Historical, Parody, Samurai, S... | TV | 51 | 9.16 | 151266 |
# リスト3.5.4:型を指定
df = pd.read_csv(anime_csv, dtype={"members": float})
df.head()
anime_id | name | genre | type | episodes | rating | members | |
---|---|---|---|---|---|---|---|
0 | 32281 | Kimi no Na wa. | Drama, Romance, School, Supernatural | Movie | 1 | 9.37 | 200630.0 |
1 | 5114 | Fullmetal Alchemist: Brotherhood | Action, Adventure, Drama, Fantasy, Magic, Mili... | TV | 64 | 9.26 | 793665.0 |
2 | 28977 | Gintama° | Action, Comedy, Historical, Parody, Samurai, S... | TV | 51 | 9.25 | 114262.0 |
3 | 9253 | Steins;Gate | Sci-Fi, Thriller | TV | 24 | 9.17 | 673572.0 |
4 | 9969 | Gintama' | Action, Comedy, Historical, Parody, Samurai, S... | TV | 51 | 9.16 | 151266.0 |
# リスト3.5.5:datetime型の変換
anime_stock_price_csv = os.path.join(base_url, "anime_stock_price.csv")
df = pd.read_csv(anime_stock_price_csv, parse_dates=["Date"])
df.dtypes
Date datetime64[ns] TOEI ANIMATION float64 IG Port float64 dtype: object
# リスト3.5.7:区切り文字を指定
anime_tsv = os.path.join(base_url, "anime.tsv")
df = pd.read_csv(anime_tsv, sep="\t")
# リスト3.5.8:Excelファイルの読み込み
anime_xlsx = os.path.join(base_url, "anime.xlsx")
df = pd.read_excel(anime_xlsx)
df.head()
anime_id | name | genre | type | episodes | rating | members | |
---|---|---|---|---|---|---|---|
0 | 32281 | Kimi no Na wa. | Drama, Romance, School, Supernatural | Movie | 1 | 9.37 | 200630 |
1 | 15335 | Gintama Movie: Kanketsu-hen - Yorozuya yo Eien... | Action, Comedy, Historical, Parody, Samurai, S... | Movie | 1 | 9.10 | 72534 |
2 | 28851 | Koe no Katachi | Drama, School, Shounen | Movie | 1 | 9.05 | 102733 |
3 | 199 | Sen to Chihiro no Kamikakushi | Adventure, Drama, Supernatural | Movie | 1 | 8.93 | 466254 |
4 | 12355 | Ookami Kodomo no Ame to Yuki | Fantasy, Slice of Life | Movie | 1 | 8.84 | 226193 |
# リスト3.5.9:シート名を指定した読み込み
df = pd.read_excel(anime_xlsx, sheetname="Movie")
# リスト3.5.10:SQLiteの読み込み
from urllib.request import urlopen
import sqlite3
anime_db = os.path.join(base_url, "anime.db")
res = urlopen(anime_db)
with open("anime.db", "wb") as f:
f.write(res.read())
with sqlite3.connect(f.name) as conn:
df = pd.read_sql("SELECT * FROM anime", conn)
# リスト3.5.11:HTMLファイルの読み込み
url = "https://docs.python.org/3/py-modindex.html"
tables = pd.read_html(url, index_col=1)
tables[0].loc[:, 1:].dropna().head(10) # 1番目のDataFrameから空の列と欠損値を除外
2 | |
---|---|
1 | |
__future__ | Future statement definitions |
__main__ | The environment where the top-level script is ... |
_dummy_thread | Drop-in replacement for the _thread module. |
_thread | Low-level threading API. |
abc | Abstract base classes according to PEP 3119. |
aifc | Read and write audio files in AIFF or AIFC for... |
argparse | Command-line option and argument parsing library. |
array | Space efficient arrays of uniformly typed nume... |
ast | Abstract Syntax Tree classes and manipulation. |
asynchat | Support for asynchronous command/response prot... |