import os
import pandas as pd
# Base location of the sample data set (raw files hosted on GitHub).
base_url = 'https://raw.githubusercontent.com/practical-jupyter/sample-data/master/anime/'
# Build the URL with plain string handling: os.path.join is meant for
# filesystem paths and uses the OS-specific separator, which is not
# guaranteed to be '/'. Stripping and re-adding the slash keeps the result
# correct whether or not base_url ends with one.
anime_master_csv = base_url.rstrip('/') + '/anime_master.csv'
# pandas reads the CSV straight from the URL.
df = pd.read_csv(anime_master_csv)
# Compute the mean of every numeric column. numeric_only=True is required
# because the frame also holds text columns (e.g. genre, type); pandas >= 2.0
# raises TypeError instead of silently skipping them.
df.mean(numeric_only=True)
anime_id 14055.982035 episodes 13.939156 rating 6.507956 members 18924.950769 dtype: float64
df.loc[:, 'members'].sum()  # sum of the members column over all rows
190668879
df.loc[:, 'members'].min()  # smallest value in the members column
12
df.loc[:, 'members'].max()  # largest value in the members column
1013917
# Summary statistics of the numeric columns, rounded to one decimal place.
df.describe().round(decimals=1)
| | anime_id | episodes | rating | members |
---|---|---|---|---|
count | 10075.0 | 10075.0 | 10075.0 | 10075.0 |
mean | 14056.0 | 13.9 | 6.5 | 18925.0 |
std | 11294.9 | 50.8 | 1.1 | 57117.5 |
min | 1.0 | 1.0 | 1.7 | 12.0 |
25% | 3431.0 | 1.0 | 5.9 | 177.0 |
50% | 10526.0 | 1.0 | 6.6 | 1227.0 |
75% | 24438.0 | 13.0 | 7.3 | 10254.0 |
max | 34519.0 | 1818.0 | 10.0 | 1013917.0 |
# Same summary, but reporting the 10th and 90th percentiles instead of the
# default quartiles (the 50% row is still included in the output).
df.describe(percentiles=[0.1, 0.9]).round(decimals=1)
| | anime_id | episodes | rating | members |
---|---|---|---|---|
count | 10075.0 | 10075.0 | 10075.0 | 10075.0 |
mean | 14056.0 | 13.9 | 6.5 | 18925.0 |
std | 11294.9 | 50.8 | 1.1 | 57117.5 |
min | 1.0 | 1.0 | 1.7 | 12.0 |
10% | 1259.4 | 1.0 | 5.1 | 74.0 |
50% | 10526.0 | 1.0 | 6.6 | 1227.0 |
90% | 31190.0 | 37.0 | 7.8 | 47587.6 |
max | 34519.0 | 1818.0 | 10.0 | 1013917.0 |
# For the text columns, describe() reports count / unique / top / freq
# rather than numeric statistics.
df.loc[:, ['genre', 'type']].describe()
| | genre | type |
---|---|---|
count | 10075 | 10075 |
unique | 2735 | 6 |
top | Comedy | TV |
freq | 500 | 3330 |