import pandas as pd
# Example data set: one row per person with their age and two test scores.
# The column order is spelled out once and reused so the DataFrame layout
# does not depend on dict ordering.
_column_order = ['name', 'age', 'preTestScore', 'postTestScore']
_rows = [
    ('Jason', 42, 4, 25),
    ('Molly', 52, 24, 94),
    ('Tina', 36, 31, 57),
    ('Jake', 24, 2, 62),
    ('Amy', 73, 3, 70),
]
# Transpose the row tuples into the column-oriented dict used below.
data = {
    column: list(values)
    for column, values in zip(_column_order, zip(*_rows))
}
df = pd.DataFrame(data, columns=_column_order)
df
| | name | age | preTestScore | postTestScore |
---|---|---|---|---|
0 | Jason | 42 | 4 | 25 |
1 | Molly | 52 | 24 | 94 |
2 | Tina | 36 | 31 | 57 |
3 | Jake | 24 | 2 | 62 |
4 | Amy | 73 | 3 | 70 |
5 rows × 4 columns
# Total of all values in the 'age' column.
df.age.sum()
227
# Arithmetic mean of the pre-test scores.
df.preTestScore.mean()
12.800000000000001
# Running total of the pre-test scores, accumulated row by row.
df.preTestScore.cumsum()
0     4
1    28
2    59
3    61
4    64
Name: preTestScore, dtype: int64
# Summary statistics for the pre-test scores: count, mean, standard
# deviation, minimum, quartiles and maximum.
df.preTestScore.describe()
count     5.000000
mean     12.800000
std      13.663821
min       2.000000
25%       3.000000
50%       4.000000
75%      24.000000
max      31.000000
Name: preTestScore, dtype: float64
# Number of non-missing pre-test scores.
df.preTestScore.count()
5
# Smallest pre-test score.
df.preTestScore.min()
2
# Largest pre-test score.
df.preTestScore.max()
31
# Median (50th percentile) of the pre-test scores.
df.preTestScore.median()
4.0
# Sample variance of the pre-test scores (pandas divides by N - 1 by default).
df.preTestScore.var()
186.69999999999999
# Sample standard deviation of the pre-test scores (N - 1 in the denominator
# by default), i.e. the square root of the sample variance.
df.preTestScore.std()
13.663820841916802
# Skewness of the pre-test scores; a positive value indicates a longer
# right-hand tail.
df.preTestScore.skew()
0.74334524573267591
# Excess kurtosis of the pre-test scores (Fisher's definition, where a
# normal distribution scores 0).
df.preTestScore.kurt()
-2.4673543738411525
# Pairwise Pearson correlation between the numeric columns.
# The text column 'name' is excluded explicitly: pandas >= 2.0 no longer
# drops non-numeric columns silently and raises on them instead, whereas
# selecting by dtype behaves the same on old and new versions.
df.select_dtypes(include='number').corr()
| | age | preTestScore | postTestScore |
---|---|---|---|
age | 1.000000 | -0.105651 | 0.328852 |
preTestScore | -0.105651 | 1.000000 | 0.378039 |
postTestScore | 0.328852 | 0.378039 | 1.000000 |
3 rows × 3 columns
# Pairwise sample covariance between the numeric columns.
# The text column 'name' is excluded explicitly: pandas >= 2.0 no longer
# drops non-numeric columns silently and raises on them instead, whereas
# selecting by dtype behaves the same on old and new versions.
df.select_dtypes(include='number').cov()
| | age | preTestScore | postTestScore |
---|---|---|---|
age | 340.80 | -26.65 | 151.20 |
preTestScore | -26.65 | 186.70 | 128.65 |
postTestScore | 151.20 | 128.65 | 620.30 |
3 rows × 3 columns