import pandas as pd
Series are one-dimensional arrays (like R's vectors)
# Build a Series from a plain list. No index is supplied here, so the values
# are labelled by pandas' default integer index.
floodingReports = pd.Series(data=[5, 6, 2, 9, 12])
floodingReports
0 5 1 6 2 2 3 9 4 12 dtype: int64
Note that the first column of numbers (0 to 4) is the index; pandas assigns these integer labels by default when no index is given.
# Same counts as before, but labelled with county names through the `index`
# argument instead of the default integer labels.
floodingReports = pd.Series(
    data=[5, 6, 2, 9, 12],
    index=[
        'Cochise County',
        'Pima County',
        'Santa Cruz County',
        'Maricopa County',
        'Yuma County',
    ],
)
floodingReports
Cochise County 5 Pima County 6 Santa Cruz County 2 Maricopa County 9 Yuma County 12 dtype: int64
# Look up a single value by its index label.
floodingReports.loc['Cochise County']
5
# Boolean filtering: keep only the entries whose value is greater than 6.
floodingReports.loc[floodingReports > 6]
Maricopa County 9 Yuma County 12 dtype: int64
Note: when you create a Series from a dictionary, the dict's keys become the Series's index.
# Start from a plain dict mapping each county name to its fire-report count.
fireReports_dict = {
    'Cochise County': 12,
    'Pima County': 342,
    'Santa Cruz County': 13,
    'Maricopa County': 42,
    'Yuma County': 52,
}
# Build a pd.Series from the dict, then display it.
fireReports = pd.Series(fireReports_dict)
fireReports
Cochise County 12 Maricopa County 42 Pima County 342 Santa Cruz County 13 Yuma County 52 dtype: int64
# Shorten the index labels by mapping each old label to its new one.
# Assigning a bare list (`fireReports.index = [...]`) attaches labels purely by
# position, so a list written in a different order than the Series silently
# pairs counties with the wrong counts (e.g. Pima ending up with Maricopa's 42).
# A label-to-label mapping keeps every count attached to its own county
# regardless of the order the Series happens to be in.
fireReports = fireReports.rename(index={
    'Cochise County': 'Cochice',
    'Pima County': 'Pima',
    'Santa Cruz County': 'Santa Cruz',
    'Maricopa County': 'Maricopa',
    'Yuma County': 'Yuma',
})
fireReports
# Output (each county keeps its own count):
# Cochice        12
# Maricopa       42
# Pima          342
# Santa Cruz     13
# Yuma           52
# dtype: int64
DataFrames are two-dimensional, labelled tables (like R's data frames).
# Column-oriented input: each key is a column name and each list holds that
# column's values, one entry per row.
data = {
    'county': ['Cochice', 'Pima', 'Santa Cruz', 'Maricopa', 'Yuma'],
    'year': [2012, 2012, 2013, 2014, 2014],
    'reports': [4, 24, 31, 2, 3],
}
# Build the DataFrame from the dict, then display it.
df = pd.DataFrame(data)
df
| | county | reports | year |
---|---|---|---|
0 | Cochice | 4 | 2012 |
1 | Pima | 24 | 2012 |
2 | Santa Cruz | 31 | 2013 |
3 | Maricopa | 2 | 2014 |
4 | Yuma | 3 | 2014 |
5 rows × 3 columns
# Pass `columns` explicitly to choose the left-to-right order of the columns.
dfColumnOrdered = pd.DataFrame(
    data,
    columns=['county', 'year', 'reports'],
)
dfColumnOrdered
| | county | year | reports |
---|---|---|---|
0 | Cochice | 2012 | 4 |
1 | Pima | 2012 | 24 |
2 | Santa Cruz | 2013 | 31 |
3 | Maricopa | 2014 | 2 |
4 | Yuma | 2014 | 3 |
5 rows × 3 columns
# Add a column by assigning a Series to a new column name. The Series carries
# the default integer labels 0-4, matching the DataFrame's row labels.
dfColumnOrdered['newsCoverage'] = pd.Series(
    data=[42.3, 92.1, 12.2, 39.3, 30.2],
)
dfColumnOrdered
| | county | year | reports | newsCoverage |
---|---|---|---|---|
0 | Cochice | 2012 | 4 | 42.3 |
1 | Pima | 2012 | 24 | 92.1 |
2 | Santa Cruz | 2013 | 31 | 12.2 |
3 | Maricopa | 2014 | 2 | 39.3 |
4 | Yuma | 2014 | 3 | 30.2 |
5 rows × 4 columns
# Remove the 'newsCoverage' column from the DataFrame in place.
dfColumnOrdered.drop(columns='newsCoverage', inplace=True)
dfColumnOrdered
| | county | year | reports |
---|---|---|---|
0 | Cochice | 2012 | 4 |
1 | Pima | 2012 | 24 |
2 | Santa Cruz | 2013 | 31 |
3 | Maricopa | 2014 | 2 |
4 | Yuma | 2014 | 3 |
5 rows × 3 columns
# Transpose: rows become columns and columns become rows.
dfColumnOrdered.transpose()
| | 0 | 1 | 2 | 3 | 4 |
---|---|---|---|---|---|
county | Cochice | Pima | Santa Cruz | Maricopa | Yuma |
year | 2012 | 2012 | 2013 | 2014 | 2014 |
reports | 4 | 24 | 31 | 2 | 3 |
3 rows × 5 columns