import pandas as pd
import numpy as np
# Sample records used by the CSV examples. A "." marks a missing value in
# last_name and preTestScore, and two postTestScore entries are strings that
# carry a thousands separator.
raw_data = {
    'first_name': ['Jason', 'Molly', 'Tina', 'Jake', 'Amy'],
    'last_name': ['Miller', 'Jacobson', ".", 'Milner', 'Cooze'],
    'age': [42, 52, 36, 24, 73],
    'preTestScore': [4, 24, 31, ".", "."],
    'postTestScore': ["25,000", "94,000", 57, 62, 70],
}
# State the column order explicitly instead of relying on dict ordering.
df = pd.DataFrame(
    data=raw_data,
    columns=['first_name', 'last_name', 'age', 'preTestScore', 'postTestScore'],
)
df
| | first_name | last_name | age | preTestScore | postTestScore |
---|---|---|---|---|---|
0 | Jason | Miller | 42 | 4 | 25,000 |
1 | Molly | Jacobson | 52 | 24 | 94,000 |
2 | Tina | . | 36 | 31 | 57 |
3 | Jake | Milner | 24 | . | 62 |
4 | Amy | Cooze | 73 | . | 70 |
5 rows × 5 columns
# Round-trip the frame through a CSV file on disk. to_csv also writes the row
# index, which comes back as an unnamed leading column when the file is read.
df.to_csv(path_or_buf='example.csv')
df = pd.read_csv(filepath_or_buffer='example.csv')
df
| | Unnamed: 0 | first_name | last_name | age | preTestScore | postTestScore |
---|---|---|---|---|---|---|
0 | 0 | Jason | Miller | 42 | 4 | 25,000 |
1 | 1 | Molly | Jacobson | 52 | 24 | 94,000 |
2 | 2 | Tina | . | 36 | 31 | 57 |
3 | 3 | Jake | Milner | 24 | . | 62 |
4 | 4 | Amy | Cooze | 73 | . | 70 |
5 rows × 6 columns
# header=None: treat every line as data, so the file's own header line becomes
# row 0 and the columns get integer labels.
df = pd.read_csv(filepath_or_buffer='example.csv', header=None)
df
| | 0 | 1 | 2 | 3 | 4 | 5 |
---|---|---|---|---|---|---|
0 | NaN | first_name | last_name | age | preTestScore | postTestScore |
1 | 0 | Jason | Miller | 42 | 4 | 25,000 |
2 | 1 | Molly | Jacobson | 52 | 24 | 94,000 |
3 | 2 | Tina | . | 36 | 31 | 57 |
4 | 3 | Jake | Milner | 24 | . | 62 |
5 | 4 | Amy | Cooze | 73 | . | 70 |
6 rows × 6 columns
# Replace the file's header line with custom column names. header=0 marks
# line 0 as the header to discard, so all five data rows are kept. A boolean
# is not a valid value here: current pandas raises TypeError for header=True,
# and older releases read True as row 1, silently dropping the first record.
df = pd.read_csv(
    'example.csv',
    header=0,
    names=['UID', 'First Name', 'Last Name', 'Age', 'Pre-Test Score', 'Post-Test Score'],
)
df
| | UID | First Name | Last Name | Age | Pre-Test Score | Post-Test Score |
---|---|---|---|---|---|---|
0 | 1 | Molly | Jacobson | 52 | 24 | 94,000 |
1 | 2 | Tina | . | 36 | 31 | 57 |
2 | 3 | Jake | Milner | 24 | . | 62 |
3 | 4 | Amy | Cooze | 73 | . | 70 |
4 rows × 6 columns
# Rename the columns and use the UID column as the row index. header=0 tells
# pandas that line 0 is the header being replaced by `names`, so no data row
# is lost. header=True is not valid: current pandas raises TypeError, and
# older releases read it as row 1 and dropped the first record.
df = pd.read_csv(
    'example.csv',
    index_col='UID',
    header=0,
    names=['UID', 'First Name', 'Last Name', 'Age', 'Pre-Test Score', 'Post-Test Score'],
)
df
| | First Name | Last Name | Age | Pre-Test Score | Post-Test Score |
---|---|---|---|---|---|
UID | |||||
1 | Molly | Jacobson | 52 | 24 | 94,000 |
2 | Tina | . | 36 | 31 | 57 |
3 | Jake | Milner | 24 | . | 62 |
4 | Amy | Cooze | 73 | . | 70 |
4 rows × 5 columns
# Rename the columns and build a two-level row index from the name columns.
# header=0 tells pandas that line 0 is the header being replaced by `names`,
# so no data row is lost. header=True is not valid: current pandas raises
# TypeError, and older releases read it as row 1 and dropped the first record.
df = pd.read_csv(
    'example.csv',
    index_col=['First Name', 'Last Name'],
    header=0,
    names=['UID', 'First Name', 'Last Name', 'Age', 'Pre-Test Score', 'Post-Test Score'],
)
df
| | | UID | Age | Pre-Test Score | Post-Test Score |
---|---|---|---|---|---|
First Name | Last Name | ||||
Molly | Jacobson | 1 | 52 | 24 | 94,000 |
Tina | . | 2 | 36 | 31 | 57 |
Jake | Milner | 3 | 24 | . | 62 |
Amy | Cooze | 4 | 73 | . | 70 |
4 rows × 4 columns
# Read "." cells as missing in every column, then show a boolean mask of
# which cells are null.
df = pd.read_csv(filepath_or_buffer='example.csv', na_values=['.'])
df.isnull()
| | Unnamed: 0 | first_name | last_name | age | preTestScore | postTestScore |
---|---|---|---|---|---|---|
0 | False | False | False | False | False | False |
1 | False | False | False | False | False | False |
2 | False | False | True | False | False | False |
3 | False | False | False | False | True | False |
4 | False | False | False | False | True | False |
5 rows × 6 columns
# Per-column missing-value markers. The keys must be the column names exactly
# as they appear in the file (this read does no renaming); keys that match no
# column are ignored and would leave the "." cells untouched.
sentinels = {'last_name': ['.', 'NA'], 'preTestScore': ['.']}
df = pd.read_csv('example.csv', na_values=sentinels)
df
| | Unnamed: 0 | first_name | last_name | age | preTestScore | postTestScore |
---|---|---|---|---|---|---|
0 | 0 | Jason | Miller | 42 | 4 | 25,000 |
1 | 1 | Molly | Jacobson | 52 | 24 | 94,000 |
2 | 2 | Tina | . | 36 | 31 | 57 |
3 | 3 | Jake | Milner | 24 | . | 62 |
4 | 4 | Amy | Cooze | 73 | . | 70 |
5 rows × 6 columns
# Drop the first three lines of the file before parsing. That includes the
# header line, so the first remaining line supplies the column labels.
df = pd.read_csv(filepath_or_buffer='example.csv', skiprows=3, na_values=sentinels)
df
| | 2 | Tina | . | 36 | 31 | 57 |
---|---|---|---|---|---|---|
0 | 3 | Jake | Milner | 24 | . | 62 |
1 | 4 | Amy | Cooze | 73 | . | 70 |
2 rows × 6 columns
# Ignore the last three lines of the file. The keyword is spelled skipfooter
# (the skip_footer alias was deprecated and later removed), and it is only
# implemented by the Python parser, so that engine is selected explicitly.
df = pd.read_csv('example.csv', na_values=sentinels, skipfooter=3, engine='python')
df
| | Unnamed: 0 | first_name | last_name | age | preTestScore | postTestScore |
---|---|---|---|---|---|---|
0 | 0 | Jason | Miller | 42 | 4 | 25,000 |
1 | 1 | Molly | Jacobson | 52 | 24 | 94,000 |
2 rows × 6 columns
# Parse "," as a thousands separator so values like "25,000" load as numbers.
df = pd.read_csv(filepath_or_buffer='example.csv', thousands=',')
df
| | Unnamed: 0 | first_name | last_name | age | preTestScore | postTestScore |
---|---|---|---|---|---|---|
0 | 0 | Jason | Miller | 42 | 4 | 25000 |
1 | 1 | Molly | Jacobson | 52 | 24 | 94000 |
2 | 2 | Tina | . | 36 | 31 | 57 |
3 | 3 | Jake | Milner | 24 | . | 62 |
4 | 4 | Amy | Cooze | 73 | . | 70 |
5 rows × 6 columns