#import the pandas and numpy packages
import pandas as pd
import numpy as np
# Load the dataset from disk, decoding the file as Latin-1.
autos = pd.read_csv(
    "autos.csv",
    encoding="Latin-1",
)

# First look at the data: info() reports each column's dtype and non-null
# count plus the overall size of the frame; head() previews the first rows.
autos.info()
autos.head()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 205 entries, 0 to 204 Data columns (total 26 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 symbol 205 non-null int64 1 loss 164 non-null float64 2 make 205 non-null object 3 fuel 205 non-null object 4 aspir 205 non-null object 5 doors 203 non-null object 6 style 205 non-null object 7 drive 205 non-null object 8 eng_loc 205 non-null object 9 wb 205 non-null float64 10 length 205 non-null float64 11 width 205 non-null float64 12 height 205 non-null float64 13 weight 205 non-null int64 14 eng_type 205 non-null object 15 cylinders 205 non-null object 16 eng_cc 205 non-null int64 17 fuel.sys 205 non-null object 18 bore 201 non-null float64 19 stroke 201 non-null float64 20 comp.ratio 205 non-null float64 21 hp 203 non-null float64 22 rpm 203 non-null float64 23 city_mpg 205 non-null int64 24 hw_mpg 205 non-null int64 25 price 201 non-null float64 dtypes: float64(11), int64(5), object(10) memory usage: 41.8+ KB
symbol | loss | make | fuel | aspir | doors | style | drive | eng_loc | wb | ... | eng_cc | fuel.sys | bore | stroke | comp.ratio | hp | rpm | city_mpg | hw_mpg | price | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3 | NaN | alfa-romero | gas | std | two | convertible | rwd | front | 88.6 | ... | 130 | mpfi | 3.47 | 2.68 | 9.0 | 111.0 | 5000.0 | 21 | 27 | 13495.0 |
1 | 3 | NaN | alfa-romero | gas | std | two | convertible | rwd | front | 88.6 | ... | 130 | mpfi | 3.47 | 2.68 | 9.0 | 111.0 | 5000.0 | 21 | 27 | 16500.0 |
2 | 1 | NaN | alfa-romero | gas | std | two | hatchback | rwd | front | 94.5 | ... | 152 | mpfi | 2.68 | 3.47 | 9.0 | 154.0 | 5000.0 | 19 | 26 | 16500.0 |
3 | 2 | 164.0 | audi | gas | std | four | sedan | fwd | front | 99.8 | ... | 109 | mpfi | 3.19 | 3.40 | 10.0 | 102.0 | 5500.0 | 24 | 30 | 13950.0 |
4 | 2 | 164.0 | audi | gas | std | four | sedan | 4wd | front | 99.4 | ... | 136 | mpfi | 3.19 | 3.40 | 8.0 | 115.0 | 5500.0 | 18 | 22 | 17450.0 |
5 rows × 26 columns
# List every column label in the loaded frame so the names can be checked.
autos.columns
Index(['symbol', 'loss', 'make', 'fuel', 'aspir', 'doors', 'style', 'drive', 'eng_loc', 'wb', 'length', 'width', 'height', 'weight', 'eng_type', 'cylinders', 'eng_cc', 'fuel.sys', 'bore', 'stroke', 'comp.ratio', 'hp', 'rpm', 'city_mpg', 'hw_mpg', 'price'], dtype='object')
#Looking at this, I don't think I have the correct column names.
#Shouldn't I be seeing column names such as yearOfRegistration, monthOfRegistration, and notRepairedDamage?
#I'm going to stop here and ask for help. Maybe I can find the proper .csv file and re-download it.