This is a dataset of used cars scraped from eBay. For this project we will analyse the used-car listings and clean up the data where necessary. The tools used are pandas, NumPy and Jupyter Notebook.
import pandas as pd
import numpy as np
# Load the scraped listings from autos.csv, decoding the file as Latin-1.
autos = pd.read_csv('autos.csv', encoding='Latin-1')
# Bare expression: shows the frame when it is the last statement of a notebook cell.
autos
dateCrawled | name | seller | offerType | price | abtest | vehicleType | yearOfRegistration | gearbox | powerPS | model | odometer | monthOfRegistration | fuelType | brand | notRepairedDamage | dateCreated | nrOfPictures | postalCode | lastSeen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | $5,000 | control | bus | 2004 | manuell | 158 | andere | 150,000km | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | $8,500 | control | limousine | 1997 | automatik | 286 | 7er | 150,000km | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | $8,990 | test | limousine | 2009 | manuell | 102 | golf | 70,000km | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | $4,350 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70,000km | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | $1,350 | test | kombi | 2003 | manuell | 0 | focus | 150,000km | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
5 | 2016-03-21 13:47:45 | Chrysler_Grand_Voyager_2.8_CRD_Aut.Limited_Sto... | privat | Angebot | $7,900 | test | bus | 2006 | automatik | 150 | voyager | 150,000km | 4 | diesel | chrysler | NaN | 2016-03-21 00:00:00 | 0 | 22962 | 2016-04-06 09:45:21 |
6 | 2016-03-20 17:55:21 | VW_Golf_III_GT_Special_Electronic_Green_Metall... | privat | Angebot | $300 | test | limousine | 1995 | manuell | 90 | golf | 150,000km | 8 | benzin | volkswagen | NaN | 2016-03-20 00:00:00 | 0 | 31535 | 2016-03-23 02:48:59 |
7 | 2016-03-16 18:55:19 | Golf_IV_1.9_TDI_90PS | privat | Angebot | $1,990 | control | limousine | 1998 | manuell | 90 | golf | 150,000km | 12 | diesel | volkswagen | nein | 2016-03-16 00:00:00 | 0 | 53474 | 2016-04-07 03:17:32 |
8 | 2016-03-22 16:51:34 | Seat_Arosa | privat | Angebot | $250 | test | NaN | 2000 | manuell | 0 | arosa | 150,000km | 10 | NaN | seat | nein | 2016-03-22 00:00:00 | 0 | 7426 | 2016-03-26 18:18:10 |
9 | 2016-03-16 13:47:02 | Renault_Megane_Scenic_1.6e_RT_Klimaanlage | privat | Angebot | $590 | control | bus | 1997 | manuell | 90 | megane | 150,000km | 7 | benzin | renault | nein | 2016-03-16 00:00:00 | 0 | 15749 | 2016-04-06 10:46:35 |
10 | 2016-03-15 01:41:36 | VW_Golf_Tuning_in_siber/grau | privat | Angebot | $999 | test | NaN | 2017 | manuell | 90 | NaN | 150,000km | 4 | benzin | volkswagen | nein | 2016-03-14 00:00:00 | 0 | 86157 | 2016-04-07 03:16:21 |
11 | 2016-03-16 18:45:34 | Mercedes_A140_Motorschaden | privat | Angebot | $350 | control | NaN | 2000 | NaN | 0 | NaN | 150,000km | 0 | benzin | mercedes_benz | NaN | 2016-03-16 00:00:00 | 0 | 17498 | 2016-03-16 18:45:34 |
12 | 2016-03-31 19:48:22 | Smart_smart_fortwo_coupe_softouch_pure_MHD_Pan... | privat | Angebot | $5,299 | control | kleinwagen | 2010 | automatik | 71 | fortwo | 50,000km | 9 | benzin | smart | nein | 2016-03-31 00:00:00 | 0 | 34590 | 2016-04-06 14:17:52 |
13 | 2016-03-23 10:48:32 | Audi_A3_1.6_tuning | privat | Angebot | $1,350 | control | limousine | 1999 | manuell | 101 | a3 | 150,000km | 11 | benzin | audi | nein | 2016-03-23 00:00:00 | 0 | 12043 | 2016-04-01 14:17:13 |
14 | 2016-03-23 11:50:46 | Renault_Clio_3__Dynamique_1.2__16_V;_viele_Ver... | privat | Angebot | $3,999 | test | kleinwagen | 2007 | manuell | 75 | clio | 150,000km | 9 | benzin | renault | NaN | 2016-03-23 00:00:00 | 0 | 81737 | 2016-04-01 15:46:47 |
15 | 2016-04-01 12:06:20 | Corvette_C3_Coupe_T_Top_Crossfire_Injection | privat | Angebot | $18,900 | test | coupe | 1982 | automatik | 203 | NaN | 80,000km | 6 | benzin | sonstige_autos | nein | 2016-04-01 00:00:00 | 0 | 61276 | 2016-04-02 21:10:48 |
16 | 2016-03-16 14:59:02 | Opel_Vectra_B_Kombi | privat | Angebot | $350 | test | kombi | 1999 | manuell | 101 | vectra | 150,000km | 5 | benzin | opel | nein | 2016-03-16 00:00:00 | 0 | 57299 | 2016-03-18 05:29:37 |
17 | 2016-03-29 11:46:22 | Volkswagen_Scirocco_2_G60 | privat | Angebot | $5,500 | test | coupe | 1990 | manuell | 205 | scirocco | 150,000km | 6 | benzin | volkswagen | nein | 2016-03-29 00:00:00 | 0 | 74821 | 2016-04-05 20:46:26 |
18 | 2016-03-26 19:57:44 | Verkaufen_mein_bmw_e36_320_i_touring | privat | Angebot | $300 | control | bus | 1995 | manuell | 150 | 3er | 150,000km | 0 | benzin | bmw | NaN | 2016-03-26 00:00:00 | 0 | 54329 | 2016-04-02 12:16:41 |
19 | 2016-03-17 13:36:21 | mazda_tribute_2.0_mit_gas_und_tuev_neu_2018 | privat | Angebot | $4,150 | control | suv | 2004 | manuell | 124 | andere | 150,000km | 2 | lpg | mazda | nein | 2016-03-17 00:00:00 | 0 | 40878 | 2016-03-17 14:45:58 |
20 | 2016-03-05 19:57:31 | Audi_A4_Avant_1.9_TDI_*6_Gang*AHK*Klimatronik*... | privat | Angebot | $3,500 | test | kombi | 2003 | manuell | 131 | a4 | 150,000km | 5 | diesel | audi | NaN | 2016-03-05 00:00:00 | 0 | 53913 | 2016-03-07 05:46:46 |
21 | 2016-03-06 19:07:10 | Porsche_911_Carrera_4S_Cabrio | privat | Angebot | $41,500 | test | cabrio | 2004 | manuell | 320 | 911 | 150,000km | 4 | benzin | porsche | nein | 2016-03-06 00:00:00 | 0 | 65428 | 2016-04-05 23:46:19 |
22 | 2016-03-28 20:50:54 | MINI_Cooper_S_Cabrio | privat | Angebot | $25,450 | control | cabrio | 2015 | manuell | 184 | cooper | 10,000km | 1 | benzin | mini | nein | 2016-03-28 00:00:00 | 0 | 44789 | 2016-04-01 06:45:30 |
23 | 2016-03-10 19:55:34 | Peugeot_Boxer_2_2_HDi_120_Ps_9_Sitzer_inkl_Klima | privat | Angebot | $7,999 | control | bus | 2010 | manuell | 120 | NaN | 150,000km | 2 | diesel | peugeot | nein | 2016-03-10 00:00:00 | 0 | 30900 | 2016-03-17 08:45:17 |
24 | 2016-04-03 11:57:02 | BMW_535i_xDrive_Sport_Aut. | privat | Angebot | $48,500 | control | limousine | 2014 | automatik | 306 | 5er | 30,000km | 12 | benzin | bmw | nein | 2016-04-03 00:00:00 | 0 | 22547 | 2016-04-07 13:16:50 |
25 | 2016-03-21 21:56:18 | Ford_escort_kombi_an_bastler_mit_ghia_ausstattung | privat | Angebot | $90 | control | kombi | 1996 | manuell | 116 | NaN | 150,000km | 4 | benzin | ford | ja | 2016-03-21 00:00:00 | 0 | 27574 | 2016-04-01 05:16:49 |
26 | 2016-04-03 22:46:28 | Volkswagen_Polo_Fox | privat | Angebot | $777 | control | kleinwagen | 1992 | manuell | 54 | polo | 125,000km | 2 | benzin | volkswagen | nein | 2016-04-03 00:00:00 | 0 | 38110 | 2016-04-05 23:46:48 |
27 | 2016-03-27 18:45:01 | Hat_einer_Ahnung_mit_Ford_Galaxy_HILFE | privat | Angebot | $0 | control | NaN | 2005 | NaN | 0 | NaN | 150,000km | 0 | NaN | ford | NaN | 2016-03-27 00:00:00 | 0 | 66701 | 2016-03-27 18:45:01 |
28 | 2016-03-19 21:56:19 | MINI_Cooper_D | privat | Angebot | $5,250 | control | kleinwagen | 2007 | manuell | 110 | cooper | 150,000km | 7 | diesel | mini | ja | 2016-03-19 00:00:00 | 0 | 15745 | 2016-04-07 14:58:48 |
29 | 2016-04-02 12:45:44 | Mercedes_Benz_E_320_T_CDI_Avantgarde_DPF7_Sitz... | privat | Angebot | $4,999 | test | kombi | 2004 | automatik | 204 | e_klasse | 150,000km | 10 | diesel | mercedes_benz | nein | 2016-04-02 00:00:00 | 0 | 47638 | 2016-04-02 12:45:44 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
49970 | 2016-03-21 22:47:37 | c4_Grand_Picasso_mit_Automatik_Leder_Navi_Temp... | privat | Angebot | $15,800 | control | bus | 2010 | automatik | 136 | c4 | 60,000km | 4 | diesel | citroen | nein | 2016-03-21 00:00:00 | 0 | 14947 | 2016-04-07 04:17:34 |
49971 | 2016-03-29 14:54:12 | W.Lupo_1.0 | privat | Angebot | $950 | test | kleinwagen | 2001 | manuell | 50 | lupo | 150,000km | 4 | benzin | volkswagen | nein | 2016-03-29 00:00:00 | 0 | 65197 | 2016-03-29 20:41:51 |
49972 | 2016-03-26 22:25:23 | Mercedes_Benz_Vito_115_CDI_Extralang_Aut. | privat | Angebot | $3,300 | control | bus | 2004 | automatik | 150 | vito | 150,000km | 10 | diesel | mercedes_benz | ja | 2016-03-26 00:00:00 | 0 | 65326 | 2016-03-28 11:28:18 |
49973 | 2016-03-27 05:32:39 | Mercedes_Benz_SLK_200_Kompressor | privat | Angebot | $6,000 | control | cabrio | 2004 | manuell | 163 | slk | 150,000km | 11 | benzin | mercedes_benz | nein | 2016-03-27 00:00:00 | 0 | 53567 | 2016-03-27 08:25:24 |
49974 | 2016-03-20 10:52:31 | Golf_1_Cabrio_Tuev_Neu_viele_Extras_alles_eing... | privat | Angebot | $0 | control | cabrio | 1983 | manuell | 70 | golf | 150,000km | 2 | benzin | volkswagen | nein | 2016-03-20 00:00:00 | 0 | 8209 | 2016-03-27 19:48:16 |
49975 | 2016-03-27 20:51:39 | Honda_Jazz_1.3_DSi_i_VTEC_IMA_CVT_Comfort | privat | Angebot | $9,700 | control | kleinwagen | 2012 | automatik | 88 | jazz | 100,000km | 11 | hybrid | honda | nein | 2016-03-27 00:00:00 | 0 | 84385 | 2016-04-05 19:45:34 |
49976 | 2016-03-19 18:56:05 | Audi_80_Avant_2.6_E__Vollausstattung!!_Einziga... | privat | Angebot | $5,900 | test | kombi | 1992 | automatik | 150 | 80 | 150,000km | 12 | benzin | audi | nein | 2016-03-19 00:00:00 | 0 | 36100 | 2016-04-07 06:16:44 |
49977 | 2016-03-31 18:37:18 | Mercedes_Benz_C200_Cdi_W203 | privat | Angebot | $5,500 | control | limousine | 2003 | manuell | 116 | c_klasse | 150,000km | 2 | diesel | mercedes_benz | nein | 2016-03-31 00:00:00 | 0 | 33739 | 2016-04-06 12:16:11 |
49978 | 2016-04-04 10:37:14 | Mercedes_Benz_E_200_Classic | privat | Angebot | $900 | control | limousine | 1996 | automatik | 136 | e_klasse | 150,000km | 9 | benzin | mercedes_benz | ja | 2016-04-04 00:00:00 | 0 | 24405 | 2016-04-06 12:44:20 |
49979 | 2016-03-20 18:38:40 | Volkswagen_Polo_1.6_TDI_Style | privat | Angebot | $11,000 | test | kleinwagen | 2011 | manuell | 90 | polo | 70,000km | 11 | diesel | volkswagen | nein | 2016-03-20 00:00:00 | 0 | 48455 | 2016-04-07 01:45:12 |
49980 | 2016-03-12 10:55:54 | Ford_Escort_Turnier_16V | privat | Angebot | $400 | control | kombi | 1995 | manuell | 105 | escort | 125,000km | 3 | benzin | ford | NaN | 2016-03-12 00:00:00 | 0 | 56218 | 2016-04-06 17:16:49 |
49981 | 2016-03-15 09:38:21 | Opel_Astra_Kombi_mit_Anhaengerkupplung | privat | Angebot | $2,000 | control | kombi | 1998 | manuell | 115 | astra | 150,000km | 12 | benzin | opel | nein | 2016-03-15 00:00:00 | 0 | 86859 | 2016-04-05 17:21:46 |
49982 | 2016-03-29 18:51:08 | Skoda_Fabia_4_Tuerer_Bj:2004__85.000Tkm | privat | Angebot | $1,950 | control | kleinwagen | 2004 | manuell | 0 | fabia | 90,000km | 7 | benzin | skoda | NaN | 2016-03-29 00:00:00 | 0 | 45884 | 2016-03-29 18:51:08 |
49983 | 2016-03-06 12:43:04 | Ford_focus_99 | privat | Angebot | $600 | test | kleinwagen | 1999 | manuell | 101 | focus | 150,000km | 4 | benzin | ford | NaN | 2016-03-06 00:00:00 | 0 | 52477 | 2016-03-09 06:16:08 |
49984 | 2016-03-31 22:48:48 | Student_sucht_ein__Anfaengerauto___ab_2000_BJ_... | privat | Angebot | $0 | test | NaN | 2000 | NaN | 0 | NaN | 150,000km | 0 | NaN | sonstige_autos | NaN | 2016-03-31 00:00:00 | 0 | 12103 | 2016-04-02 19:44:53 |
49985 | 2016-04-02 16:38:23 | Verkaufe_meinen_vw_vento! | privat | Angebot | $1,000 | control | NaN | 1995 | automatik | 0 | NaN | 150,000km | 0 | benzin | volkswagen | NaN | 2016-04-02 00:00:00 | 0 | 30900 | 2016-04-06 15:17:52 |
49986 | 2016-04-04 20:46:02 | Chrysler_300C_3.0_CRD_DPF_Automatik_Voll_Ausst... | privat | Angebot | $15,900 | control | limousine | 2010 | automatik | 218 | 300c | 125,000km | 11 | diesel | chrysler | nein | 2016-04-04 00:00:00 | 0 | 73527 | 2016-04-06 23:16:00 |
49987 | 2016-03-22 20:47:27 | Audi_A3_Limousine_2.0_TDI_DPF_Ambition__NAVI__... | privat | Angebot | $21,990 | control | limousine | 2013 | manuell | 150 | a3 | 50,000km | 11 | diesel | audi | nein | 2016-03-22 00:00:00 | 0 | 94362 | 2016-03-26 22:46:06 |
49988 | 2016-03-28 19:49:51 | BMW_330_Ci | privat | Angebot | $9,550 | control | coupe | 2001 | manuell | 231 | 3er | 150,000km | 10 | benzin | bmw | nein | 2016-03-28 00:00:00 | 0 | 83646 | 2016-04-07 02:17:40 |
49989 | 2016-03-11 19:50:37 | VW_Polo_zum_Ausschlachten_oder_Wiederaufbau | privat | Angebot | $150 | test | kleinwagen | 1997 | manuell | 0 | polo | 150,000km | 5 | benzin | volkswagen | ja | 2016-03-11 00:00:00 | 0 | 21244 | 2016-03-12 10:17:55 |
49990 | 2016-03-21 19:54:19 | Mercedes_Benz_A_200__BlueEFFICIENCY__Urban | privat | Angebot | $17,500 | test | limousine | 2012 | manuell | 156 | a_klasse | 30,000km | 12 | benzin | mercedes_benz | nein | 2016-03-21 00:00:00 | 0 | 58239 | 2016-04-06 22:46:57 |
49991 | 2016-03-06 15:25:19 | Kleinwagen | privat | Angebot | $500 | control | NaN | 2016 | manuell | 0 | twingo | 150,000km | 0 | benzin | renault | NaN | 2016-03-06 00:00:00 | 0 | 61350 | 2016-03-06 18:24:19 |
49992 | 2016-03-10 19:37:38 | Fiat_Grande_Punto_1.4_T_Jet_16V_Sport | privat | Angebot | $4,800 | control | kleinwagen | 2009 | manuell | 120 | andere | 125,000km | 9 | lpg | fiat | nein | 2016-03-10 00:00:00 | 0 | 68642 | 2016-03-13 01:44:51 |
49993 | 2016-03-15 18:47:35 | Audi_A3__1_8l__Silber;_schoenes_Fahrzeug | privat | Angebot | $1,650 | control | kleinwagen | 1997 | manuell | 0 | NaN | 150,000km | 7 | benzin | audi | NaN | 2016-03-15 00:00:00 | 0 | 65203 | 2016-04-06 19:46:53 |
49994 | 2016-03-22 17:36:42 | Audi_A6__S6__Avant_4.2_quattro_eventuell_Tausc... | privat | Angebot | $5,000 | control | kombi | 2001 | automatik | 299 | a6 | 150,000km | 1 | benzin | audi | nein | 2016-03-22 00:00:00 | 0 | 46537 | 2016-04-06 08:16:39 |
49995 | 2016-03-27 14:38:19 | Audi_Q5_3.0_TDI_qu._S_tr.__Navi__Panorama__Xenon | privat | Angebot | $24,900 | control | limousine | 2011 | automatik | 239 | q5 | 100,000km | 1 | diesel | audi | nein | 2016-03-27 00:00:00 | 0 | 82131 | 2016-04-01 13:47:40 |
49996 | 2016-03-28 10:50:25 | Opel_Astra_F_Cabrio_Bertone_Edition___TÜV_neu+... | privat | Angebot | $1,980 | control | cabrio | 1996 | manuell | 75 | astra | 150,000km | 5 | benzin | opel | nein | 2016-03-28 00:00:00 | 0 | 44807 | 2016-04-02 14:18:02 |
49997 | 2016-04-02 14:44:48 | Fiat_500_C_1.2_Dualogic_Lounge | privat | Angebot | $13,200 | test | cabrio | 2014 | automatik | 69 | 500 | 5,000km | 11 | benzin | fiat | nein | 2016-04-02 00:00:00 | 0 | 73430 | 2016-04-04 11:47:27 |
49998 | 2016-03-08 19:25:42 | Audi_A3_2.0_TDI_Sportback_Ambition | privat | Angebot | $22,900 | control | kombi | 2013 | manuell | 150 | a3 | 40,000km | 11 | diesel | audi | nein | 2016-03-08 00:00:00 | 0 | 35683 | 2016-04-05 16:45:07 |
49999 | 2016-03-14 00:42:12 | Opel_Vectra_1.6_16V | privat | Angebot | $1,250 | control | limousine | 1996 | manuell | 101 | vectra | 150,000km | 1 | benzin | opel | nein | 2016-03-13 00:00:00 | 0 | 45897 | 2016-04-06 21:18:48 |
50000 rows × 20 columns
# Print each column's dtype and non-null count to spot missing values
# and numeric fields that are still stored as text.
autos.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 50000 entries, 0 to 49999 Data columns (total 20 columns): dateCrawled 50000 non-null object name 50000 non-null object seller 50000 non-null object offerType 50000 non-null object price 50000 non-null object abtest 50000 non-null object vehicleType 44905 non-null object yearOfRegistration 50000 non-null int64 gearbox 47320 non-null object powerPS 50000 non-null int64 model 47242 non-null object odometer 50000 non-null object monthOfRegistration 50000 non-null int64 fuelType 45518 non-null object brand 50000 non-null object notRepairedDamage 40171 non-null object dateCreated 50000 non-null object nrOfPictures 50000 non-null int64 postalCode 50000 non-null int64 lastSeen 50000 non-null object dtypes: int64(5), object(15) memory usage: 7.6+ MB
# Preview the first rows of the raw data.
autos.head()
dateCrawled | name | seller | offerType | price | abtest | vehicleType | yearOfRegistration | gearbox | powerPS | model | odometer | monthOfRegistration | fuelType | brand | notRepairedDamage | dateCreated | nrOfPictures | postalCode | lastSeen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | $5,000 | control | bus | 2004 | manuell | 158 | andere | 150,000km | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | $8,500 | control | limousine | 1997 | automatik | 286 | 7er | 150,000km | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | $8,990 | test | limousine | 2009 | manuell | 102 | golf | 70,000km | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | $4,350 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70,000km | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | $1,350 | test | kombi | 2003 | manuell | 0 | focus | 150,000km | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
# Capture the current column labels and print them.
cols = autos.columns
print(cols)
Index(['dateCrawled', 'name', 'seller', 'offerType', 'price', 'abtest', 'vehicleType', 'yearOfRegistration', 'gearbox', 'powerPS', 'model', 'odometer', 'monthOfRegistration', 'fuelType', 'brand', 'notRepairedDamage', 'dateCreated', 'nrOfPictures', 'postalCode', 'lastSeen'], dtype='object')
# Original column label -> snake_case label. Most entries only change the
# casing; a few are reworded as well (e.g. 'notRepairedDamage' ->
# 'unrepaired_damage', 'dateCreated' -> 'ad_created').
cols_dict = {
    'dateCrawled': 'date_crawled',
    'name': 'name',
    'seller': 'seller',
    'offerType': 'offer_type',
    'price': 'price',
    'abtest': 'abtest',
    'vehicleType': 'vehicle_type',
    'yearOfRegistration': 'registration_year',
    'gearbox': 'gear_box',
    'powerPS': 'power_ps',
    'model': 'model',
    'odometer': 'odometer',
    'monthOfRegistration': 'registration_month',
    'fuelType': 'fuel_type',
    'brand': 'brand',
    'notRepairedDamage': 'unrepaired_damage',
    'dateCreated': 'ad_created',
    'nrOfPictures': 'nr_of_pictures',
    'postalCode': 'postal_code',
    'lastSeen': 'last_seen',
}

# Relabel the columns on the existing frame (no new frame is created).
autos.rename(columns=cols_dict, inplace=True)

# Check the new labels.
autos.head()
date_crawled | name | seller | offer_type | price | abtest | vehicle_type | registration_year | gear_box | power_ps | model | odometer | registration_month | fuel_type | brand | unrepaired_damage | ad_created | nr_of_pictures | postal_code | last_seen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | $5,000 | control | bus | 2004 | manuell | 158 | andere | 150,000km | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | $8,500 | control | limousine | 1997 | automatik | 286 | 7er | 150,000km | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | $8,990 | test | limousine | 2009 | manuell | 102 | golf | 70,000km | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | $4,350 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70,000km | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | $1,350 | test | kombi | 2003 | manuell | 0 | focus | 150,000km | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
# Summary statistics for every column; include='all' adds the non-numeric
# columns (count / unique / top / freq) next to the numeric ones.
autos.describe(include='all')
date_crawled | name | seller | offer_type | price | abtest | vehicle_type | registration_year | gear_box | power_ps | model | odometer | registration_month | fuel_type | brand | unrepaired_damage | ad_created | nr_of_pictures | postal_code | last_seen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 50000 | 50000 | 50000 | 50000 | 50000 | 50000 | 44905 | 50000.000000 | 47320 | 50000.000000 | 47242 | 50000 | 50000.000000 | 45518 | 50000 | 40171 | 50000 | 50000.0 | 50000.000000 | 50000 |
unique | 48213 | 38754 | 2 | 2 | 2357 | 2 | 8 | NaN | 2 | NaN | 245 | 13 | NaN | 7 | 40 | 2 | 76 | NaN | NaN | 39481 |
top | 2016-03-21 20:37:19 | Ford_Fiesta | privat | Angebot | $0 | test | limousine | NaN | manuell | NaN | golf | 150,000km | NaN | benzin | volkswagen | nein | 2016-04-03 00:00:00 | NaN | NaN | 2016-04-07 06:17:27 |
freq | 3 | 78 | 49999 | 49999 | 1421 | 25756 | 12859 | NaN | 36993 | NaN | 4024 | 32424 | NaN | 30107 | 10687 | 35232 | 1946 | NaN | NaN | 8 |
mean | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2005.073280 | NaN | 116.355920 | NaN | NaN | 5.723360 | NaN | NaN | NaN | NaN | 0.0 | 50813.627300 | NaN |
std | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 105.712813 | NaN | 209.216627 | NaN | NaN | 3.711984 | NaN | NaN | NaN | NaN | 0.0 | 25779.747957 | NaN |
min | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1000.000000 | NaN | 0.000000 | NaN | NaN | 0.000000 | NaN | NaN | NaN | NaN | 0.0 | 1067.000000 | NaN |
25% | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1999.000000 | NaN | 70.000000 | NaN | NaN | 3.000000 | NaN | NaN | NaN | NaN | 0.0 | 30451.000000 | NaN |
50% | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2003.000000 | NaN | 105.000000 | NaN | NaN | 6.000000 | NaN | NaN | NaN | NaN | 0.0 | 49577.000000 | NaN |
75% | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2008.000000 | NaN | 150.000000 | NaN | NaN | 9.000000 | NaN | NaN | NaN | NaN | 0.0 | 71540.000000 | NaN |
max | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 9999.000000 | NaN | 17700.000000 | NaN | NaN | 12.000000 | NaN | NaN | NaN | NaN | 0.0 | 99998.000000 | NaN |
# Turn odometer text such as '150,000km' into an integer: drop the unit
# suffix first, then the thousands separator.
odometer_text = autos['odometer']
for token in ('km', ','):
    odometer_text = odometer_text.str.replace(token, '')
autos['odometer'] = odometer_text.astype(int)

# Keep the unit in the column name now that it is gone from the values.
autos.rename(columns={'odometer': 'odometer_km'}, inplace=True)

autos.head()
date_crawled | name | seller | offer_type | price | abtest | vehicle_type | registration_year | gear_box | power_ps | model | odometer_km | registration_month | fuel_type | brand | unrepaired_damage | ad_created | nr_of_pictures | postal_code | last_seen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | $5,000 | control | bus | 2004 | manuell | 158 | andere | 150000 | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | $8,500 | control | limousine | 1997 | automatik | 286 | 7er | 150000 | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | $8,990 | test | limousine | 2009 | manuell | 102 | golf | 70000 | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | $4,350 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70000 | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | $1,350 | test | kombi | 2003 | manuell | 0 | focus | 150000 | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
# Turn price text such as '$5,000' into an integer.
# regex=False makes both patterns literal text: '$' is a regex metacharacter
# (end-of-string anchor), and the default of `regex` differs between pandas
# versions, so the literal match is requested explicitly.
autos['price'] = (
    autos['price']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype(int)
)

# Number of listings per registration month, listed from month 12 downwards.
autos['registration_month'].value_counts().sort_index(ascending=False)
12 3447 11 3360 10 3651 9 3389 8 3191 7 3949 6 4368 5 4107 4 4102 3 5071 2 3008 1 3282 0 5075 Name: registration_month, dtype: int64
# Keep only rows whose registration month is a real calendar month (1-12).
valid_month = autos['registration_month'].between(1, 12)
autos = autos[valid_month]

# Keep only rows whose registration year lies in 1910-2019 (inclusive).
# NOTE(review): the upper bound 2019 is later than the crawl dates in this
# data (March-April 2016) - confirm that years after 2016 should be kept.
valid_year = autos['registration_year'].between(1910, 2019)
autos = autos[valid_year]

# Rows and columns left after filtering.
autos.shape
(44916, 20)
# Preview the frame after the price/odometer conversions and the row filtering.
autos.head()
date_crawled | name | seller | offer_type | price | abtest | vehicle_type | registration_year | gear_box | power_ps | model | odometer_km | registration_month | fuel_type | brand | unrepaired_damage | ad_created | nr_of_pictures | postal_code | last_seen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | 5000 | control | bus | 2004 | manuell | 158 | andere | 150000 | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | 8500 | control | limousine | 1997 | automatik | 286 | 7er | 150000 | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | 8990 | test | limousine | 2009 | manuell | 102 | golf | 70000 | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | 4350 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70000 | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | 1350 | test | kombi | 2003 | manuell | 0 | focus | 150000 | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
# Share of listings per crawl day. The timestamps are text, so the first
# ten characters ('YYYY-MM-DD') are the calendar date.
crawl_day = autos['date_crawled'].str[:10]
crawl_day.value_counts(normalize=True, dropna=False).sort_index()
2016-03-05 0.025247 2016-03-06 0.014138 2016-03-07 0.036090 2016-03-08 0.033129 2016-03-09 0.032906 2016-03-10 0.032060 2016-03-11 0.032906 2016-03-12 0.036913 2016-03-13 0.015473 2016-03-14 0.036936 2016-03-15 0.033841 2016-03-16 0.029433 2016-03-17 0.031258 2016-03-18 0.013024 2016-03-19 0.034509 2016-03-20 0.037937 2016-03-21 0.037381 2016-03-22 0.032795 2016-03-23 0.032349 2016-03-24 0.028965 2016-03-25 0.031325 2016-03-26 0.032505 2016-03-27 0.030991 2016-03-28 0.034976 2016-03-29 0.033863 2016-03-30 0.033552 2016-03-31 0.031904 2016-04-01 0.034197 2016-04-02 0.035622 2016-04-03 0.039162 2016-04-04 0.036579 2016-04-05 0.013336 2016-04-06 0.003251 2016-04-07 0.001447 Name: date_crawled, dtype: float64
# Share of listings per ad-creation day (date part of the text timestamp).
created_day = autos['ad_created'].str[:10]
created_day.value_counts(normalize=True, dropna=False).sort_index()
2015-06-11 0.000022 2015-08-10 0.000022 2015-09-09 0.000022 2015-11-10 0.000022 2015-12-05 0.000022 2015-12-30 0.000022 2016-01-03 0.000022 2016-01-07 0.000022 2016-01-10 0.000045 2016-01-13 0.000022 2016-01-14 0.000022 2016-01-16 0.000022 2016-01-22 0.000022 2016-01-27 0.000067 2016-01-29 0.000022 2016-02-01 0.000022 2016-02-02 0.000045 2016-02-05 0.000045 2016-02-07 0.000022 2016-02-08 0.000022 2016-02-09 0.000022 2016-02-11 0.000022 2016-02-12 0.000067 2016-02-14 0.000045 2016-02-16 0.000022 2016-02-17 0.000022 2016-02-18 0.000045 2016-02-19 0.000067 2016-02-20 0.000045 2016-02-21 0.000067 ... 2016-03-09 0.032973 2016-03-10 0.031793 2016-03-11 0.033129 2016-03-12 0.036802 2016-03-13 0.016765 2016-03-14 0.035600 2016-03-15 0.033596 2016-03-16 0.029945 2016-03-17 0.030969 2016-03-18 0.013714 2016-03-19 0.033396 2016-03-20 0.037893 2016-03-21 0.037626 2016-03-22 0.032661 2016-03-23 0.032149 2016-03-24 0.028943 2016-03-25 0.031392 2016-03-26 0.032728 2016-03-27 0.030746 2016-03-28 0.035132 2016-03-29 0.033774 2016-03-30 0.033373 2016-03-31 0.031993 2016-04-01 0.034153 2016-04-02 0.035355 2016-04-03 0.039296 2016-04-04 0.036980 2016-04-05 0.012067 2016-04-06 0.003340 2016-04-07 0.001291 Name: ad_created, Length: 76, dtype: float64
# Share of listings per last-seen day (date part of the text timestamp).
last_seen_day = autos['last_seen'].str[:10]
last_seen_day.value_counts(normalize=True, dropna=False).sort_index()
2016-03-05 0.001135 2016-03-06 0.004163 2016-03-07 0.005366 2016-03-08 0.007369 2016-03-09 0.009729 2016-03-10 0.010664 2016-03-11 0.012468 2016-03-12 0.023666 2016-03-13 0.008705 2016-03-14 0.012690 2016-03-15 0.015763 2016-03-16 0.016297 2016-03-17 0.027563 2016-03-18 0.007414 2016-03-19 0.015674 2016-03-20 0.020305 2016-03-21 0.020505 2016-03-22 0.021173 2016-03-23 0.018368 2016-03-24 0.019570 2016-03-25 0.018969 2016-03-26 0.016676 2016-03-27 0.015407 2016-03-28 0.020683 2016-03-29 0.022375 2016-03-30 0.024624 2016-03-31 0.023689 2016-04-01 0.023266 2016-04-02 0.025225 2016-04-03 0.025180 2016-04-04 0.024045 2016-04-05 0.126436 2016-04-06 0.222482 2016-04-07 0.132358 Name: last_seen, dtype: float64
# Summary statistics of the registration year after filtering.
autos['registration_year'].describe()
count 44916.000000 mean 2003.599141 std 7.320794 min 1910.000000 25% 1999.000000 50% 2004.000000 75% 2008.000000 max 2019.000000 Name: registration_year, dtype: float64
# Share of listings per registration year, in chronological order.
autos['registration_year'].value_counts(normalize=True).sort_index()
1910 0.000022 1927 0.000022 1929 0.000022 1931 0.000022 1934 0.000022 1937 0.000067 1938 0.000022 1939 0.000022 1941 0.000045 1943 0.000022 1948 0.000022 1950 0.000045 1951 0.000045 1952 0.000022 1953 0.000022 1954 0.000022 1955 0.000045 1956 0.000089 1957 0.000022 1958 0.000045 1959 0.000156 1960 0.000356 1961 0.000134 1962 0.000089 1963 0.000156 1964 0.000245 1965 0.000356 1966 0.000445 1967 0.000512 1968 0.000512 ... 1990 0.005766 1991 0.006902 1992 0.007436 1993 0.008037 1994 0.012445 1995 0.022642 1996 0.026806 1997 0.039140 1998 0.047511 1999 0.059934 2000 0.059645 2001 0.054769 2002 0.052498 2003 0.057084 2004 0.058175 2005 0.060802 2006 0.057953 2007 0.049337 2008 0.047867 2009 0.045418 2010 0.034910 2011 0.035667 2012 0.029054 2013 0.017700 2014 0.014627 2015 0.008238 2016 0.021685 2017 0.024201 2018 0.008594 2019 0.000045 Name: registration_year, Length: 81, dtype: float64
# Brand labels of the six most frequent brands (value_counts sorts by
# frequency, so the first six entries are the most common ones).
brand_counts = autos['brand'].value_counts()
data = brand_counts[:6].index

# Filled later with brand -> mean price.
brand_data = dict()

len(data)
6
# Spot-check: mean price of the Volkswagen listings.
is_volkswagen = autos['brand'] == 'volkswagen'
autos.loc[is_volkswagen, 'price'].mean()
5784.082582423407
# Mean price for each selected brand, stored as brand -> mean in brand_data.
brand_data.update(
    (brand, autos.loc[autos['brand'] == brand, 'price'].mean())
    for brand in data
)
brand_data
{'audi': 9500.667261146496, 'bmw': 8663.624148978775, 'ford': 7806.144155844156, 'mercedes_benz': 31358.238958097394, 'opel': 5676.325443786982, 'volkswagen': 5784.082582423407}
From my top 20 selection, the cheapest car on average was the Renault, followed closely by the Fiat. The two surprising details were that the most expensive brands were sonstige_autos, which I could find no information about, and Citroen. Citroen was truly shocking. According to the data, Volvos are on average more expensive than all of the expected entrants. BMW and Audi are not as expensive as expected, averaging 8663 and 9500 respectively. This data requires further investigation.
# Mean odometer reading (km) for the same brands that have a mean price.
keys = brand_data.keys()
mean_mileage = {
    brand: autos.loc[autos['brand'] == brand, 'odometer_km'].mean()
    for brand in keys
}

# Put both per-brand averages side by side, indexed by brand.
avg_price_series = pd.Series(brand_data)
avg_mileage_series = pd.Series(mean_mileage)
df = pd.DataFrame(avg_price_series, columns=['mean_price'])
df['mean_mileage'] = avg_mileage_series
df
mean_price | mean_mileage | |
---|---|---|
audi | 9500.667261 | 129152.866242 |
bmw | 8663.624149 | 132640.168202 |
ford | 7806.144156 | 123972.402597 |
mercedes_benz | 31358.238958 | 131046.432616 |
opel | 5676.325444 | 128985.629755 |
volkswagen | 5784.082582 | 128535.460617 |
# Preview the listings again; the category columns still hold German labels.
autos.head()
date_crawled | name | seller | offer_type | price | abtest | vehicle_type | registration_year | gear_box | power_ps | model | odometer_km | registration_month | fuel_type | brand | unrepaired_damage | ad_created | nr_of_pictures | postal_code | last_seen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | 5000 | control | bus | 2004 | manuell | 158 | andere | 150000 | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | 8500 | control | limousine | 1997 | automatik | 286 | 7er | 150000 | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | 8990 | test | limousine | 2009 | manuell | 102 | golf | 70000 | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | 4350 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70000 | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | 1350 | test | kombi | 2003 | manuell | 0 | focus | 150000 | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
# List the distinct seller labels so each one can be given an English name.
autos['seller'].unique()
array(['privat', 'gewerblich'], dtype=object)
# German -> English labels for the `seller` column.
sellers = {
    'privat': 'private',
    'gewerblich': 'commercial',
}
# Translate only the seller column. Calling replace on the whole frame would
# also rewrite any cell in another column (e.g. a listing name) that happens
# to equal one of these labels.
autos['seller'] = autos['seller'].replace(sellers)
# List the distinct offer_type labels so each one can be given an English name.
autos['offer_type'].unique()
array(['Angebot'], dtype=object)
# German -> English labels for the `offer_type` column.
offers = {
    'Angebot': 'Offer',
}
# Translate only the offer_type column, so an identical value in any other
# column is left untouched.
autos['offer_type'] = autos['offer_type'].replace(offers)
# List the distinct vehicle_type labels so each one can be given an English name.
autos['vehicle_type'].unique()
array(['bus', 'limousine', 'kleinwagen', 'kombi', nan, 'coupe', 'suv', 'cabrio', 'andere'], dtype=object)
# German -> English labels for vehicle types. Entries that map a label to
# itself leave the data unchanged.
vehicle_type = {
'bus': 'bus',
'limousine': 'limousine',
'kleinwagen': 'small car',
'kombi': 'station wagon',
# NOTE(review): this key is the string 'nan', not a missing value, so it
# presumably never matches the NaN entries of the column - confirm.
'nan': 'nan',
'coupe': 'coupe',
'suv': 'suv',
'cabrio': 'convertible',
'andere': 'other'
}
# NOTE(review): replace is called on the whole frame, not on vehicle_type
# alone, so a cell equal to one of the keys in ANY column is rewritten
# (e.g. a model of 'andere' becomes 'other'). Later cells appear to depend
# on that; confirm before narrowing this to a single column.
autos.replace(vehicle_type, inplace=True)
# List the distinct gear_box labels so each one can be given an English name.
autos['gear_box'].unique()
array(['manuell', 'automatik', nan], dtype=object)
# German -> English labels for the `gear_box` column.
gear_type = {
    'manuell': 'manual',
    'automatik': 'automatic',
}
# Translate only the gear_box column, so an identical value in any other
# column is left untouched. Missing values stay missing.
autos['gear_box'] = autos['gear_box'].replace(gear_type)
# List the distinct fuel_type labels so each one can be given an English name.
autos['fuel_type'].unique()
array(['lpg', 'benzin', 'diesel', nan, 'cng', 'hybrid', 'elektro', 'other'], dtype=object)
# German -> English labels for the `fuel_type` column. Labels that are
# already acceptable map to themselves.
petrol = {
    'lpg': 'lpg',
    'benzin': 'petrol',
    'diesel': 'diesel',
    'cng': 'cng',
    'hybrid': 'hybrid',
    'elektro': 'electric',
    'other': 'other',
}
# Translate only the fuel_type column. A frame-wide replace would also
# rewrite any other column's cell (e.g. a listing name of 'benzin') that
# equals one of these labels.
autos['fuel_type'] = autos['fuel_type'].replace(petrol)
# List the distinct unrepaired_damage labels so each one can be given an English name.
autos['unrepaired_damage'].unique()
array(['nein', nan, 'ja'], dtype=object)
# German -> English labels for the `unrepaired_damage` column.
damage = {
    'nein': 'no',
    'ja': 'yes',
}
# Translate only the unrepaired_damage column, so an identical value in any
# other column is left untouched. Missing values stay missing.
autos['unrepaired_damage'] = autos['unrepaired_damage'].replace(damage)
# Spot-check the first few unrepaired_damage values after the translation.
autos['unrepaired_damage'].head()
0 no 1 no 2 no 3 no 4 no Name: unrepaired_damage, dtype: object
# Summarise each column's dtype and non-null count.
autos.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 44916 entries, 0 to 49999 Data columns (total 20 columns): date_crawled 44916 non-null object name 44916 non-null object seller 44916 non-null object offer_type 44916 non-null object price 44916 non-null int64 abtest 44916 non-null object vehicle_type 41782 non-null object registration_year 44916 non-null int64 gear_box 43550 non-null object power_ps 44916 non-null int64 model 43064 non-null object odometer_km 44916 non-null int64 registration_month 44916 non-null int64 fuel_type 42289 non-null object brand 44916 non-null object unrepaired_damage 38135 non-null object ad_created 44916 non-null object nr_of_pictures 44916 non-null int64 postal_code 44916 non-null int64 last_seen 44916 non-null object dtypes: int64(7), object(13) memory usage: 7.2+ MB
# Reduce the three timestamp columns to integer dates of the form YYYYMMDD.
cols = ['date_crawled', 'ad_created', 'last_seen']
for c in cols:
    # Cast to str first so the cell is safe to re-run: once a column has been
    # converted to int, the .str accessor raises AttributeError. An
    # already-converted value such as 20160326 passes through unchanged.
    autos[c] = autos[c].astype(str).str[:10].str.replace('-', '').astype(int)
AttributeErrorTraceback (most recent call last) <ipython-input-107-49afe0ecc035> in <module>() 1 for c in cols: ----> 2 autos[c] = autos[c].str[:10].str.replace('-', '').astype(int) /dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/generic.py in __getattr__(self, name) 3608 if (name in self._internal_names_set or name in self._metadata or 3609 name in self._accessors): -> 3610 return object.__getattribute__(self, name) 3611 else: 3612 if name in self._info_axis: /dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/accessor.py in __get__(self, instance, owner) 52 # this ensures that Series.str.<method> is well defined 53 return self.accessor_cls ---> 54 return self.construct_accessor(instance) 55 56 def __set__(self, instance, value): /dataquest/system/env/python3/lib/python3.4/site-packages/pandas/core/strings.py in _make_accessor(cls, data) 1908 # (instead of test for object dtype), but that isn't practical for 1909 # performance reasons until we have a str dtype (GH 9343) -> 1910 raise AttributeError("Can only use .str accessor with string " 1911 "values, which use np.object_ dtype in " 1912 "pandas") AttributeError: Can only use .str accessor with string values, which use np.object_ dtype in pandas
# Keep the first four underscore-separated words of every listing name and
# store them, joined by spaces, in a new column.
name_words = autos['name'].str.split('_')
autos['additional_model_info'] = name_words.str[:4].str.join(' ')
autos.head()
date_crawled | name | seller | offer_type | price | abtest | vehicle_type | registration_year | gear_box | power_ps | ... | odometer_km | registration_month | fuel_type | brand | unrepaired_damage | ad_created | nr_of_pictures | postal_code | last_seen | additional_model_info | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 20160326 | Peugeot_807_160_NAVTECH_ON_BOARD | private | Offer | 5000 | control | bus | 2004 | manual | 158 | ... | 150000 | 3 | lpg | peugeot | no | 20160326 | 0 | 79588 | 20160406 | Peugeot 807 160 NAVTECH |
1 | 20160404 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | private | Offer | 8500 | control | limousine | 1997 | automatic | 286 | ... | 150000 | 6 | petrol | bmw | no | 20160404 | 0 | 71034 | 20160406 | BMW 740i 4 4 |
2 | 20160326 | Volkswagen_Golf_1.6_United | private | Offer | 8990 | test | limousine | 2009 | manual | 102 | ... | 70000 | 7 | petrol | volkswagen | no | 20160326 | 0 | 35394 | 20160406 | Volkswagen Golf 1.6 United |
3 | 20160312 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | private | Offer | 4350 | control | small car | 2007 | automatic | 71 | ... | 70000 | 6 | petrol | smart | no | 20160312 | 0 | 33729 | 20160315 | Smart smart fortwo coupe |
4 | 20160401 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | private | Offer | 1350 | test | station wagon | 2003 | manual | 0 | ... | 150000 | 7 | petrol | ford | no | 20160401 | 0 | 39218 | 20160401 | Ford Focus 1 6 |
5 rows × 21 columns
# Most frequently listed model among the volkswagen rows.
autos.loc[autos['brand'] == 'volkswagen', 'model'].value_counts().index[0]
'golf'
# Most frequently listed model for every brand present in the data.
brands = autos['brand'].unique()
brand_model_combination = {}
for brand_name in brands:
    model_counts = autos.loc[autos['brand'] == brand_name, 'model'].value_counts()
    # A brand with no counted model has nothing to report; leave it out.
    if model_counts.empty:
        continue
    brand_model_combination[brand_name] = model_counts.index[0]
brand_model_combination
{'alfa_romeo': '147', 'audi': 'a4', 'b': 'a4', 'bmw': '3er', 'chevrolet': 'other', 'chrysler': 'voyager', 'citroen': 'other', 'dacia': 'sandero', 'daewoo': 'matiz', 'daihatsu': 'cuore', 'fiat': 'punto', 'ford': 'focus', 'honda': 'civic', 'hyundai': 'i_reihe', 'jaguar': 'other', 'jeep': 'grand', 'kia': 'other', 'lada': 'niva', 'lancia': 'ypsilon', 'land_rover': 'freelander', 'mazda': '3_reihe', 'mercedes_benz': 'c_klasse', 'mini': 'cooper', 'mitsubishi': 'colt', 'nissan': 'micra', 'opel': 'corsa', 'peugeot': '2_reihe', 'porsche': '911', 'renault': 'twingo', 'rover': 'other', 'saab': 'other', 'seat': 'ibiza', 'skoda': 'octavia', 'smart': 'fortwo', 'subaru': 'legacy', 'suzuki': 'other', 'toyota': 'yaris', 'trabant': '601', 'volkswagen': 'golf', 'volvo': 'v40'}
# Average listing price for each distinct odometer reading.
mileage = autos['odometer_km'].unique()
mileage_based_price = {
    km: autos.loc[autos['odometer_km'] == km, 'price'].mean()
    for km in mileage
}
mileage_based_price
{5000: 17330.70017035775, 10000: 21366.46956521739, 20000: 18613.963265306124, 30000: 16807.827176781004, 40000: 49857.3106918239, 50000: 26545.66700610998, 60000: 12449.801785714286, 70000: 11077.593509820666, 80000: 9767.567153284672, 90000: 8553.29963898917, 100000: 13236.51554663992, 125000: 6430.415355960265, 150000: 8308.480578800558}
# Average listing price per unrepaired_damage label (missing values ignored),
# then the gap between listings labelled 'no' and those labelled 'yes'.
vals = autos['unrepaired_damage'].dropna().unique()
damage_assessor = {
    status: autos.loc[autos['unrepaired_damage'] == status, 'price'].mean()
    for status in vals
}
damage_assessor['no'] - damage_assessor['yes']
6146.265474701376