# This dataset is pulled from the used cars from eBay Kleinanzeigen of the German Ebay Website
# The data dictionary provided with data is as follows:
# dateCrawled - When this ad was first crawled. All field-values are taken from this date.
# name - Name of the car.
# seller - Whether the seller is private or a dealer.
# offerType - The type of listing
# price - The price on the ad to sell the car.
# abtest - Whether the listing is included in an A/B test.
# vehicleType - The vehicle Type.
# yearOfRegistration - The year in which the car was first registered.
# gearbox - The transmission type.
# powerPS - The power of the car in PS.
# model - The car model name.
# kilometre - How many kilometers the car has driven.
# monthOfRegistration - The month in which the car was first registered.
# fuelType - What type of fuel the car uses.
# brand - The brand of the car.
# notRepairedDamage - If the car has a damage which is not yet repaired.
# dateCreated - The date on which the eBay listing was created.
# nrOfPictures - The number of pictures in the ad.
# postalCode - The postal code for the location of the vehicle.
# lastSeenOnline - When the crawler saw this ad last online.
#Aim of the project is to clean the data
import numpy
import pandas
autos = pandas.read_csv('autos.csv', encoding='Latin-1')
autos
dateCrawled | name | seller | offerType | price | abtest | vehicleType | yearOfRegistration | gearbox | powerPS | model | odometer | monthOfRegistration | fuelType | brand | notRepairedDamage | dateCreated | nrOfPictures | postalCode | lastSeen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | $5,000 | control | bus | 2004 | manuell | 158 | andere | 150,000km | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | $8,500 | control | limousine | 1997 | automatik | 286 | 7er | 150,000km | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | $8,990 | test | limousine | 2009 | manuell | 102 | golf | 70,000km | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | $4,350 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70,000km | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | $1,350 | test | kombi | 2003 | manuell | 0 | focus | 150,000km | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
5 | 2016-03-21 13:47:45 | Chrysler_Grand_Voyager_2.8_CRD_Aut.Limited_Sto... | privat | Angebot | $7,900 | test | bus | 2006 | automatik | 150 | voyager | 150,000km | 4 | diesel | chrysler | NaN | 2016-03-21 00:00:00 | 0 | 22962 | 2016-04-06 09:45:21 |
6 | 2016-03-20 17:55:21 | VW_Golf_III_GT_Special_Electronic_Green_Metall... | privat | Angebot | $300 | test | limousine | 1995 | manuell | 90 | golf | 150,000km | 8 | benzin | volkswagen | NaN | 2016-03-20 00:00:00 | 0 | 31535 | 2016-03-23 02:48:59 |
7 | 2016-03-16 18:55:19 | Golf_IV_1.9_TDI_90PS | privat | Angebot | $1,990 | control | limousine | 1998 | manuell | 90 | golf | 150,000km | 12 | diesel | volkswagen | nein | 2016-03-16 00:00:00 | 0 | 53474 | 2016-04-07 03:17:32 |
8 | 2016-03-22 16:51:34 | Seat_Arosa | privat | Angebot | $250 | test | NaN | 2000 | manuell | 0 | arosa | 150,000km | 10 | NaN | seat | nein | 2016-03-22 00:00:00 | 0 | 7426 | 2016-03-26 18:18:10 |
9 | 2016-03-16 13:47:02 | Renault_Megane_Scenic_1.6e_RT_Klimaanlage | privat | Angebot | $590 | control | bus | 1997 | manuell | 90 | megane | 150,000km | 7 | benzin | renault | nein | 2016-03-16 00:00:00 | 0 | 15749 | 2016-04-06 10:46:35 |
10 | 2016-03-15 01:41:36 | VW_Golf_Tuning_in_siber/grau | privat | Angebot | $999 | test | NaN | 2017 | manuell | 90 | NaN | 150,000km | 4 | benzin | volkswagen | nein | 2016-03-14 00:00:00 | 0 | 86157 | 2016-04-07 03:16:21 |
11 | 2016-03-16 18:45:34 | Mercedes_A140_Motorschaden | privat | Angebot | $350 | control | NaN | 2000 | NaN | 0 | NaN | 150,000km | 0 | benzin | mercedes_benz | NaN | 2016-03-16 00:00:00 | 0 | 17498 | 2016-03-16 18:45:34 |
12 | 2016-03-31 19:48:22 | Smart_smart_fortwo_coupe_softouch_pure_MHD_Pan... | privat | Angebot | $5,299 | control | kleinwagen | 2010 | automatik | 71 | fortwo | 50,000km | 9 | benzin | smart | nein | 2016-03-31 00:00:00 | 0 | 34590 | 2016-04-06 14:17:52 |
13 | 2016-03-23 10:48:32 | Audi_A3_1.6_tuning | privat | Angebot | $1,350 | control | limousine | 1999 | manuell | 101 | a3 | 150,000km | 11 | benzin | audi | nein | 2016-03-23 00:00:00 | 0 | 12043 | 2016-04-01 14:17:13 |
14 | 2016-03-23 11:50:46 | Renault_Clio_3__Dynamique_1.2__16_V;_viele_Ver... | privat | Angebot | $3,999 | test | kleinwagen | 2007 | manuell | 75 | clio | 150,000km | 9 | benzin | renault | NaN | 2016-03-23 00:00:00 | 0 | 81737 | 2016-04-01 15:46:47 |
15 | 2016-04-01 12:06:20 | Corvette_C3_Coupe_T_Top_Crossfire_Injection | privat | Angebot | $18,900 | test | coupe | 1982 | automatik | 203 | NaN | 80,000km | 6 | benzin | sonstige_autos | nein | 2016-04-01 00:00:00 | 0 | 61276 | 2016-04-02 21:10:48 |
16 | 2016-03-16 14:59:02 | Opel_Vectra_B_Kombi | privat | Angebot | $350 | test | kombi | 1999 | manuell | 101 | vectra | 150,000km | 5 | benzin | opel | nein | 2016-03-16 00:00:00 | 0 | 57299 | 2016-03-18 05:29:37 |
17 | 2016-03-29 11:46:22 | Volkswagen_Scirocco_2_G60 | privat | Angebot | $5,500 | test | coupe | 1990 | manuell | 205 | scirocco | 150,000km | 6 | benzin | volkswagen | nein | 2016-03-29 00:00:00 | 0 | 74821 | 2016-04-05 20:46:26 |
18 | 2016-03-26 19:57:44 | Verkaufen_mein_bmw_e36_320_i_touring | privat | Angebot | $300 | control | bus | 1995 | manuell | 150 | 3er | 150,000km | 0 | benzin | bmw | NaN | 2016-03-26 00:00:00 | 0 | 54329 | 2016-04-02 12:16:41 |
19 | 2016-03-17 13:36:21 | mazda_tribute_2.0_mit_gas_und_tuev_neu_2018 | privat | Angebot | $4,150 | control | suv | 2004 | manuell | 124 | andere | 150,000km | 2 | lpg | mazda | nein | 2016-03-17 00:00:00 | 0 | 40878 | 2016-03-17 14:45:58 |
20 | 2016-03-05 19:57:31 | Audi_A4_Avant_1.9_TDI_*6_Gang*AHK*Klimatronik*... | privat | Angebot | $3,500 | test | kombi | 2003 | manuell | 131 | a4 | 150,000km | 5 | diesel | audi | NaN | 2016-03-05 00:00:00 | 0 | 53913 | 2016-03-07 05:46:46 |
21 | 2016-03-06 19:07:10 | Porsche_911_Carrera_4S_Cabrio | privat | Angebot | $41,500 | test | cabrio | 2004 | manuell | 320 | 911 | 150,000km | 4 | benzin | porsche | nein | 2016-03-06 00:00:00 | 0 | 65428 | 2016-04-05 23:46:19 |
22 | 2016-03-28 20:50:54 | MINI_Cooper_S_Cabrio | privat | Angebot | $25,450 | control | cabrio | 2015 | manuell | 184 | cooper | 10,000km | 1 | benzin | mini | nein | 2016-03-28 00:00:00 | 0 | 44789 | 2016-04-01 06:45:30 |
23 | 2016-03-10 19:55:34 | Peugeot_Boxer_2_2_HDi_120_Ps_9_Sitzer_inkl_Klima | privat | Angebot | $7,999 | control | bus | 2010 | manuell | 120 | NaN | 150,000km | 2 | diesel | peugeot | nein | 2016-03-10 00:00:00 | 0 | 30900 | 2016-03-17 08:45:17 |
24 | 2016-04-03 11:57:02 | BMW_535i_xDrive_Sport_Aut. | privat | Angebot | $48,500 | control | limousine | 2014 | automatik | 306 | 5er | 30,000km | 12 | benzin | bmw | nein | 2016-04-03 00:00:00 | 0 | 22547 | 2016-04-07 13:16:50 |
25 | 2016-03-21 21:56:18 | Ford_escort_kombi_an_bastler_mit_ghia_ausstattung | privat | Angebot | $90 | control | kombi | 1996 | manuell | 116 | NaN | 150,000km | 4 | benzin | ford | ja | 2016-03-21 00:00:00 | 0 | 27574 | 2016-04-01 05:16:49 |
26 | 2016-04-03 22:46:28 | Volkswagen_Polo_Fox | privat | Angebot | $777 | control | kleinwagen | 1992 | manuell | 54 | polo | 125,000km | 2 | benzin | volkswagen | nein | 2016-04-03 00:00:00 | 0 | 38110 | 2016-04-05 23:46:48 |
27 | 2016-03-27 18:45:01 | Hat_einer_Ahnung_mit_Ford_Galaxy_HILFE | privat | Angebot | $0 | control | NaN | 2005 | NaN | 0 | NaN | 150,000km | 0 | NaN | ford | NaN | 2016-03-27 00:00:00 | 0 | 66701 | 2016-03-27 18:45:01 |
28 | 2016-03-19 21:56:19 | MINI_Cooper_D | privat | Angebot | $5,250 | control | kleinwagen | 2007 | manuell | 110 | cooper | 150,000km | 7 | diesel | mini | ja | 2016-03-19 00:00:00 | 0 | 15745 | 2016-04-07 14:58:48 |
29 | 2016-04-02 12:45:44 | Mercedes_Benz_E_320_T_CDI_Avantgarde_DPF7_Sitz... | privat | Angebot | $4,999 | test | kombi | 2004 | automatik | 204 | e_klasse | 150,000km | 10 | diesel | mercedes_benz | nein | 2016-04-02 00:00:00 | 0 | 47638 | 2016-04-02 12:45:44 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
49970 | 2016-03-21 22:47:37 | c4_Grand_Picasso_mit_Automatik_Leder_Navi_Temp... | privat | Angebot | $15,800 | control | bus | 2010 | automatik | 136 | c4 | 60,000km | 4 | diesel | citroen | nein | 2016-03-21 00:00:00 | 0 | 14947 | 2016-04-07 04:17:34 |
49971 | 2016-03-29 14:54:12 | W.Lupo_1.0 | privat | Angebot | $950 | test | kleinwagen | 2001 | manuell | 50 | lupo | 150,000km | 4 | benzin | volkswagen | nein | 2016-03-29 00:00:00 | 0 | 65197 | 2016-03-29 20:41:51 |
49972 | 2016-03-26 22:25:23 | Mercedes_Benz_Vito_115_CDI_Extralang_Aut. | privat | Angebot | $3,300 | control | bus | 2004 | automatik | 150 | vito | 150,000km | 10 | diesel | mercedes_benz | ja | 2016-03-26 00:00:00 | 0 | 65326 | 2016-03-28 11:28:18 |
49973 | 2016-03-27 05:32:39 | Mercedes_Benz_SLK_200_Kompressor | privat | Angebot | $6,000 | control | cabrio | 2004 | manuell | 163 | slk | 150,000km | 11 | benzin | mercedes_benz | nein | 2016-03-27 00:00:00 | 0 | 53567 | 2016-03-27 08:25:24 |
49974 | 2016-03-20 10:52:31 | Golf_1_Cabrio_Tuev_Neu_viele_Extras_alles_eing... | privat | Angebot | $0 | control | cabrio | 1983 | manuell | 70 | golf | 150,000km | 2 | benzin | volkswagen | nein | 2016-03-20 00:00:00 | 0 | 8209 | 2016-03-27 19:48:16 |
49975 | 2016-03-27 20:51:39 | Honda_Jazz_1.3_DSi_i_VTEC_IMA_CVT_Comfort | privat | Angebot | $9,700 | control | kleinwagen | 2012 | automatik | 88 | jazz | 100,000km | 11 | hybrid | honda | nein | 2016-03-27 00:00:00 | 0 | 84385 | 2016-04-05 19:45:34 |
49976 | 2016-03-19 18:56:05 | Audi_80_Avant_2.6_E__Vollausstattung!!_Einziga... | privat | Angebot | $5,900 | test | kombi | 1992 | automatik | 150 | 80 | 150,000km | 12 | benzin | audi | nein | 2016-03-19 00:00:00 | 0 | 36100 | 2016-04-07 06:16:44 |
49977 | 2016-03-31 18:37:18 | Mercedes_Benz_C200_Cdi_W203 | privat | Angebot | $5,500 | control | limousine | 2003 | manuell | 116 | c_klasse | 150,000km | 2 | diesel | mercedes_benz | nein | 2016-03-31 00:00:00 | 0 | 33739 | 2016-04-06 12:16:11 |
49978 | 2016-04-04 10:37:14 | Mercedes_Benz_E_200_Classic | privat | Angebot | $900 | control | limousine | 1996 | automatik | 136 | e_klasse | 150,000km | 9 | benzin | mercedes_benz | ja | 2016-04-04 00:00:00 | 0 | 24405 | 2016-04-06 12:44:20 |
49979 | 2016-03-20 18:38:40 | Volkswagen_Polo_1.6_TDI_Style | privat | Angebot | $11,000 | test | kleinwagen | 2011 | manuell | 90 | polo | 70,000km | 11 | diesel | volkswagen | nein | 2016-03-20 00:00:00 | 0 | 48455 | 2016-04-07 01:45:12 |
49980 | 2016-03-12 10:55:54 | Ford_Escort_Turnier_16V | privat | Angebot | $400 | control | kombi | 1995 | manuell | 105 | escort | 125,000km | 3 | benzin | ford | NaN | 2016-03-12 00:00:00 | 0 | 56218 | 2016-04-06 17:16:49 |
49981 | 2016-03-15 09:38:21 | Opel_Astra_Kombi_mit_Anhaengerkupplung | privat | Angebot | $2,000 | control | kombi | 1998 | manuell | 115 | astra | 150,000km | 12 | benzin | opel | nein | 2016-03-15 00:00:00 | 0 | 86859 | 2016-04-05 17:21:46 |
49982 | 2016-03-29 18:51:08 | Skoda_Fabia_4_Tuerer_Bj:2004__85.000Tkm | privat | Angebot | $1,950 | control | kleinwagen | 2004 | manuell | 0 | fabia | 90,000km | 7 | benzin | skoda | NaN | 2016-03-29 00:00:00 | 0 | 45884 | 2016-03-29 18:51:08 |
49983 | 2016-03-06 12:43:04 | Ford_focus_99 | privat | Angebot | $600 | test | kleinwagen | 1999 | manuell | 101 | focus | 150,000km | 4 | benzin | ford | NaN | 2016-03-06 00:00:00 | 0 | 52477 | 2016-03-09 06:16:08 |
49984 | 2016-03-31 22:48:48 | Student_sucht_ein__Anfaengerauto___ab_2000_BJ_... | privat | Angebot | $0 | test | NaN | 2000 | NaN | 0 | NaN | 150,000km | 0 | NaN | sonstige_autos | NaN | 2016-03-31 00:00:00 | 0 | 12103 | 2016-04-02 19:44:53 |
49985 | 2016-04-02 16:38:23 | Verkaufe_meinen_vw_vento! | privat | Angebot | $1,000 | control | NaN | 1995 | automatik | 0 | NaN | 150,000km | 0 | benzin | volkswagen | NaN | 2016-04-02 00:00:00 | 0 | 30900 | 2016-04-06 15:17:52 |
49986 | 2016-04-04 20:46:02 | Chrysler_300C_3.0_CRD_DPF_Automatik_Voll_Ausst... | privat | Angebot | $15,900 | control | limousine | 2010 | automatik | 218 | 300c | 125,000km | 11 | diesel | chrysler | nein | 2016-04-04 00:00:00 | 0 | 73527 | 2016-04-06 23:16:00 |
49987 | 2016-03-22 20:47:27 | Audi_A3_Limousine_2.0_TDI_DPF_Ambition__NAVI__... | privat | Angebot | $21,990 | control | limousine | 2013 | manuell | 150 | a3 | 50,000km | 11 | diesel | audi | nein | 2016-03-22 00:00:00 | 0 | 94362 | 2016-03-26 22:46:06 |
49988 | 2016-03-28 19:49:51 | BMW_330_Ci | privat | Angebot | $9,550 | control | coupe | 2001 | manuell | 231 | 3er | 150,000km | 10 | benzin | bmw | nein | 2016-03-28 00:00:00 | 0 | 83646 | 2016-04-07 02:17:40 |
49989 | 2016-03-11 19:50:37 | VW_Polo_zum_Ausschlachten_oder_Wiederaufbau | privat | Angebot | $150 | test | kleinwagen | 1997 | manuell | 0 | polo | 150,000km | 5 | benzin | volkswagen | ja | 2016-03-11 00:00:00 | 0 | 21244 | 2016-03-12 10:17:55 |
49990 | 2016-03-21 19:54:19 | Mercedes_Benz_A_200__BlueEFFICIENCY__Urban | privat | Angebot | $17,500 | test | limousine | 2012 | manuell | 156 | a_klasse | 30,000km | 12 | benzin | mercedes_benz | nein | 2016-03-21 00:00:00 | 0 | 58239 | 2016-04-06 22:46:57 |
49991 | 2016-03-06 15:25:19 | Kleinwagen | privat | Angebot | $500 | control | NaN | 2016 | manuell | 0 | twingo | 150,000km | 0 | benzin | renault | NaN | 2016-03-06 00:00:00 | 0 | 61350 | 2016-03-06 18:24:19 |
49992 | 2016-03-10 19:37:38 | Fiat_Grande_Punto_1.4_T_Jet_16V_Sport | privat | Angebot | $4,800 | control | kleinwagen | 2009 | manuell | 120 | andere | 125,000km | 9 | lpg | fiat | nein | 2016-03-10 00:00:00 | 0 | 68642 | 2016-03-13 01:44:51 |
49993 | 2016-03-15 18:47:35 | Audi_A3__1_8l__Silber;_schoenes_Fahrzeug | privat | Angebot | $1,650 | control | kleinwagen | 1997 | manuell | 0 | NaN | 150,000km | 7 | benzin | audi | NaN | 2016-03-15 00:00:00 | 0 | 65203 | 2016-04-06 19:46:53 |
49994 | 2016-03-22 17:36:42 | Audi_A6__S6__Avant_4.2_quattro_eventuell_Tausc... | privat | Angebot | $5,000 | control | kombi | 2001 | automatik | 299 | a6 | 150,000km | 1 | benzin | audi | nein | 2016-03-22 00:00:00 | 0 | 46537 | 2016-04-06 08:16:39 |
49995 | 2016-03-27 14:38:19 | Audi_Q5_3.0_TDI_qu._S_tr.__Navi__Panorama__Xenon | privat | Angebot | $24,900 | control | limousine | 2011 | automatik | 239 | q5 | 100,000km | 1 | diesel | audi | nein | 2016-03-27 00:00:00 | 0 | 82131 | 2016-04-01 13:47:40 |
49996 | 2016-03-28 10:50:25 | Opel_Astra_F_Cabrio_Bertone_Edition___TÜV_neu+... | privat | Angebot | $1,980 | control | cabrio | 1996 | manuell | 75 | astra | 150,000km | 5 | benzin | opel | nein | 2016-03-28 00:00:00 | 0 | 44807 | 2016-04-02 14:18:02 |
49997 | 2016-04-02 14:44:48 | Fiat_500_C_1.2_Dualogic_Lounge | privat | Angebot | $13,200 | test | cabrio | 2014 | automatik | 69 | 500 | 5,000km | 11 | benzin | fiat | nein | 2016-04-02 00:00:00 | 0 | 73430 | 2016-04-04 11:47:27 |
49998 | 2016-03-08 19:25:42 | Audi_A3_2.0_TDI_Sportback_Ambition | privat | Angebot | $22,900 | control | kombi | 2013 | manuell | 150 | a3 | 40,000km | 11 | diesel | audi | nein | 2016-03-08 00:00:00 | 0 | 35683 | 2016-04-05 16:45:07 |
49999 | 2016-03-14 00:42:12 | Opel_Vectra_1.6_16V | privat | Angebot | $1,250 | control | limousine | 1996 | manuell | 101 | vectra | 150,000km | 1 | benzin | opel | nein | 2016-03-13 00:00:00 | 0 | 45897 | 2016-04-06 21:18:48 |
50000 rows × 20 columns
autos.info()
autos.head(5)
<class 'pandas.core.frame.DataFrame'> RangeIndex: 50000 entries, 0 to 49999 Data columns (total 20 columns): dateCrawled 50000 non-null object name 50000 non-null object seller 50000 non-null object offerType 50000 non-null object price 50000 non-null object abtest 50000 non-null object vehicleType 44905 non-null object yearOfRegistration 50000 non-null int64 gearbox 47320 non-null object powerPS 50000 non-null int64 model 47242 non-null object odometer 50000 non-null object monthOfRegistration 50000 non-null int64 fuelType 45518 non-null object brand 50000 non-null object notRepairedDamage 40171 non-null object dateCreated 50000 non-null object nrOfPictures 50000 non-null int64 postalCode 50000 non-null int64 lastSeen 50000 non-null object dtypes: int64(5), object(15) memory usage: 7.6+ MB
dateCrawled | name | seller | offerType | price | abtest | vehicleType | yearOfRegistration | gearbox | powerPS | model | odometer | monthOfRegistration | fuelType | brand | notRepairedDamage | dateCreated | nrOfPictures | postalCode | lastSeen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | $5,000 | control | bus | 2004 | manuell | 158 | andere | 150,000km | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | $8,500 | control | limousine | 1997 | automatik | 286 | 7er | 150,000km | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | $8,990 | test | limousine | 2009 | manuell | 102 | golf | 70,000km | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | $4,350 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70,000km | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | $1,350 | test | kombi | 2003 | manuell | 0 | focus | 150,000km | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
autos.columns
Index(['dateCrawled', 'name', 'seller', 'offerType', 'price', 'abtest', 'vehicleType', 'yearOfRegistration', 'gearbox', 'powerPS', 'model', 'odometer', 'monthOfRegistration', 'fuelType', 'brand', 'notRepairedDamage', 'dateCreated', 'nrOfPictures', 'postalCode', 'lastSeen'], dtype='object')
autos_copy = autos.copy()
columns = autos_copy.columns
print(columns)
Index(['dateCrawled', 'name', 'seller', 'offerType', 'price', 'abtest', 'vehicleType', 'yearOfRegistration', 'gearbox', 'powerPS', 'model', 'odometer', 'monthOfRegistration', 'fuelType', 'brand', 'notRepairedDamage', 'dateCreated', 'nrOfPictures', 'postalCode', 'lastSeen'], dtype='object')
autos_copy = autos.rename(columns = {
'dateCrawled':'date_crawled',
'name':'name',
'seller':'seller',
'offerType':'offer_type',
'price':'price',
'abtest':'abtest',
'vehicleType':'vehicle_type',
'yearOfRegistration':'registration_year',
'gearbox':'gearbox',
'powerPS':'power_ps',
'model':'model',
'odometer':'odometer_km',
'monthOfRegistration':'registration_month',
'fuelType':'fuel_type',
'brand':'brand',
'notRepairedDamage':'unrepaired_damage',
'dateCreated':'ad_created',
'nrOfPictures':'nr_of_pictures',
'postalCode':'postal_code',
'lastSeen':'last_seen'})
autos_copy.head()
date_crawled | name | seller | offer_type | price | abtest | vehicle_type | registration_year | gearbox | power_ps | model | odometer_km | registration_month | fuel_type | brand | unrepaired_damage | ad_created | nr_of_pictures | postal_code | last_seen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | $5,000 | control | bus | 2004 | manuell | 158 | andere | 150,000km | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | $8,500 | control | limousine | 1997 | automatik | 286 | 7er | 150,000km | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | $8,990 | test | limousine | 2009 | manuell | 102 | golf | 70,000km | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | $4,350 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70,000km | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | $1,350 | test | kombi | 2003 | manuell | 0 | focus | 150,000km | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
#Seller and offer_type are all the same value
autos_copy.describe(include='all')
date_crawled | name | seller | offer_type | price | abtest | vehicle_type | registration_year | gearbox | power_ps | model | odometer_km | registration_month | fuel_type | brand | unrepaired_damage | ad_created | nr_of_pictures | postal_code | last_seen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | 50000 | 50000 | 50000 | 50000 | 50000 | 50000 | 44905 | 50000.000000 | 47320 | 50000.000000 | 47242 | 50000 | 50000.000000 | 45518 | 50000 | 40171 | 50000 | 50000.0 | 50000.000000 | 50000 |
unique | 48213 | 38754 | 2 | 2 | 2357 | 2 | 8 | NaN | 2 | NaN | 245 | 13 | NaN | 7 | 40 | 2 | 76 | NaN | NaN | 39481 |
top | 2016-03-27 22:55:05 | Ford_Fiesta | privat | Angebot | $0 | test | limousine | NaN | manuell | NaN | golf | 150,000km | NaN | benzin | volkswagen | nein | 2016-04-03 00:00:00 | NaN | NaN | 2016-04-07 06:17:27 |
freq | 3 | 78 | 49999 | 49999 | 1421 | 25756 | 12859 | NaN | 36993 | NaN | 4024 | 32424 | NaN | 30107 | 10687 | 35232 | 1946 | NaN | NaN | 8 |
mean | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2005.073280 | NaN | 116.355920 | NaN | NaN | 5.723360 | NaN | NaN | NaN | NaN | 0.0 | 50813.627300 | NaN |
std | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 105.712813 | NaN | 209.216627 | NaN | NaN | 3.711984 | NaN | NaN | NaN | NaN | 0.0 | 25779.747957 | NaN |
min | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1000.000000 | NaN | 0.000000 | NaN | NaN | 0.000000 | NaN | NaN | NaN | NaN | 0.0 | 1067.000000 | NaN |
25% | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1999.000000 | NaN | 70.000000 | NaN | NaN | 3.000000 | NaN | NaN | NaN | NaN | 0.0 | 30451.000000 | NaN |
50% | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2003.000000 | NaN | 105.000000 | NaN | NaN | 6.000000 | NaN | NaN | NaN | NaN | 0.0 | 49577.000000 | NaN |
75% | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2008.000000 | NaN | 150.000000 | NaN | NaN | 9.000000 | NaN | NaN | NaN | NaN | 0.0 | 71540.000000 | NaN |
max | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 9999.000000 | NaN | 17700.000000 | NaN | NaN | 12.000000 | NaN | NaN | NaN | NaN | 0.0 | 99998.000000 | NaN |
#convert odometer to integer
autos_copy["odometer_km"] = autos_copy["odometer_km"].str.replace(',|km','')
autos_copy["odometer_km"] = autos_copy["odometer_km"].astype(float)
#convert price to integer
autos_copy["price"] = autos_copy["price"].str.replace('\$|,','')
autos_copy["price"] = autos_copy["price"].astype(float)
#Find unique prices for vechiles
autos_copy["price"].unique().shape
(2357,)
#Find unique kms per vehicle
autos_copy["price"].describe()
count 5.000000e+04 mean 9.840044e+03 std 4.811044e+05 min 0.000000e+00 25% 1.100000e+03 50% 2.950000e+03 75% 7.200000e+03 max 1.000000e+08 Name: price, dtype: float64
#Find unique price per vehicle
autos_copy["price"].value_counts().sort_index(ascending=False).head(20)
99999999.0 1 27322222.0 1 12345678.0 3 11111111.0 2 10000000.0 1 3890000.0 1 1300000.0 1 1234566.0 1 999999.0 2 999990.0 1 350000.0 1 345000.0 1 299000.0 1 295000.0 1 265000.0 1 259000.0 1 250000.0 1 220000.0 1 198000.0 1 197000.0 1 Name: price, dtype: int64
autos_copy["price"].value_counts().sort_index(ascending=False).tail(20)
35.0 1 30.0 7 29.0 1 25.0 5 20.0 4 18.0 1 17.0 3 15.0 2 14.0 1 13.0 2 12.0 3 11.0 2 10.0 7 9.0 1 8.0 1 5.0 2 3.0 1 2.0 3 1.0 156 0.0 1421 Name: price, dtype: int64
#There is too much variation between the pricing on the vechiles
#Pricing above 350,000 appears to be random key entries or errorenous data
#removal of vechiles with zero cost
autos_copy[autos_copy["price"].between(1,350000)]
date_crawled | name | seller | offer_type | price | abtest | vehicle_type | registration_year | gearbox | power_ps | model | odometer_km | registration_month | fuel_type | brand | unrepaired_damage | ad_created | nr_of_pictures | postal_code | last_seen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | 5000.0 | control | bus | 2004 | manuell | 158 | andere | 150000.0 | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | 8500.0 | control | limousine | 1997 | automatik | 286 | 7er | 150000.0 | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | 8990.0 | test | limousine | 2009 | manuell | 102 | golf | 70000.0 | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | 4350.0 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70000.0 | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | 1350.0 | test | kombi | 2003 | manuell | 0 | focus | 150000.0 | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
5 | 2016-03-21 13:47:45 | Chrysler_Grand_Voyager_2.8_CRD_Aut.Limited_Sto... | privat | Angebot | 7900.0 | test | bus | 2006 | automatik | 150 | voyager | 150000.0 | 4 | diesel | chrysler | NaN | 2016-03-21 00:00:00 | 0 | 22962 | 2016-04-06 09:45:21 |
6 | 2016-03-20 17:55:21 | VW_Golf_III_GT_Special_Electronic_Green_Metall... | privat | Angebot | 300.0 | test | limousine | 1995 | manuell | 90 | golf | 150000.0 | 8 | benzin | volkswagen | NaN | 2016-03-20 00:00:00 | 0 | 31535 | 2016-03-23 02:48:59 |
7 | 2016-03-16 18:55:19 | Golf_IV_1.9_TDI_90PS | privat | Angebot | 1990.0 | control | limousine | 1998 | manuell | 90 | golf | 150000.0 | 12 | diesel | volkswagen | nein | 2016-03-16 00:00:00 | 0 | 53474 | 2016-04-07 03:17:32 |
8 | 2016-03-22 16:51:34 | Seat_Arosa | privat | Angebot | 250.0 | test | NaN | 2000 | manuell | 0 | arosa | 150000.0 | 10 | NaN | seat | nein | 2016-03-22 00:00:00 | 0 | 7426 | 2016-03-26 18:18:10 |
9 | 2016-03-16 13:47:02 | Renault_Megane_Scenic_1.6e_RT_Klimaanlage | privat | Angebot | 590.0 | control | bus | 1997 | manuell | 90 | megane | 150000.0 | 7 | benzin | renault | nein | 2016-03-16 00:00:00 | 0 | 15749 | 2016-04-06 10:46:35 |
10 | 2016-03-15 01:41:36 | VW_Golf_Tuning_in_siber/grau | privat | Angebot | 999.0 | test | NaN | 2017 | manuell | 90 | NaN | 150000.0 | 4 | benzin | volkswagen | nein | 2016-03-14 00:00:00 | 0 | 86157 | 2016-04-07 03:16:21 |
11 | 2016-03-16 18:45:34 | Mercedes_A140_Motorschaden | privat | Angebot | 350.0 | control | NaN | 2000 | NaN | 0 | NaN | 150000.0 | 0 | benzin | mercedes_benz | NaN | 2016-03-16 00:00:00 | 0 | 17498 | 2016-03-16 18:45:34 |
12 | 2016-03-31 19:48:22 | Smart_smart_fortwo_coupe_softouch_pure_MHD_Pan... | privat | Angebot | 5299.0 | control | kleinwagen | 2010 | automatik | 71 | fortwo | 50000.0 | 9 | benzin | smart | nein | 2016-03-31 00:00:00 | 0 | 34590 | 2016-04-06 14:17:52 |
13 | 2016-03-23 10:48:32 | Audi_A3_1.6_tuning | privat | Angebot | 1350.0 | control | limousine | 1999 | manuell | 101 | a3 | 150000.0 | 11 | benzin | audi | nein | 2016-03-23 00:00:00 | 0 | 12043 | 2016-04-01 14:17:13 |
14 | 2016-03-23 11:50:46 | Renault_Clio_3__Dynamique_1.2__16_V;_viele_Ver... | privat | Angebot | 3999.0 | test | kleinwagen | 2007 | manuell | 75 | clio | 150000.0 | 9 | benzin | renault | NaN | 2016-03-23 00:00:00 | 0 | 81737 | 2016-04-01 15:46:47 |
15 | 2016-04-01 12:06:20 | Corvette_C3_Coupe_T_Top_Crossfire_Injection | privat | Angebot | 18900.0 | test | coupe | 1982 | automatik | 203 | NaN | 80000.0 | 6 | benzin | sonstige_autos | nein | 2016-04-01 00:00:00 | 0 | 61276 | 2016-04-02 21:10:48 |
16 | 2016-03-16 14:59:02 | Opel_Vectra_B_Kombi | privat | Angebot | 350.0 | test | kombi | 1999 | manuell | 101 | vectra | 150000.0 | 5 | benzin | opel | nein | 2016-03-16 00:00:00 | 0 | 57299 | 2016-03-18 05:29:37 |
17 | 2016-03-29 11:46:22 | Volkswagen_Scirocco_2_G60 | privat | Angebot | 5500.0 | test | coupe | 1990 | manuell | 205 | scirocco | 150000.0 | 6 | benzin | volkswagen | nein | 2016-03-29 00:00:00 | 0 | 74821 | 2016-04-05 20:46:26 |
18 | 2016-03-26 19:57:44 | Verkaufen_mein_bmw_e36_320_i_touring | privat | Angebot | 300.0 | control | bus | 1995 | manuell | 150 | 3er | 150000.0 | 0 | benzin | bmw | NaN | 2016-03-26 00:00:00 | 0 | 54329 | 2016-04-02 12:16:41 |
19 | 2016-03-17 13:36:21 | mazda_tribute_2.0_mit_gas_und_tuev_neu_2018 | privat | Angebot | 4150.0 | control | suv | 2004 | manuell | 124 | andere | 150000.0 | 2 | lpg | mazda | nein | 2016-03-17 00:00:00 | 0 | 40878 | 2016-03-17 14:45:58 |
20 | 2016-03-05 19:57:31 | Audi_A4_Avant_1.9_TDI_*6_Gang*AHK*Klimatronik*... | privat | Angebot | 3500.0 | test | kombi | 2003 | manuell | 131 | a4 | 150000.0 | 5 | diesel | audi | NaN | 2016-03-05 00:00:00 | 0 | 53913 | 2016-03-07 05:46:46 |
21 | 2016-03-06 19:07:10 | Porsche_911_Carrera_4S_Cabrio | privat | Angebot | 41500.0 | test | cabrio | 2004 | manuell | 320 | 911 | 150000.0 | 4 | benzin | porsche | nein | 2016-03-06 00:00:00 | 0 | 65428 | 2016-04-05 23:46:19 |
22 | 2016-03-28 20:50:54 | MINI_Cooper_S_Cabrio | privat | Angebot | 25450.0 | control | cabrio | 2015 | manuell | 184 | cooper | 10000.0 | 1 | benzin | mini | nein | 2016-03-28 00:00:00 | 0 | 44789 | 2016-04-01 06:45:30 |
23 | 2016-03-10 19:55:34 | Peugeot_Boxer_2_2_HDi_120_Ps_9_Sitzer_inkl_Klima | privat | Angebot | 7999.0 | control | bus | 2010 | manuell | 120 | NaN | 150000.0 | 2 | diesel | peugeot | nein | 2016-03-10 00:00:00 | 0 | 30900 | 2016-03-17 08:45:17 |
24 | 2016-04-03 11:57:02 | BMW_535i_xDrive_Sport_Aut. | privat | Angebot | 48500.0 | control | limousine | 2014 | automatik | 306 | 5er | 30000.0 | 12 | benzin | bmw | nein | 2016-04-03 00:00:00 | 0 | 22547 | 2016-04-07 13:16:50 |
25 | 2016-03-21 21:56:18 | Ford_escort_kombi_an_bastler_mit_ghia_ausstattung | privat | Angebot | 90.0 | control | kombi | 1996 | manuell | 116 | NaN | 150000.0 | 4 | benzin | ford | ja | 2016-03-21 00:00:00 | 0 | 27574 | 2016-04-01 05:16:49 |
26 | 2016-04-03 22:46:28 | Volkswagen_Polo_Fox | privat | Angebot | 777.0 | control | kleinwagen | 1992 | manuell | 54 | polo | 125000.0 | 2 | benzin | volkswagen | nein | 2016-04-03 00:00:00 | 0 | 38110 | 2016-04-05 23:46:48 |
28 | 2016-03-19 21:56:19 | MINI_Cooper_D | privat | Angebot | 5250.0 | control | kleinwagen | 2007 | manuell | 110 | cooper | 150000.0 | 7 | diesel | mini | ja | 2016-03-19 00:00:00 | 0 | 15745 | 2016-04-07 14:58:48 |
29 | 2016-04-02 12:45:44 | Mercedes_Benz_E_320_T_CDI_Avantgarde_DPF7_Sitz... | privat | Angebot | 4999.0 | test | kombi | 2004 | automatik | 204 | e_klasse | 150000.0 | 10 | diesel | mercedes_benz | nein | 2016-04-02 00:00:00 | 0 | 47638 | 2016-04-02 12:45:44 |
30 | 2016-03-14 11:47:31 | Peugeot_206_Unfallfahrzeug | privat | Angebot | 80.0 | test | kleinwagen | 2002 | manuell | 60 | 2_reihe | 150000.0 | 6 | benzin | peugeot | ja | 2016-03-14 00:00:00 | 0 | 57076 | 2016-03-14 11:47:31 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
49968 | 2016-04-01 17:49:15 | Mercedes_Benz_190_D_2.5_Automatik | privat | Angebot | 2100.0 | test | limousine | 1986 | automatik | 90 | andere | 150000.0 | 9 | diesel | mercedes_benz | nein | 2016-04-01 00:00:00 | 0 | 40227 | 2016-04-05 13:16:35 |
49969 | 2016-03-17 18:49:02 | Nissan_X_Trail_2.2_dCi_4x4_Sport_m.AHZ | privat | Angebot | 4500.0 | control | suv | 2005 | manuell | 136 | x_trail | 150000.0 | 5 | diesel | nissan | nein | 2016-03-17 00:00:00 | 0 | 17379 | 2016-03-25 23:18:15 |
49970 | 2016-03-21 22:47:37 | c4_Grand_Picasso_mit_Automatik_Leder_Navi_Temp... | privat | Angebot | 15800.0 | control | bus | 2010 | automatik | 136 | c4 | 60000.0 | 4 | diesel | citroen | nein | 2016-03-21 00:00:00 | 0 | 14947 | 2016-04-07 04:17:34 |
49971 | 2016-03-29 14:54:12 | W.Lupo_1.0 | privat | Angebot | 950.0 | test | kleinwagen | 2001 | manuell | 50 | lupo | 150000.0 | 4 | benzin | volkswagen | nein | 2016-03-29 00:00:00 | 0 | 65197 | 2016-03-29 20:41:51 |
49972 | 2016-03-26 22:25:23 | Mercedes_Benz_Vito_115_CDI_Extralang_Aut. | privat | Angebot | 3300.0 | control | bus | 2004 | automatik | 150 | vito | 150000.0 | 10 | diesel | mercedes_benz | ja | 2016-03-26 00:00:00 | 0 | 65326 | 2016-03-28 11:28:18 |
49973 | 2016-03-27 05:32:39 | Mercedes_Benz_SLK_200_Kompressor | privat | Angebot | 6000.0 | control | cabrio | 2004 | manuell | 163 | slk | 150000.0 | 11 | benzin | mercedes_benz | nein | 2016-03-27 00:00:00 | 0 | 53567 | 2016-03-27 08:25:24 |
49975 | 2016-03-27 20:51:39 | Honda_Jazz_1.3_DSi_i_VTEC_IMA_CVT_Comfort | privat | Angebot | 9700.0 | control | kleinwagen | 2012 | automatik | 88 | jazz | 100000.0 | 11 | hybrid | honda | nein | 2016-03-27 00:00:00 | 0 | 84385 | 2016-04-05 19:45:34 |
49976 | 2016-03-19 18:56:05 | Audi_80_Avant_2.6_E__Vollausstattung!!_Einziga... | privat | Angebot | 5900.0 | test | kombi | 1992 | automatik | 150 | 80 | 150000.0 | 12 | benzin | audi | nein | 2016-03-19 00:00:00 | 0 | 36100 | 2016-04-07 06:16:44 |
49977 | 2016-03-31 18:37:18 | Mercedes_Benz_C200_Cdi_W203 | privat | Angebot | 5500.0 | control | limousine | 2003 | manuell | 116 | c_klasse | 150000.0 | 2 | diesel | mercedes_benz | nein | 2016-03-31 00:00:00 | 0 | 33739 | 2016-04-06 12:16:11 |
49978 | 2016-04-04 10:37:14 | Mercedes_Benz_E_200_Classic | privat | Angebot | 900.0 | control | limousine | 1996 | automatik | 136 | e_klasse | 150000.0 | 9 | benzin | mercedes_benz | ja | 2016-04-04 00:00:00 | 0 | 24405 | 2016-04-06 12:44:20 |
49979 | 2016-03-20 18:38:40 | Volkswagen_Polo_1.6_TDI_Style | privat | Angebot | 11000.0 | test | kleinwagen | 2011 | manuell | 90 | polo | 70000.0 | 11 | diesel | volkswagen | nein | 2016-03-20 00:00:00 | 0 | 48455 | 2016-04-07 01:45:12 |
49980 | 2016-03-12 10:55:54 | Ford_Escort_Turnier_16V | privat | Angebot | 400.0 | control | kombi | 1995 | manuell | 105 | escort | 125000.0 | 3 | benzin | ford | NaN | 2016-03-12 00:00:00 | 0 | 56218 | 2016-04-06 17:16:49 |
49981 | 2016-03-15 09:38:21 | Opel_Astra_Kombi_mit_Anhaengerkupplung | privat | Angebot | 2000.0 | control | kombi | 1998 | manuell | 115 | astra | 150000.0 | 12 | benzin | opel | nein | 2016-03-15 00:00:00 | 0 | 86859 | 2016-04-05 17:21:46 |
49982 | 2016-03-29 18:51:08 | Skoda_Fabia_4_Tuerer_Bj:2004__85.000Tkm | privat | Angebot | 1950.0 | control | kleinwagen | 2004 | manuell | 0 | fabia | 90000.0 | 7 | benzin | skoda | NaN | 2016-03-29 00:00:00 | 0 | 45884 | 2016-03-29 18:51:08 |
49983 | 2016-03-06 12:43:04 | Ford_focus_99 | privat | Angebot | 600.0 | test | kleinwagen | 1999 | manuell | 101 | focus | 150000.0 | 4 | benzin | ford | NaN | 2016-03-06 00:00:00 | 0 | 52477 | 2016-03-09 06:16:08 |
49985 | 2016-04-02 16:38:23 | Verkaufe_meinen_vw_vento! | privat | Angebot | 1000.0 | control | NaN | 1995 | automatik | 0 | NaN | 150000.0 | 0 | benzin | volkswagen | NaN | 2016-04-02 00:00:00 | 0 | 30900 | 2016-04-06 15:17:52 |
49986 | 2016-04-04 20:46:02 | Chrysler_300C_3.0_CRD_DPF_Automatik_Voll_Ausst... | privat | Angebot | 15900.0 | control | limousine | 2010 | automatik | 218 | 300c | 125000.0 | 11 | diesel | chrysler | nein | 2016-04-04 00:00:00 | 0 | 73527 | 2016-04-06 23:16:00 |
49987 | 2016-03-22 20:47:27 | Audi_A3_Limousine_2.0_TDI_DPF_Ambition__NAVI__... | privat | Angebot | 21990.0 | control | limousine | 2013 | manuell | 150 | a3 | 50000.0 | 11 | diesel | audi | nein | 2016-03-22 00:00:00 | 0 | 94362 | 2016-03-26 22:46:06 |
49988 | 2016-03-28 19:49:51 | BMW_330_Ci | privat | Angebot | 9550.0 | control | coupe | 2001 | manuell | 231 | 3er | 150000.0 | 10 | benzin | bmw | nein | 2016-03-28 00:00:00 | 0 | 83646 | 2016-04-07 02:17:40 |
49989 | 2016-03-11 19:50:37 | VW_Polo_zum_Ausschlachten_oder_Wiederaufbau | privat | Angebot | 150.0 | test | kleinwagen | 1997 | manuell | 0 | polo | 150000.0 | 5 | benzin | volkswagen | ja | 2016-03-11 00:00:00 | 0 | 21244 | 2016-03-12 10:17:55 |
49990 | 2016-03-21 19:54:19 | Mercedes_Benz_A_200__BlueEFFICIENCY__Urban | privat | Angebot | 17500.0 | test | limousine | 2012 | manuell | 156 | a_klasse | 30000.0 | 12 | benzin | mercedes_benz | nein | 2016-03-21 00:00:00 | 0 | 58239 | 2016-04-06 22:46:57 |
49991 | 2016-03-06 15:25:19 | Kleinwagen | privat | Angebot | 500.0 | control | NaN | 2016 | manuell | 0 | twingo | 150000.0 | 0 | benzin | renault | NaN | 2016-03-06 00:00:00 | 0 | 61350 | 2016-03-06 18:24:19 |
49992 | 2016-03-10 19:37:38 | Fiat_Grande_Punto_1.4_T_Jet_16V_Sport | privat | Angebot | 4800.0 | control | kleinwagen | 2009 | manuell | 120 | andere | 125000.0 | 9 | lpg | fiat | nein | 2016-03-10 00:00:00 | 0 | 68642 | 2016-03-13 01:44:51 |
49993 | 2016-03-15 18:47:35 | Audi_A3__1_8l__Silber;_schoenes_Fahrzeug | privat | Angebot | 1650.0 | control | kleinwagen | 1997 | manuell | 0 | NaN | 150000.0 | 7 | benzin | audi | NaN | 2016-03-15 00:00:00 | 0 | 65203 | 2016-04-06 19:46:53 |
49994 | 2016-03-22 17:36:42 | Audi_A6__S6__Avant_4.2_quattro_eventuell_Tausc... | privat | Angebot | 5000.0 | control | kombi | 2001 | automatik | 299 | a6 | 150000.0 | 1 | benzin | audi | nein | 2016-03-22 00:00:00 | 0 | 46537 | 2016-04-06 08:16:39 |
49995 | 2016-03-27 14:38:19 | Audi_Q5_3.0_TDI_qu._S_tr.__Navi__Panorama__Xenon | privat | Angebot | 24900.0 | control | limousine | 2011 | automatik | 239 | q5 | 100000.0 | 1 | diesel | audi | nein | 2016-03-27 00:00:00 | 0 | 82131 | 2016-04-01 13:47:40 |
49996 | 2016-03-28 10:50:25 | Opel_Astra_F_Cabrio_Bertone_Edition___TÜV_neu+... | privat | Angebot | 1980.0 | control | cabrio | 1996 | manuell | 75 | astra | 150000.0 | 5 | benzin | opel | nein | 2016-03-28 00:00:00 | 0 | 44807 | 2016-04-02 14:18:02 |
49997 | 2016-04-02 14:44:48 | Fiat_500_C_1.2_Dualogic_Lounge | privat | Angebot | 13200.0 | test | cabrio | 2014 | automatik | 69 | 500 | 5000.0 | 11 | benzin | fiat | nein | 2016-04-02 00:00:00 | 0 | 73430 | 2016-04-04 11:47:27 |
49998 | 2016-03-08 19:25:42 | Audi_A3_2.0_TDI_Sportback_Ambition | privat | Angebot | 22900.0 | control | kombi | 2013 | manuell | 150 | a3 | 40000.0 | 11 | diesel | audi | nein | 2016-03-08 00:00:00 | 0 | 35683 | 2016-04-05 16:45:07 |
49999 | 2016-03-14 00:42:12 | Opel_Vectra_1.6_16V | privat | Angebot | 1250.0 | control | limousine | 1996 | manuell | 101 | vectra | 150000.0 | 1 | benzin | opel | nein | 2016-03-13 00:00:00 | 0 | 45897 | 2016-04-06 21:18:48 |
48565 rows × 20 columns
#Find unique odometer readings for vechiles
autos_copy["odometer_km"].unique().shape
(13,)
autos_copy["odometer_km"].describe()
count 50000.000000 mean 125732.700000 std 40042.211706 min 5000.000000 25% 125000.000000 50% 150000.000000 75% 150000.000000 max 150000.000000 Name: odometer_km, dtype: float64
autos_copy["odometer_km"].value_counts().sort_index(ascending=False).head(20)
150000.0 32424 125000.0 5170 100000.0 2169 90000.0 1757 80000.0 1436 70000.0 1230 60000.0 1164 50000.0 1027 40000.0 819 30000.0 789 20000.0 784 10000.0 264 5000.0 967 Name: odometer_km, dtype: int64
#Review of the dataset does not appear to have any outliers, it is possible for vehicles
#to have 150000, this would indicate vehicle are generally put up for sale when
#they reach this odometer milestone
autos_copy["odometer_km"].value_counts().sort_index(ascending=False).tail(20)
150000.0 32424 125000.0 5170 100000.0 2169 90000.0 1757 80000.0 1436 70000.0 1230 60000.0 1164 50000.0 1027 40000.0 819 30000.0 789 20000.0 784 10000.0 264 5000.0 967 Name: odometer_km, dtype: int64
autos_copy[['date_crawled','ad_created','last_seen']][0:5]
date_crawled | ad_created | last_seen | |
---|---|---|---|
0 | 2016-03-26 17:47:46 | 2016-03-26 00:00:00 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | 2016-04-04 00:00:00 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | 2016-03-26 00:00:00 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | 2016-03-12 00:00:00 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | 2016-04-01 00:00:00 | 2016-04-01 14:38:50 |
#Majority of the data crawled is within the month of March, it would appear that the 5th/6th of the month of March and April
#appear to be a period of low activity, but not enough data to make a conclusive statement.
autos_copy['date_crawled'].str[:10].value_counts(normalize=True, dropna=False).sort_index()
2016-03-05 0.02538 2016-03-06 0.01394 2016-03-07 0.03596 2016-03-08 0.03330 2016-03-09 0.03322 2016-03-10 0.03212 2016-03-11 0.03248 2016-03-12 0.03678 2016-03-13 0.01556 2016-03-14 0.03662 2016-03-15 0.03398 2016-03-16 0.02950 2016-03-17 0.03152 2016-03-18 0.01306 2016-03-19 0.03490 2016-03-20 0.03782 2016-03-21 0.03752 2016-03-22 0.03294 2016-03-23 0.03238 2016-03-24 0.02910 2016-03-25 0.03174 2016-03-26 0.03248 2016-03-27 0.03104 2016-03-28 0.03484 2016-03-29 0.03418 2016-03-30 0.03362 2016-03-31 0.03192 2016-04-01 0.03380 2016-04-02 0.03540 2016-04-03 0.03868 2016-04-04 0.03652 2016-04-05 0.01310 2016-04-06 0.00318 2016-04-07 0.00142 Name: date_crawled, dtype: float64
#Dataset would indicate that most of the ads where created during the month of March
autos_copy['ad_created'].str[:10].value_counts(normalize=True, dropna=False).sort_index()
2015-06-11 0.00002 2015-08-10 0.00002 2015-09-09 0.00002 2015-11-10 0.00002 2015-12-05 0.00002 2015-12-30 0.00002 2016-01-03 0.00002 2016-01-07 0.00002 2016-01-10 0.00004 2016-01-13 0.00002 2016-01-14 0.00002 2016-01-16 0.00002 2016-01-22 0.00002 2016-01-27 0.00006 2016-01-29 0.00002 2016-02-01 0.00002 2016-02-02 0.00004 2016-02-05 0.00004 2016-02-07 0.00002 2016-02-08 0.00002 2016-02-09 0.00004 2016-02-11 0.00002 2016-02-12 0.00006 2016-02-14 0.00004 2016-02-16 0.00002 2016-02-17 0.00002 2016-02-18 0.00004 2016-02-19 0.00006 2016-02-20 0.00004 2016-02-21 0.00006 ... 2016-03-09 0.03324 2016-03-10 0.03186 2016-03-11 0.03278 2016-03-12 0.03662 2016-03-13 0.01692 2016-03-14 0.03522 2016-03-15 0.03374 2016-03-16 0.03000 2016-03-17 0.03120 2016-03-18 0.01372 2016-03-19 0.03384 2016-03-20 0.03786 2016-03-21 0.03772 2016-03-22 0.03280 2016-03-23 0.03218 2016-03-24 0.02908 2016-03-25 0.03188 2016-03-26 0.03256 2016-03-27 0.03090 2016-03-28 0.03496 2016-03-29 0.03414 2016-03-30 0.03344 2016-03-31 0.03192 2016-04-01 0.03380 2016-04-02 0.03508 2016-04-03 0.03892 2016-04-04 0.03688 2016-04-05 0.01184 2016-04-06 0.00326 2016-04-07 0.00128 Name: ad_created, Length: 76, dtype: float64
autos_copy['last_seen'].str[:10].value_counts(normalize=True, dropna=False).sort_index()
2016-03-05 0.00108 2016-03-06 0.00442 2016-03-07 0.00536 2016-03-08 0.00760 2016-03-09 0.00986 2016-03-10 0.01076 2016-03-11 0.01252 2016-03-12 0.02382 2016-03-13 0.00898 2016-03-14 0.01280 2016-03-15 0.01588 2016-03-16 0.01644 2016-03-17 0.02792 2016-03-18 0.00742 2016-03-19 0.01574 2016-03-20 0.02070 2016-03-21 0.02074 2016-03-22 0.02158 2016-03-23 0.01858 2016-03-24 0.01956 2016-03-25 0.01920 2016-03-26 0.01696 2016-03-27 0.01602 2016-03-28 0.02086 2016-03-29 0.02234 2016-03-30 0.02484 2016-03-31 0.02384 2016-04-01 0.02310 2016-04-02 0.02490 2016-04-03 0.02536 2016-04-04 0.02462 2016-04-05 0.12428 2016-04-06 0.22100 2016-04-07 0.13092 Name: last_seen, dtype: float64
autos_copy['registration_year'].describe()
#There are 50000 vehicles listed, there appears to be erroneous data as the min is 1000
# and max is 9999, this data will require clean up.
count 50000.000000 mean 2005.073280 std 105.712813 min 1000.000000 25% 1999.000000 50% 2003.000000 75% 2008.000000 max 9999.000000 Name: registration_year, dtype: float64
autos_copy['registration_year'].value_counts(normalize=True).sort_index()
1000 0.00002 1001 0.00002 1111 0.00002 1500 0.00002 1800 0.00004 1910 0.00018 1927 0.00002 1929 0.00002 1931 0.00002 1934 0.00004 1937 0.00008 1938 0.00002 1939 0.00002 1941 0.00004 1943 0.00002 1948 0.00002 1950 0.00006 1951 0.00004 1952 0.00002 1953 0.00002 1954 0.00004 1955 0.00004 1956 0.00010 1957 0.00004 1958 0.00008 1959 0.00014 1960 0.00068 1961 0.00012 1962 0.00008 1963 0.00018 ... 2001 0.05406 2002 0.05066 2003 0.05454 2004 0.05474 2005 0.06030 2006 0.05416 2007 0.04608 2008 0.04462 2009 0.04196 2010 0.03194 2011 0.03268 2012 0.02646 2013 0.01612 2014 0.01332 2015 0.00798 2016 0.02632 2017 0.02906 2018 0.00984 2019 0.00006 2800 0.00002 4100 0.00002 4500 0.00002 4800 0.00002 5000 0.00008 5911 0.00002 6200 0.00002 8888 0.00002 9000 0.00004 9996 0.00002 9999 0.00008 Name: registration_year, Length: 97, dtype: float64
#Vechile Registration year before 1910 and after 2016 marked as erroneous data
(autos_copy['registration_year'] < 1910).value_counts(normalize=True)
False 0.99988 True 0.00012 Name: registration_year, dtype: float64
autos_copy = autos_copy[(autos_copy['registration_year'] >= 1910) & (autos_copy['registration_year'] <= 2016)]
autos_copy.head(5)
date_crawled | name | seller | offer_type | price | abtest | vehicle_type | registration_year | gearbox | power_ps | model | odometer_km | registration_month | fuel_type | brand | unrepaired_damage | ad_created | nr_of_pictures | postal_code | last_seen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | 5000.0 | control | bus | 2004 | manuell | 158 | andere | 150000.0 | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | 8500.0 | control | limousine | 1997 | automatik | 286 | 7er | 150000.0 | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | 8990.0 | test | limousine | 2009 | manuell | 102 | golf | 70000.0 | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | 4350.0 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70000.0 | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | 1350.0 | test | kombi | 2003 | manuell | 0 | focus | 150000.0 | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
#Significant majority vehicles were registered in the last 30 years. With most of these vehicles being 5 - 15 years
#old. From 1995 - 2012. Indicating people tend to sell vehicles which are over 5 years old
autos_copy['registration_year'].value_counts().sort_index()
1910 9 1927 1 1929 1 1931 1 1934 2 1937 4 1938 1 1939 1 1941 2 1943 1 1948 1 1950 3 1951 2 1952 1 1953 1 1954 2 1955 2 1956 5 1957 2 1958 4 1959 7 1960 34 1961 6 1962 4 1963 9 1964 12 1965 17 1966 22 1967 27 1968 26 ... 1987 75 1988 142 1989 181 1990 395 1991 356 1992 391 1993 445 1994 660 1995 1313 1996 1444 1997 2028 1998 2453 1999 3000 2000 3354 2001 2703 2002 2533 2003 2727 2004 2737 2005 3015 2006 2708 2007 2304 2008 2231 2009 2098 2010 1597 2011 1634 2012 1323 2013 806 2014 666 2015 399 2016 1316 Name: registration_year, Length: 78, dtype: int64
autos_copy['brand'].value_counts(normalize=True).sort_values(ascending=False)
volkswagen 0.212126 bmw 0.110019 opel 0.108166 mercedes_benz 0.095361 audi 0.086387 ford 0.069793 renault 0.047347 peugeot 0.029524 fiat 0.025860 seat 0.018177 skoda 0.016032 mazda 0.015137 nissan 0.015095 citroen 0.013929 smart 0.013909 toyota 0.012472 sonstige_autos 0.010952 hyundai 0.009848 volvo 0.009245 mini 0.008641 mitsubishi 0.008141 honda 0.007850 kia 0.007100 alfa_romeo 0.006621 porsche 0.006101 suzuki 0.005913 chevrolet 0.005705 chrysler 0.003665 dacia 0.002561 daihatsu 0.002561 jeep 0.002249 subaru 0.002186 land_rover 0.002040 saab 0.001603 jaguar 0.001582 trabant 0.001562 daewoo 0.001499 rover 0.001353 lancia 0.001083 lada 0.000604 Name: brand, dtype: float64
brand_counts = autos_copy['brand'].value_counts(normalize=True)
top_brands = brand_counts[brand_counts > .05].index
top_brands
#Volkswagen is the market leader with over 21% market share
Index(['volkswagen', 'bmw', 'opel', 'mercedes_benz', 'audi', 'ford'], dtype='object')
autos_copy.groupby('brand')['price'].mean()
brand alfa_romeo 3984.855346 audi 9093.650036 bmw 8334.645155 chevrolet 6488.981752 chrysler 3229.443182 citroen 44534.796712 dacia 5915.528455 daewoo 1019.861111 daihatsu 1556.382114 fiat 2711.801127 ford 7263.015811 honda 3988.000000 hyundai 5308.539112 jaguar 11176.197368 jeep 11434.750000 kia 5789.351906 lada 2502.896552 lancia 3246.365385 land_rover 19108.091837 mazda 4010.771664 mercedes_benz 30317.447817 mini 10460.012048 mitsubishi 3333.800512 nissan 4664.891034 opel 5252.616554 peugeot 3039.468265 porsche 44553.467577 renault 2395.416447 rover 1528.338462 saab 3211.649351 seat 4296.492554 skoda 6334.919481 smart 3542.706587 sonstige_autos 39621.779468 subaru 3765.038095 suzuki 3995.757042 toyota 5115.333890 trabant 1552.080000 volkswagen 6516.457597 volvo 4757.108108 Name: price, dtype: float64
autos_copy.groupby(['brand'])['price'].mean().sort_values(ascending=False)
brand porsche 44553.467577 citroen 44534.796712 sonstige_autos 39621.779468 mercedes_benz 30317.447817 land_rover 19108.091837 jeep 11434.750000 jaguar 11176.197368 mini 10460.012048 audi 9093.650036 bmw 8334.645155 ford 7263.015811 volkswagen 6516.457597 chevrolet 6488.981752 skoda 6334.919481 dacia 5915.528455 kia 5789.351906 hyundai 5308.539112 opel 5252.616554 toyota 5115.333890 volvo 4757.108108 nissan 4664.891034 seat 4296.492554 mazda 4010.771664 suzuki 3995.757042 honda 3988.000000 alfa_romeo 3984.855346 subaru 3765.038095 smart 3542.706587 mitsubishi 3333.800512 lancia 3246.365385 chrysler 3229.443182 saab 3211.649351 peugeot 3039.468265 fiat 2711.801127 lada 2502.896552 renault 2395.416447 daihatsu 1556.382114 trabant 1552.080000 rover 1528.338462 daewoo 1019.861111 Name: price, dtype: float64
brand_mean_price ={}
for brand in top_brands:
top_brand = autos_copy[autos_copy['brand'] == brand]
mean_price = top_brand['price'].mean()
brand_mean_price[brand] = int(mean_price)
print(mean_data)
#The data would indicate that of the top brand ie. have greater than 5% market
#Mercedes is the most expensive car on average
#Opel is the least expensive car average
{'mercedes_benz': 30317, 'audi': 9093, 'volkswagen': 6516, 'opel': 5252, 'ford': 7263, 'bmw': 8334}
bmp_series = pandas.Series(brand_mean_price)
print(bmp_series)
audi 9093 bmw 8334 ford 7263 mercedes_benz 30317 opel 5252 volkswagen 6516 dtype: int64
brand_mean_mileage ={}
for brand in top_brands:
top_brand = autos_copy[autos_copy['brand'] == brand]
mean_mileage = top_brand['odometer_km'].mean()
brand_mean_mileage[brand] = int(mean_mileage)
print(brand_mean_mileage)
{'mercedes_benz': 130860, 'audi': 129287, 'volkswagen': 128730, 'opel': 129227, 'ford': 124046, 'bmw': 132434}
mean_price = pandas.Series(brand_mean_price).sort_values(ascending=False)
mean_mileage = pandas.Series(brand_mean_mileage).sort_values(ascending=False)
print(mean_price)
print(type(mean_price))
print(mean_mileage)
print(type(mean_mileage))
mercedes_benz 30317 audi 9093 bmw 8334 ford 7263 volkswagen 6516 opel 5252 dtype: int64 <class 'pandas.core.series.Series'> bmw 132434 mercedes_benz 130860 audi 129287 opel 129227 volkswagen 128730 ford 124046 dtype: int64 <class 'pandas.core.series.Series'>
pandas.DataFrame([mean_price]).transpose()
0 | |
---|---|
mercedes_benz | 30317 |
audi | 9093 |
bmw | 8334 |
ford | 7263 |
volkswagen | 6516 |
opel | 5252 |
pandas.concat([mean_price,mean_mileage],keys=['mean_price','mean_mileage'],axis=1)
mean_price | mean_mileage | |
---|---|---|
audi | 9093 | 129287 |
bmw | 8334 | 132434 |
ford | 7263 | 124046 |
mercedes_benz | 30317 | 130860 |
opel | 5252 | 129227 |
volkswagen | 6516 | 128730 |
#The data does does indicate that the more expensive brand have lower odometer readings (mileage)
#mercedes has the highest price and the second highest mileage
#opel has the lowest mean price with low mileage