Cleaning and preparing the dataset of used cars from eBay Kleinanzeigen
import pandas as pd
autos = pd.read_csv("autos.csv", encoding="Latin-1")
#autos.to_csv ('C:\python_notes\Dataquest\PANDAS\autos.csv', index = None, header=True)
autos
dateCrawled | name | seller | offerType | price | abtest | vehicleType | yearOfRegistration | gearbox | powerPS | model | odometer | monthOfRegistration | fuelType | brand | notRepairedDamage | dateCreated | nrOfPictures | postalCode | lastSeen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | $5,000 | control | bus | 2004 | manuell | 158 | andere | 150,000km | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | $8,500 | control | limousine | 1997 | automatik | 286 | 7er | 150,000km | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | $8,990 | test | limousine | 2009 | manuell | 102 | golf | 70,000km | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | $4,350 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70,000km | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | $1,350 | test | kombi | 2003 | manuell | 0 | focus | 150,000km | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
49995 | 2016-03-27 14:38:19 | Audi_Q5_3.0_TDI_qu._S_tr.__Navi__Panorama__Xenon | privat | Angebot | $24,900 | control | limousine | 2011 | automatik | 239 | q5 | 100,000km | 1 | diesel | audi | nein | 2016-03-27 00:00:00 | 0 | 82131 | 2016-04-01 13:47:40 |
49996 | 2016-03-28 10:50:25 | Opel_Astra_F_Cabrio_Bertone_Edition___TÜV_neu+... | privat | Angebot | $1,980 | control | cabrio | 1996 | manuell | 75 | astra | 150,000km | 5 | benzin | opel | nein | 2016-03-28 00:00:00 | 0 | 44807 | 2016-04-02 14:18:02 |
49997 | 2016-04-02 14:44:48 | Fiat_500_C_1.2_Dualogic_Lounge | privat | Angebot | $13,200 | test | cabrio | 2014 | automatik | 69 | 500 | 5,000km | 11 | benzin | fiat | nein | 2016-04-02 00:00:00 | 0 | 73430 | 2016-04-04 11:47:27 |
49998 | 2016-03-08 19:25:42 | Audi_A3_2.0_TDI_Sportback_Ambition | privat | Angebot | $22,900 | control | kombi | 2013 | manuell | 150 | a3 | 40,000km | 11 | diesel | audi | nein | 2016-03-08 00:00:00 | 0 | 35683 | 2016-04-05 16:45:07 |
49999 | 2016-03-14 00:42:12 | Opel_Vectra_1.6_16V | privat | Angebot | $1,250 | control | limousine | 1996 | manuell | 101 | vectra | 150,000km | 1 | benzin | opel | nein | 2016-03-13 00:00:00 | 0 | 45897 | 2016-04-06 21:18:48 |
50000 rows × 20 columns
autos.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 50000 entries, 0 to 49999 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 dateCrawled 50000 non-null object 1 name 50000 non-null object 2 seller 50000 non-null object 3 offerType 50000 non-null object 4 price 50000 non-null object 5 abtest 50000 non-null object 6 vehicleType 44905 non-null object 7 yearOfRegistration 50000 non-null int64 8 gearbox 47320 non-null object 9 powerPS 50000 non-null int64 10 model 47242 non-null object 11 odometer 50000 non-null object 12 monthOfRegistration 50000 non-null int64 13 fuelType 45518 non-null object 14 brand 50000 non-null object 15 notRepairedDamage 40171 non-null object 16 dateCreated 50000 non-null object 17 nrOfPictures 50000 non-null int64 18 postalCode 50000 non-null int64 19 lastSeen 50000 non-null object dtypes: int64(5), object(15) memory usage: 7.6+ MB
autos.head()
dateCrawled | name | seller | offerType | price | abtest | vehicleType | yearOfRegistration | gearbox | powerPS | model | odometer | monthOfRegistration | fuelType | brand | notRepairedDamage | dateCreated | nrOfPictures | postalCode | lastSeen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | $5,000 | control | bus | 2004 | manuell | 158 | andere | 150,000km | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | $8,500 | control | limousine | 1997 | automatik | 286 | 7er | 150,000km | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | $8,990 | test | limousine | 2009 | manuell | 102 | golf | 70,000km | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | $4,350 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70,000km | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | $1,350 | test | kombi | 2003 | manuell | 0 | focus | 150,000km | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
autos.rename({"yearOfRegistration":"registration_year","monthOfRegistration":"registration_month","notRepairedDamage":"unrepaired_damage","dateCreated":"ad_created"},axis=1, inplace=True)
colnm = autos.columns
print(colnm)
def camel_tosnake(s):
res = [idx for idx in range(len(s)) if s[idx].isupper()]
if len(res) != 0:
i = res[0]
s=s.lower()
s = s[:i] + "_" + s[i:]
return s
def col_corector(cols):
newcol = []
for col in cols:
ncol = camel_tosnake(col)
newcol.append(ncol)
return newcol
new_colnm = col_corector(colnm)
autos.columns = new_colnm
autos.info()
Index(['dateCrawled', 'name', 'seller', 'offerType', 'price', 'abtest', 'vehicleType', 'registration_year', 'gearbox', 'powerPS', 'model', 'odometer', 'registration_month', 'fuelType', 'brand', 'unrepaired_damage', 'ad_created', 'nrOfPictures', 'postalCode', 'lastSeen'], dtype='object') <class 'pandas.core.frame.DataFrame'> RangeIndex: 50000 entries, 0 to 49999 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date_crawled 50000 non-null object 1 name 50000 non-null object 2 seller 50000 non-null object 3 offer_type 50000 non-null object 4 price 50000 non-null object 5 abtest 50000 non-null object 6 vehicle_type 44905 non-null object 7 registration_year 50000 non-null int64 8 gearbox 47320 non-null object 9 power_ps 50000 non-null int64 10 model 47242 non-null object 11 odometer 50000 non-null object 12 registration_month 50000 non-null int64 13 fuel_type 45518 non-null object 14 brand 50000 non-null object 15 unrepaired_damage 40171 non-null object 16 ad_created 50000 non-null object 17 nr_ofpictures 50000 non-null int64 18 postal_code 50000 non-null int64 19 last_seen 50000 non-null object dtypes: int64(5), object(15) memory usage: 7.6+ MB
autos.head()
date_crawled | name | seller | offer_type | price | abtest | vehicle_type | registration_year | gearbox | power_ps | model | odometer | registration_month | fuel_type | brand | unrepaired_damage | ad_created | nr_ofpictures | postal_code | last_seen | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2016-03-26 17:47:46 | Peugeot_807_160_NAVTECH_ON_BOARD | privat | Angebot | $5,000 | control | bus | 2004 | manuell | 158 | andere | 150,000km | 3 | lpg | peugeot | nein | 2016-03-26 00:00:00 | 0 | 79588 | 2016-04-06 06:45:54 |
1 | 2016-04-04 13:38:56 | BMW_740i_4_4_Liter_HAMANN_UMBAU_Mega_Optik | privat | Angebot | $8,500 | control | limousine | 1997 | automatik | 286 | 7er | 150,000km | 6 | benzin | bmw | nein | 2016-04-04 00:00:00 | 0 | 71034 | 2016-04-06 14:45:08 |
2 | 2016-03-26 18:57:24 | Volkswagen_Golf_1.6_United | privat | Angebot | $8,990 | test | limousine | 2009 | manuell | 102 | golf | 70,000km | 7 | benzin | volkswagen | nein | 2016-03-26 00:00:00 | 0 | 35394 | 2016-04-06 20:15:37 |
3 | 2016-03-12 16:58:10 | Smart_smart_fortwo_coupe_softouch/F1/Klima/Pan... | privat | Angebot | $4,350 | control | kleinwagen | 2007 | automatik | 71 | fortwo | 70,000km | 6 | benzin | smart | nein | 2016-03-12 00:00:00 | 0 | 33729 | 2016-03-15 03:16:28 |
4 | 2016-04-01 14:38:50 | Ford_Focus_1_6_Benzin_TÜV_neu_ist_sehr_gepfleg... | privat | Angebot | $1,350 | test | kombi | 2003 | manuell | 0 | focus | 150,000km | 7 | benzin | ford | nein | 2016-04-01 00:00:00 | 0 | 39218 | 2016-04-01 14:38:50 |
The Dataframe was modified and column names were transformed from camelcase to Python's prefered snakecase.
autos.describe()
registration_year | power_ps | registration_month | nr_ofpictures | postal_code | |
---|---|---|---|---|---|
count | 50000.000000 | 50000.000000 | 50000.000000 | 50000.0 | 50000.000000 |
mean | 2005.073280 | 116.355920 | 5.723360 | 0.0 | 50813.627300 |
std | 105.712813 | 209.216627 | 3.711984 | 0.0 | 25779.747957 |
min | 1000.000000 | 0.000000 | 0.000000 | 0.0 | 1067.000000 |
25% | 1999.000000 | 70.000000 | 3.000000 | 0.0 | 30451.000000 |
50% | 2003.000000 | 105.000000 | 6.000000 | 0.0 | 49577.000000 |
75% | 2008.000000 | 150.000000 | 9.000000 | 0.0 | 71540.000000 |
max | 9999.000000 | 17700.000000 | 12.000000 | 0.0 | 99998.000000 |
autos["price"].unique()
autos["price"] = autos["price"].str.replace("$","")
autos["price"] = autos["price"].str.replace(",","").astype(float)
autos["odometer"] = autos["odometer"].str.replace("km","")
autos["odometer"] = autos["odometer"].str.replace(",","").astype(float)
autos.rename({'odometer':'odometer_km'}, axis=1, inplace = True)
autos.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 50000 entries, 0 to 49999 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date_crawled 50000 non-null object 1 name 50000 non-null object 2 seller 50000 non-null object 3 offer_type 50000 non-null object 4 price 50000 non-null float64 5 abtest 50000 non-null object 6 vehicle_type 44905 non-null object 7 registration_year 50000 non-null int64 8 gearbox 47320 non-null object 9 power_ps 50000 non-null int64 10 model 47242 non-null object 11 odometer_km 50000 non-null float64 12 registration_month 50000 non-null int64 13 fuel_type 45518 non-null object 14 brand 50000 non-null object 15 unrepaired_damage 40171 non-null object 16 ad_created 50000 non-null object 17 nr_ofpictures 50000 non-null int64 18 postal_code 50000 non-null int64 19 last_seen 50000 non-null object dtypes: float64(2), int64(5), object(13) memory usage: 7.6+ MB
autos["price"].describe()
count 5.000000e+04 mean 9.840044e+03 std 4.811044e+05 min 0.000000e+00 25% 1.100000e+03 50% 2.950000e+03 75% 7.200000e+03 max 1.000000e+08 Name: price, dtype: float64
autos = autos[(autos["price"] >= 500) & (autos["price"] < 99999999)]
autos["price"].describe()
count 4.511000e+04 mean 8.668783e+03 std 1.868146e+05 min 5.000000e+02 25% 1.500000e+03 50% 3.500000e+03 75% 7.900000e+03 max 2.732222e+07 Name: price, dtype: float64
autos["odometer_km"].describe()
count 45110.000000 mean 125288.406118 std 39627.664157 min 5000.000000 25% 100000.000000 50% 150000.000000 75% 150000.000000 max 150000.000000 Name: odometer_km, dtype: float64
price_range = autos["price"].sort_values(ascending=True).unique()
odometer_range = autos["odometer_km"].sort_values(ascending=True).unique()
from datetime import datetime
pd.to_datetime(autos['ad_created'], format='%Y-%m-%d %H:%M:%S')
0 2016-03-26 1 2016-04-04 2 2016-03-26 3 2016-03-12 4 2016-04-01 ... 49995 2016-03-27 49996 2016-03-28 49997 2016-04-02 49998 2016-03-08 49999 2016-03-13 Name: ad_created, Length: 45110, dtype: datetime64[ns]
autos.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 45110 entries, 0 to 49999 Data columns (total 20 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 date_crawled 45110 non-null object 1 name 45110 non-null object 2 seller 45110 non-null object 3 offer_type 45110 non-null object 4 price 45110 non-null float64 5 abtest 45110 non-null object 6 vehicle_type 41432 non-null object 7 registration_year 45110 non-null int64 8 gearbox 43292 non-null object 9 power_ps 45110 non-null int64 10 model 43040 non-null object 11 odometer_km 45110 non-null float64 12 registration_month 45110 non-null int64 13 fuel_type 41906 non-null object 14 brand 45110 non-null object 15 unrepaired_damage 37588 non-null object 16 ad_created 45110 non-null object 17 nr_ofpictures 45110 non-null int64 18 postal_code 45110 non-null int64 19 last_seen 45110 non-null object dtypes: float64(2), int64(5), object(13) memory usage: 7.2+ MB
autos['last_seen'].dtype
dtype('O')
pd.to_datetime(autos['last_seen'], format='%Y-%m-%d %H:%M:%S')
0 2016-04-06 06:45:54 1 2016-04-06 14:45:08 2 2016-04-06 20:15:37 3 2016-03-15 03:16:28 4 2016-04-01 14:38:50 ... 49995 2016-04-01 13:47:40 49996 2016-04-02 14:18:02 49997 2016-04-04 11:47:27 49998 2016-04-05 16:45:07 49999 2016-04-06 21:18:48 Name: last_seen, Length: 45110, dtype: datetime64[ns]
autos['ad_created'].value_counts(normalize=True, dropna=False).sort_index()
2015-06-11 00:00:00 0.000022 2015-08-10 00:00:00 0.000022 2015-09-09 00:00:00 0.000022 2015-11-10 00:00:00 0.000022 2015-12-05 00:00:00 0.000022 ... 2016-04-03 00:00:00 0.039038 2016-04-04 00:00:00 0.037021 2016-04-05 00:00:00 0.011904 2016-04-06 00:00:00 0.003259 2016-04-07 00:00:00 0.001197 Name: ad_created, Length: 76, dtype: float64
autos['date_crawled'].value_counts(normalize=True, dropna=False).sort_index()
2016-03-05 14:06:30 0.000022 2016-03-05 14:06:40 0.000022 2016-03-05 14:07:08 0.000022 2016-03-05 14:07:21 0.000022 2016-03-05 14:07:26 0.000022 ... 2016-04-07 14:30:09 0.000022 2016-04-07 14:30:26 0.000022 2016-04-07 14:36:44 0.000022 2016-04-07 14:36:55 0.000022 2016-04-07 14:36:56 0.000022 Name: date_crawled, Length: 43672, dtype: float64
autos['last_seen'].value_counts(normalize=True, dropna=False).sort_index()
2016-03-05 14:45:46 0.000022 2016-03-05 14:46:02 0.000022 2016-03-05 14:49:34 0.000022 2016-03-05 15:16:11 0.000022 2016-03-05 15:16:47 0.000022 ... 2016-04-07 14:58:44 0.000067 2016-04-07 14:58:45 0.000022 2016-04-07 14:58:46 0.000022 2016-04-07 14:58:48 0.000067 2016-04-07 14:58:50 0.000067 Name: last_seen, Length: 35969, dtype: float64
adds are all crawled and seen in 2016
autos['registration_year'].describe()
count 45110.000000 mean 2005.061605 std 89.639687 min 1000.000000 25% 2000.000000 50% 2004.000000 75% 2008.000000 max 9999.000000 Name: registration_year, dtype: float64
first car enregistered in year 1000, and the latest car registered for year 9999
autos = autos[(autos["registration_year"]>=1940) & (autos["registration_year"]<2017)]
autos['registration_year'].describe()
count 43321.000000 mean 2003.246232 std 6.967787 min 1941.000000 25% 1999.000000 50% 2004.000000 75% 2008.000000 max 2016.000000 Name: registration_year, dtype: float64
The range of registration years is corrected and car registered between 1941 and 2016 are taken into account. The data shows that most cas are registered after 2000.
autos['registration_year'].value_counts(normalize=True)
2005 0.066111 2000 0.062672 2004 0.061933 2006 0.061564 2003 0.061333 ... 1943 0.000023 1950 0.000023 1948 0.000023 1953 0.000023 1952 0.000023 Name: registration_year, Length: 70, dtype: float64
Looking for the top 20 brands
We can do the analysis simply by using groupby operation
autos.groupby('brand')['price'].mean()
brand alfa_romeo 4389.846690 audi 9613.647794 bmw 8827.709011 chevrolet 6810.153257 chrysler 3627.064103 citroen 48505.030945 dacia 5915.528455 daewoo 1219.344828 daihatsu 1965.638298 fiat 3256.152110 ford 8566.790928 honda 4420.485207 hyundai 5686.940909 jaguar 12129.600000 jeep 11650.500000 kia 6196.229560 lada 2780.153846 lancia 3879.627907 land_rover 19108.091837 mazda 4459.269231 mercedes_benz 8767.798374 mini 10691.061576 mitsubishi 3848.997015 nissan 5175.896923 opel 6234.633686 peugeot 3360.920597 porsche 46955.151079 renault 2820.227249 rover 1791.685185 saab 3415.944444 seat 4810.883871 skoda 6558.121131 smart 3611.044275 sonstige_autos 49670.088517 subaru 4554.290698 suzuki 4415.355469 toyota 5266.970740 trabant 2331.625000 volkswagen 7209.491343 volvo 5189.423645 Name: price, dtype: float64
# without using groupby we can use dicionaries to do our analysis
# all the brands sold
brand_unique = autos['brand'].unique()
nbr_cars_brand = dict()
for brand, price, mileage in zip(autos['brand'],autos['price'],autos['odometer_km']):
if brand in nbr_cars_brand:
nbr_cars_brand[brand][0] += 1
nbr_cars_brand[brand][1] += price
nbr_cars_brand[brand][2] += mileage
else:
nbr_cars_brand[brand] = [1]
nbr_cars_brand[brand].append(price)
nbr_cars_brand[brand].append(mileage)
# the dictionary contains total sale and number sold y brand
nbr_cars_brand
{'peugeot': [1272, 4275091.0, 160365000.0], 'bmw': [4983, 43988474.0, 662215000.0], 'volkswagen': [9183, 66204759.0, 1177645000.0], 'smart': [655, 2365234.0, 65235000.0], 'ford': [2822, 24175484.0, 348780000.0], 'chrysler': [156, 565822.0, 20705000.0], 'renault': [1901, 5361252.0, 240315000.0], 'audi': [3921, 37695113.0, 505455000.0], 'sonstige_autos': [418, 20762097.0, 37875000.0], 'mazda': [650, 2898525.0, 80460000.0], 'porsche': [278, 13053532.0, 27470000.0], 'mini': [406, 4340571.0, 36005000.0], 'mercedes_benz': [4429, 38832579.0, 580410000.0], 'seat': [775, 3728435.0, 93045000.0], 'toyota': [581, 3060110.0, 67135000.0], 'opel': [4346, 27095718.0, 556465000.0], 'dacia': [123, 727610.0, 10365000.0], 'nissan': [650, 3364333.0, 75970000.0], 'jeep': [106, 1234953.0, 13475000.0], 'saab': [72, 245948.0, 10370000.0], 'volvo': [406, 2106906.0, 56335000.0], 'jaguar': [70, 849072.0, 8780000.0], 'fiat': [1019, 3318019.0, 116590000.0], 'skoda': [743, 4872684.0, 82370000.0], 'subaru': [86, 391669.0, 10830000.0], 'kia': [318, 1970401.0, 35335000.0], 'citroen': [614, 29782089.0, 72680000.0], 'mitsubishi': [335, 1289414.0, 41915000.0], 'chevrolet': [261, 1777450.0, 26215000.0], 'hyundai': [440, 2502254.0, 46145000.0], 'honda': [338, 1494124.0, 41085000.0], 'daewoo': [58, 70722.0, 6825000.0], 'suzuki': [256, 1130331.0, 27260000.0], 'trabant': [48, 111918.0, 2530000.0], 'land_rover': [98, 1872593.0, 11565000.0], 'alfa_romeo': [287, 1259886.0, 37555000.0], 'lada': [26, 72284.0, 2205000.0], 'rover': [54, 96751.0, 7405000.0], 'daihatsu': [94, 184770.0, 10755000.0], 'lancia': [43, 166824.0, 5095000.0]}
#This dataframe contains total sale and number sold with average price by brand
brands_sales_price = pd.DataFrame.from_dict(nbr_cars_brand, orient = 'index', columns=['total_sold', 'total_value','total_mileage'])
brands_sales_price['average_price'] = brands_sales_price['total_value']/brands_sales_price['total_sold']
brands_sales_price['average_mileage'] = brands_sales_price['total_mileage']/brands_sales_price['total_sold']
brands_sales_price.info()
<class 'pandas.core.frame.DataFrame'> Index: 40 entries, peugeot to lancia Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 total_sold 40 non-null int64 1 total_value 40 non-null float64 2 total_mileage 40 non-null float64 3 average_price 40 non-null float64 4 average_mileage 40 non-null float64 dtypes: float64(4), int64(1) memory usage: 1.9+ KB
By sorting the dataframe we have the top10 most expensive cars
brands_sales_sortprice = brands_sales_price.sort_values(by='average_price',ascending=False)
#10 highest brands in terms of price
brands_sales_sortprice
total_sold | total_value | total_mileage | average_price | average_mileage | |
---|---|---|---|---|---|
sonstige_autos | 418 | 20762097.0 | 3.787500e+07 | 49670.088517 | 90610.047847 |
citroen | 614 | 29782089.0 | 7.268000e+07 | 48505.030945 | 118371.335505 |
porsche | 278 | 13053532.0 | 2.747000e+07 | 46955.151079 | 98812.949640 |
land_rover | 98 | 1872593.0 | 1.156500e+07 | 19108.091837 | 118010.204082 |
jaguar | 70 | 849072.0 | 8.780000e+06 | 12129.600000 | 125428.571429 |
jeep | 106 | 1234953.0 | 1.347500e+07 | 11650.500000 | 127122.641509 |
mini | 406 | 4340571.0 | 3.600500e+07 | 10691.061576 | 88682.266010 |
audi | 3921 | 37695113.0 | 5.054550e+08 | 9613.647794 | 128909.716909 |
bmw | 4983 | 43988474.0 | 6.622150e+08 | 8827.709011 | 132894.842464 |
mercedes_benz | 4429 | 38832579.0 | 5.804100e+08 | 8767.798374 | 131047.640551 |
ford | 2822 | 24175484.0 | 3.487800e+08 | 8566.790928 | 123593.196315 |
volkswagen | 9183 | 66204759.0 | 1.177645e+09 | 7209.491343 | 128241.859959 |
chevrolet | 261 | 1777450.0 | 2.621500e+07 | 6810.153257 | 100440.613027 |
skoda | 743 | 4872684.0 | 8.237000e+07 | 6558.121131 | 110861.372813 |
opel | 4346 | 27095718.0 | 5.564650e+08 | 6234.633686 | 128040.727105 |
kia | 318 | 1970401.0 | 3.533500e+07 | 6196.229560 | 111116.352201 |
dacia | 123 | 727610.0 | 1.036500e+07 | 5915.528455 | 84268.292683 |
hyundai | 440 | 2502254.0 | 4.614500e+07 | 5686.940909 | 104875.000000 |
toyota | 581 | 3060110.0 | 6.713500e+07 | 5266.970740 | 115550.774527 |
volvo | 406 | 2106906.0 | 5.633500e+07 | 5189.423645 | 138756.157635 |
nissan | 650 | 3364333.0 | 7.597000e+07 | 5175.896923 | 116876.923077 |
seat | 775 | 3728435.0 | 9.304500e+07 | 4810.883871 | 120058.064516 |
subaru | 86 | 391669.0 | 1.083000e+07 | 4554.290698 | 125930.232558 |
mazda | 650 | 2898525.0 | 8.046000e+07 | 4459.269231 | 123784.615385 |
honda | 338 | 1494124.0 | 4.108500e+07 | 4420.485207 | 121553.254438 |
suzuki | 256 | 1130331.0 | 2.726000e+07 | 4415.355469 | 106484.375000 |
alfa_romeo | 287 | 1259886.0 | 3.755500e+07 | 4389.846690 | 130853.658537 |
lancia | 43 | 166824.0 | 5.095000e+06 | 3879.627907 | 118488.372093 |
mitsubishi | 335 | 1289414.0 | 4.191500e+07 | 3848.997015 | 125119.402985 |
chrysler | 156 | 565822.0 | 2.070500e+07 | 3627.064103 | 132724.358974 |
smart | 655 | 2365234.0 | 6.523500e+07 | 3611.044275 | 99595.419847 |
saab | 72 | 245948.0 | 1.037000e+07 | 3415.944444 | 144027.777778 |
peugeot | 1272 | 4275091.0 | 1.603650e+08 | 3360.920597 | 126073.113208 |
fiat | 1019 | 3318019.0 | 1.165900e+08 | 3256.152110 | 114416.094210 |
renault | 1901 | 5361252.0 | 2.403150e+08 | 2820.227249 | 126415.044713 |
lada | 26 | 72284.0 | 2.205000e+06 | 2780.153846 | 84807.692308 |
trabant | 48 | 111918.0 | 2.530000e+06 | 2331.625000 | 52708.333333 |
daihatsu | 94 | 184770.0 | 1.075500e+07 | 1965.638298 | 114414.893617 |
rover | 54 | 96751.0 | 7.405000e+06 | 1791.685185 | 137129.629630 |
daewoo | 58 | 70722.0 | 6.825000e+06 | 1219.344828 | 117672.413793 |
brands_sales_sortmileage = brands_sales_price.sort_values(by='average_mileage',ascending=False)
#10 highest brands in terms of price
brands_sales_sortmileage
total_sold | total_value | total_mileage | average_price | average_mileage | |
---|---|---|---|---|---|
saab | 72 | 245948.0 | 1.037000e+07 | 3415.944444 | 144027.777778 |
volvo | 406 | 2106906.0 | 5.633500e+07 | 5189.423645 | 138756.157635 |
rover | 54 | 96751.0 | 7.405000e+06 | 1791.685185 | 137129.629630 |
bmw | 4983 | 43988474.0 | 6.622150e+08 | 8827.709011 | 132894.842464 |
chrysler | 156 | 565822.0 | 2.070500e+07 | 3627.064103 | 132724.358974 |
mercedes_benz | 4429 | 38832579.0 | 5.804100e+08 | 8767.798374 | 131047.640551 |
alfa_romeo | 287 | 1259886.0 | 3.755500e+07 | 4389.846690 | 130853.658537 |
audi | 3921 | 37695113.0 | 5.054550e+08 | 9613.647794 | 128909.716909 |
volkswagen | 9183 | 66204759.0 | 1.177645e+09 | 7209.491343 | 128241.859959 |
opel | 4346 | 27095718.0 | 5.564650e+08 | 6234.633686 | 128040.727105 |
jeep | 106 | 1234953.0 | 1.347500e+07 | 11650.500000 | 127122.641509 |
renault | 1901 | 5361252.0 | 2.403150e+08 | 2820.227249 | 126415.044713 |
peugeot | 1272 | 4275091.0 | 1.603650e+08 | 3360.920597 | 126073.113208 |
subaru | 86 | 391669.0 | 1.083000e+07 | 4554.290698 | 125930.232558 |
jaguar | 70 | 849072.0 | 8.780000e+06 | 12129.600000 | 125428.571429 |
mitsubishi | 335 | 1289414.0 | 4.191500e+07 | 3848.997015 | 125119.402985 |
mazda | 650 | 2898525.0 | 8.046000e+07 | 4459.269231 | 123784.615385 |
ford | 2822 | 24175484.0 | 3.487800e+08 | 8566.790928 | 123593.196315 |
honda | 338 | 1494124.0 | 4.108500e+07 | 4420.485207 | 121553.254438 |
seat | 775 | 3728435.0 | 9.304500e+07 | 4810.883871 | 120058.064516 |
lancia | 43 | 166824.0 | 5.095000e+06 | 3879.627907 | 118488.372093 |
citroen | 614 | 29782089.0 | 7.268000e+07 | 48505.030945 | 118371.335505 |
land_rover | 98 | 1872593.0 | 1.156500e+07 | 19108.091837 | 118010.204082 |
daewoo | 58 | 70722.0 | 6.825000e+06 | 1219.344828 | 117672.413793 |
nissan | 650 | 3364333.0 | 7.597000e+07 | 5175.896923 | 116876.923077 |
toyota | 581 | 3060110.0 | 6.713500e+07 | 5266.970740 | 115550.774527 |
fiat | 1019 | 3318019.0 | 1.165900e+08 | 3256.152110 | 114416.094210 |
daihatsu | 94 | 184770.0 | 1.075500e+07 | 1965.638298 | 114414.893617 |
kia | 318 | 1970401.0 | 3.533500e+07 | 6196.229560 | 111116.352201 |
skoda | 743 | 4872684.0 | 8.237000e+07 | 6558.121131 | 110861.372813 |
suzuki | 256 | 1130331.0 | 2.726000e+07 | 4415.355469 | 106484.375000 |
hyundai | 440 | 2502254.0 | 4.614500e+07 | 5686.940909 | 104875.000000 |
chevrolet | 261 | 1777450.0 | 2.621500e+07 | 6810.153257 | 100440.613027 |
smart | 655 | 2365234.0 | 6.523500e+07 | 3611.044275 | 99595.419847 |
porsche | 278 | 13053532.0 | 2.747000e+07 | 46955.151079 | 98812.949640 |
sonstige_autos | 418 | 20762097.0 | 3.787500e+07 | 49670.088517 | 90610.047847 |
mini | 406 | 4340571.0 | 3.600500e+07 | 10691.061576 | 88682.266010 |
lada | 26 | 72284.0 | 2.205000e+06 | 2780.153846 | 84807.692308 |
dacia | 123 | 727610.0 | 1.036500e+07 | 5915.528455 | 84268.292683 |
trabant | 48 | 111918.0 | 2.530000e+06 | 2331.625000 | 52708.333333 |