import pandas as pd
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Read the scraped listings from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer='houses.csv')
# Row count of the freshly loaded table.
df.shape[0]
452
# Print each column's non-null count and dtype to spot empty or sparse columns.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 452 entries, 0 to 451 Data columns (total 24 columns): address 85 non-null object area 452 non-null int64 balcony 0 non-null float64 cellar 452 non-null bool city 452 non-null object contact_name 391 non-null object district 452 non-null object extra_costs 0 non-null float64 garden 0 non-null float64 immo_id 452 non-null int64 kitchen 0 non-null float64 lat 81 non-null float64 lng 81 non-null float64 media_count 452 non-null int64 private 452 non-null bool rent 452 non-null int64 rooms 452 non-null float64 sqm 452 non-null float64 time_dest 0 non-null float64 time_dest2 0 non-null float64 time_dest3 0 non-null float64 title 452 non-null object url 452 non-null object zip_code 452 non-null int64 dtypes: bool(2), float64(11), int64(5), object(6) memory usage: 78.6+ KB
# Preview the first five listings.
df.head(n=5)
address | area | balcony | cellar | city | contact_name | district | extra_costs | garden | immo_id | ... | private | rent | rooms | sqm | time_dest | time_dest2 | time_dest3 | title | url | zip_code | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | NaN | 100 | NaN | False | Ostprignitz-Ruppin (Kreis) | Dipl.-Ing.Ralf Erich Hans Steinmann | Kyritz | NaN | NaN | 94113154 | ... | False | 39000 | 5.0 | 100.0 | NaN | NaN | NaN | Kyritz-Citylage! Auch in zwei Wohneinheiten te... | https://www.immobilienscout24.de/expose/94113154 | 16866 |
1 | NaN | 1060 | NaN | False | Dahme-Spreewald (Kreis) | Bernhard Büttner | Golßen | NaN | NaN | 89713144 | ... | False | 50000 | 4.0 | 187.0 | NaN | NaN | NaN | ***Handwerkerobjekt in Golßen im idyllischen G... | https://www.immobilienscout24.de/expose/89713144 | 15938 |
2 | Am Weinberg II, Hausnummer 501 | 306 | NaN | False | Dahme-Spreewald (Kreis) | Plettner & Brecht Immobilien GmbH | Schwielochsee | NaN | NaN | 106219505 | ... | False | 14900 | 2.0 | 40.0 | NaN | NaN | NaN | Bezugsfreies Wochenendbungalow nahe dem Kleine... | https://www.immobilienscout24.de/expose/106219505 | 15913 |
3 | NaN | 627 | NaN | False | Potsdam-Mittelmark (Kreis) | Sibylle Heide | Havelsee | NaN | NaN | 106213085 | ... | False | 16000 | 1.0 | 25.0 | NaN | NaN | NaN | Den Sommer genießen | https://www.immobilienscout24.de/expose/106213085 | 14798 |
4 | NaN | 1302 | NaN | False | Dahme-Spreewald (Kreis) | Bernhard Büttner | Münchehofe | NaN | NaN | 102951766 | ... | False | 49000 | 2.0 | 40.0 | NaN | NaN | NaN | ***Erholungsgrundstück mit kleiner Doppelhaush... | https://www.immobilienscout24.de/expose/102951766 | 15748 |
5 rows × 24 columns
# Identifiers are labels, not quantities: keep them as strings so they are
# left out of numeric summaries and can be one-hot encoded later.
df["immo_id"] = df["immo_id"].astype(str)
# zip_code is parsed as an integer, which strips the leading zero of
# five-digit German postal codes (04916 becomes 4916). Pad it back.
df["zip_code"] = df["zip_code"].astype(str).str.zfill(5)
# The time_dest* columns hold no values in this dataset, so drop them.
df2 = df.drop(columns=["time_dest", "time_dest2", "time_dest3"])
# Summary statistics of the remaining numeric columns.
df2.describe()
area | balcony | extra_costs | garden | kitchen | lat | lng | media_count | rent | rooms | sqm | |
---|---|---|---|---|---|---|---|---|---|---|---|
count | 452.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 81.000000 | 81.000000 | 452.000000 | 452.000000 | 452.000000 | 452.000000 |
mean | 1670.946903 | NaN | NaN | NaN | NaN | 52.545090 | 13.556852 | 13.889381 | 59969.533186 | 4.775442 | 127.200509 |
std | 2902.259936 | NaN | NaN | NaN | NaN | 0.640315 | 0.808090 | 13.360138 | 26265.177687 | 4.038605 | 133.685552 |
min | 0.000000 | NaN | NaN | NaN | NaN | 51.388860 | 11.586090 | 0.000000 | 1.000000 | 1.000000 | 0.000000 |
25% | 433.750000 | NaN | NaN | NaN | NaN | 52.015330 | 13.161210 | 6.000000 | 37375.000000 | 3.000000 | 70.000000 |
50% | 816.500000 | NaN | NaN | NaN | NaN | 52.651200 | 13.692740 | 11.000000 | 60000.000000 | 4.000000 | 105.000000 |
75% | 1659.000000 | NaN | NaN | NaN | NaN | 53.150370 | 14.150960 | 17.000000 | 82000.000000 | 5.000000 | 143.250000 |
max | 35393.000000 | NaN | NaN | NaN | NaN | 53.537940 | 14.714360 | 137.000000 | 100000.000000 | 40.000000 | 1550.000000 |
# Number of listings per contact person, busiest contacts first.
listings_per_contact = df2.groupby("contact_name").size()
listings_per_contact.sort_values(ascending=False)
contact_name Sebastian Stelzig 15 Linda Buder 11 Anja Emmrich 11 Ina Ferchof 11 Kathleen Golembieski 10 Stefan Ganter 9 Maik Neumann 7 Anne Koplin 7 Silke Boldt 7 Maik Mettke 7 Haldor Kahmann 6 Johanna Pabst 6 Alexander Rainer Kremser 5 Christian Moll 5 Rebecca Steinhardt 5 Jens Madsen 5 Marco Klewe 5 Uwe Eschert 5 Heike Lemcke 5 Dipl.-Ing.Ralf Erich Hans Steinmann 5 Mario Cierpka 4 Klaus Vehlow 4 Frank Sack 4 Robert Juckel 4 Ronald Kulnick 4 Stephan Hikel 4 Sarah-Sophie Derling 4 Dietmar Grünberg 3 Dieter Langer 3 Detlef Horn 3 .. Kay Bobach 1 beimco Bestensee Immobilien Consulting GmbH 1 Karola Fleischer 1 Jörn Hilpert 1 Kurt Lewin 1 Jörg Stephan 1 Jörg Schröter 1 Jörg Rus 1 Jörg Hoffmann 1 Jessica Hofmann 1 Jens Renk 1 Knut Bukowiecki 1 König Immobilien Mühlhausen UG (haftungsbeschränkt) 1 Monika Keilholz 1 Marco Dülgerow 1 Matthias Pilz 1 Martin Becker 1 Marta Rohe 1 Mario Todtmann 1 Marina Haseloff 1 Marcus Mukrasch 1 Marcel Multhauf 1 L. Schwendy 1 Marcel Buhse 1 Marc Märtens 1 Manuela Medding 1 Maik Lietz 1 Madeleine Waldmann 1 Lothar Stenzel 1 A4RES Advisory GmbH Vermarktung 1 Length: 187, dtype: int64
# Number of listings per district, most frequent districts first.
listings_per_district = df2.groupby("district").size()
listings_per_district.sort_values(ascending=False)
district Forst (Lausitz) 15 Schenkendöbern 12 Spremberg 11 Karstädt 10 Doberlug-Kirchhain 9 Belzig 7 Elsterwerda 7 Mühlberg/Elbe 7 Lauchhammer 7 Oderberg 6 Lenzen (Elbe) 6 Angermünde 6 Nordwestuckermark 6 Uebigau-Wahrenbrück 5 Neuhausen/Spree 5 Oranienburg 5 Treuenbrietzen 5 Pritzwalk 5 Sonnewalde 5 Großräschen 5 Guben 5 Sallgast 5 Letschin 5 Uckerland 4 Dahme/Mark 4 Welzow 4 Groß Pankow (Prignitz) 4 Wittstock/Dosse 4 Plattenburg 4 Herzberg/Elster 4 .. Lanz 1 Lebusa 1 Lichterfeld-Schacksdorf 1 Lieberose 1 Liepe 1 Lindenau 1 Lindendorf 1 Linthe 1 Marienwerder 1 Meyenburg 1 Milower Land 1 Märkisch Luch 1 Müllrose 1 Neißemünde 1 Rietz-Neuendorf 1 Neuhardenberg 1 Neulewin 1 Neupetershain 1 Nord 1 Oberuckersee 1 Perleberg 1 Pinnow 1 Plaue 1 Plessa 1 Prötzel 1 Päwesin 1 Randowtal 1 Rauen 1 Reichenow-Möglin 1 Alt Zauche-Wußwerk 1 Length: 203, dtype: int64
# Keep only listings with sqm < 600 and area < 15000; larger values are
# treated as outliers for the plots and models that follow.
# .copy() makes df3 an independent frame, so columns added or replaced on it
# later do not raise SettingWithCopyWarning or risk writing to a temporary.
df3 = df2[(df2.sqm < 600) & (df2.area < 15000)].copy()
# Number of listings that survive the filter.
len(df3)
444
# Price against living space, with points coloured by the area column.
df3.plot.scatter(
    x="sqm",
    y="rent",
    c="area",
    colormap=cm.Set1,
    figsize=(15, 10),
)
<matplotlib.axes._subplots.AxesSubplot at 0x124da1128>
# Listing positions (longitude against latitude), coloured by price.
df3.plot.scatter(
    x="lng",
    y="lat",
    c="rent",
    colormap=cm.Blues,
    figsize=(15, 10),
)
<matplotlib.axes._subplots.AxesSubplot at 0x125027550>
# Distribution of every populated numeric column, 20 bins each.
hist_columns = ["area", "lat", "lng", "media_count", "rent", "rooms", "sqm"]
df3.hist(column=hist_columns, bins=20, figsize=(15, 10))
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1252a9ef0>, <matplotlib.axes._subplots.AxesSubplot object at 0x1254c44a8>, <matplotlib.axes._subplots.AxesSubplot object at 0x1254eab38>], [<matplotlib.axes._subplots.AxesSubplot object at 0x12551c208>, <matplotlib.axes._subplots.AxesSubplot object at 0x125542898>, <matplotlib.axes._subplots.AxesSubplot object at 0x1255428d0>], [<matplotlib.axes._subplots.AxesSubplot object at 0x12559e5f8>, <matplotlib.axes._subplots.AxesSubplot object at 0x1255c5c88>, <matplotlib.axes._subplots.AxesSubplot object at 0x1255f6358>]], dtype=object)
# Pairwise correlation of the numeric and boolean columns.
# The columns are selected explicitly: pandas >= 2.0 no longer drops object
# columns (address, city, title, ...) silently in corr() and raises instead.
corr = df3.select_dtypes(include=["number", "bool"]).corr()
fig, ax = plt.subplots(figsize=(15, 10))
# Diverging palette centred on 0 so the sign of each correlation is visible.
sns.heatmap(
    corr,
    xticklabels=corr.columns,
    yticklabels=corr.index,
    center=0.0,
    cmap=sns.diverging_palette(5, 250, as_cmap=True),
    annot=True,
    fmt=".2f",
    ax=ax,
)
<matplotlib.axes._subplots.AxesSubplot at 0x1256a5320>
# Scatter matrix with a fitted regression line for each pair of columns.
pair_columns = ["sqm", "rooms", "rent", "media_count", "area"]
sns.pairplot(df3, vars=pair_columns, kind="reg")
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result. return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
<seaborn.axisgrid.PairGrid at 0x128c519e8>
import numpy as np
from sklearn import linear_model
from sklearn.metrics import r2_score
# One-hot encoding of zip_codes: one indicator column per distinct zip code.
hot_zip = pd.get_dummies(df3.zip_code)
# balcony, garden and kitchen hold no values in this dataset, and private is
# not used as a feature, so only cellar is converted to a 0/1 integer.
# assign() returns a new frame instead of writing into a slice of df2, which
# avoids the SettingWithCopyWarning raised by attribute assignment.
df3 = df3.assign(cellar=df3.cellar.astype(int))
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/pandas/core/generic.py:4405: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy self[name] = value
# Feature matrix: the numeric attributes plus the one-hot zip code indicators.
X = pd.concat([df3[["sqm", "rooms", "area", "cellar"]], hot_zip], axis=1)
# Target as a 1-D Series. A one-column DataFrame makes scikit-learn emit
# DataConversionWarning and return predictions of shape (n, 1).
y = df3["rent"]
X.head()
sqm | rooms | area | cellar | 14542 | 14547 | 14550 | 14641 | 14669 | 14712 | ... | 4910 | 4916 | 4924 | 4928 | 4931 | 4932 | 4934 | 4936 | 4938 | 7554 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 100.0 | 5.0 | 100 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
1 | 187.0 | 4.0 | 1060 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
2 | 40.0 | 2.0 | 306 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
3 | 25.0 | 1.0 | 627 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
4 | 40.0 | 2.0 | 1302 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
5 rows × 136 columns
# Baseline: a plain least-squares model, fitted and scored on the same data.
# It doesn't perform too well.
simple = linear_model.LinearRegression().fit(X, y)
y_pred = simple.predict(X)
r2_score(y_true=y, y_pred=y_pred)
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/sklearn/linear_model/base.py:509: RuntimeWarning: internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver. linalg.lstsq(X, y)
0.4460684685260624
# Robustly fit linear model with RANSAC (RANdom SAmple Consensus) algorithm.
# RANSAC draws random subsets of the data, so the seed is fixed to make the
# inlier mask and every score derived from it reproducible between runs.
regressor = linear_model.RANSACRegressor(
    linear_model.LinearRegression(),
    random_state=42,
)
regressor.fit(X, y)
RANSACRegressor(base_estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False), is_data_valid=None, is_model_valid=None, loss='absolute_loss', max_skips=inf, max_trials=100, min_samples=None, random_state=None, residual_metric=None, residual_threshold=None, stop_n_inliers=inf, stop_probability=0.99, stop_score=inf)
# Boolean mask of the samples RANSAC kept as inliers, and its complement.
inlier_mask = regressor.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)
# Share of samples flagged as outliers, in percent.
outlier_share = outlier_mask.sum() * 100.0 / outlier_mask.size
print(u'%.1f%% der Wohnungen als Ausreißer identifiziert' % outlier_share)
32.4% der Wohnungen als Ausreißer identifiziert
# Predict a price for every listing with the RANSAC model.
y_pred = regressor.predict(X)
# Store the prediction as a column. np.ravel() flattens an (n, 1) prediction
# to 1-D, and assign() builds a new frame rather than writing into a slice,
# which avoids SettingWithCopyWarning.
df3 = df3.assign(rent_predicted=np.ravel(y_pred))
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
# R² computed only on the samples RANSAC accepted as inliers.
r2_ransac = r2_score(y[inlier_mask], y_pred[inlier_mask])
fig, ax = plt.subplots(figsize=(15, 10))
# Inliers: listed price against predicted price, with a fitted regression line.
# x and y are passed by keyword because seaborn >= 0.12 rejects them as
# positional arguments; keywords also work on older releases.
sns.regplot(
    x=df3['rent'][inlier_mask],
    y=df3['rent_predicted'][inlier_mask],
    ax=ax,
)
# Outliers are overlaid in translucent red.
ax.scatter(
    df3['rent'][outlier_mask],
    df3['rent_predicted'][outlier_mask],
    alpha=0.4,
    c='r',
)
# NOTE(review): the title lists only part of the feature set; X also contains
# area and cellar. Confirm whether the wording should be updated.
ax.set_title('Prediction of price for houses in Brandenburg\n(zip_code, rooms, sqm)')
# Place the label in axes-relative coordinates so it stays in the top-left
# corner whatever the price range is, rather than at a fixed data position.
ax.text(0.05, 0.95, r'$R^2=%.2f$' % r2_ransac,
        transform=ax.transAxes, verticalalignment='top')
fig.tight_layout()
# plt.savefig('LinReg-rent-apartment.png', dpi=150)
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result. return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
# Residual per listing: listed price minus predicted price. Negative values
# mean the listing is priced below the model's prediction.
# assign() returns a new frame, which avoids SettingWithCopyWarning.
df3 = df3.assign(rent_error=df3["rent"] - df3["rent_predicted"])
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy """Entry point for launching an IPython kernel.
# Order listings from most under-priced to most over-priced, relative to the
# model's prediction (smallest rent_error first).
top = df3.sort_values(by="rent_error", ascending=True)
top
address | area | balcony | cellar | city | contact_name | district | extra_costs | garden | immo_id | ... | media_count | private | rent | rooms | sqm | title | url | zip_code | rent_predicted | rent_error | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
432 | NaN | 1487 | NaN | 0 | Elbe-Elster (Kreis) | Jens Madsen | Herzberg/Elster | NaN | NaN | 88000685 | ... | 3 | False | 19000 | 3.0 | 62.0 | Bauernhaus für Bastler | https://www.immobilienscout24.de/expose/88000685 | 4916 | 115935.942336 | -96935.942336 |
45 | NaN | 795 | NaN | 1 | Elbe-Elster (Kreis) | Linda Buder | Herzberg/Elster | NaN | NaN | 104049545 | ... | 23 | False | 75000 | 7.5 | 250.0 | Groß und gemütlich | https://www.immobilienscout24.de/expose/104049545 | 4916 | 164210.246541 | -89210.246541 |
84 | NaN | 363 | NaN | 0 | Elbe-Elster (Kreis) | Linda Buder | Herzberg/Elster | NaN | NaN | 104049933 | ... | 10 | False | 29000 | 3.0 | 80.0 | Jetzt schnell sein! | https://www.immobilienscout24.de/expose/104049933 | 4916 | 117362.983034 | -88362.983034 |
249 | NaN | 6900 | NaN | 0 | Uckermark (Kreis) | Dorit Wegner | Nordwestuckermark | NaN | NaN | 105468259 | ... | 12 | False | 29000 | 10.0 | 300.0 | RESERVIERT!! - EIN GROSSES GRUNDSTÜCK ZUM KLEI... | https://www.immobilienscout24.de/expose/105468259 | 17291 | 106225.125006 | -77225.125006 |
229 | NaN | 213 | NaN | 1 | Oberspreewald-Lausitz (Kreis) | Sarah-Sophie Derling | Altdöbern | NaN | NaN | 105723058 | ... | 3 | False | 19998 | 5.0 | 226.0 | Einfamilienhaus_ ca. 226 m²_sanierungsbedürftig | https://www.immobilienscout24.de/expose/105723058 | 3229 | 92856.543438 | -72858.543438 |
423 | NaN | 550 | NaN | 0 | Havelland (Kreis) | Dirk Braunschweig | Wustermark | NaN | NaN | 92816206 | ... | 11 | False | 220 | 5.0 | 146.0 | Nicht länger warten ... jetzt BAUEN! KFW 55 im... | https://www.immobilienscout24.de/expose/92816206 | 14641 | 72681.742078 | -72461.742078 |
254 | NaN | 240 | NaN | 1 | Barnim (Kreis) | Uwe Eschert | Oderberg | NaN | NaN | 105320555 | ... | 15 | False | 20000 | 6.0 | 200.0 | Wohn- und Geschäftshaus in Oderberg (Leerstand) | https://www.immobilienscout24.de/expose/105320555 | 16248 | 92024.998165 | -72024.998165 |
65 | NaN | 663 | NaN | 0 | Elbe-Elster (Kreis) | Kathleen Golembieski | Elsterwerda | NaN | NaN | 94589870 | ... | 3 | False | 27360 | 15.0 | 391.0 | Zwangsversteigerung am 07.08.2018 | https://www.immobilienscout24.de/expose/94589870 | 4910 | 96606.478731 | -69246.478731 |
195 | NaN | 600 | NaN | 0 | Brandenburg an der Havel | Marina Haseloff | Neustadt | NaN | NaN | 106169318 | ... | 3 | False | 25000 | 2.0 | 48.0 | Ferienbungalow auf sicherem Pachtland unweit B... | https://www.immobilienscout24.de/expose/106169318 | 14776 | 91956.592176 | -66956.592176 |
443 | NaN | 300 | NaN | 1 | Oberspreewald-Lausitz (Kreis) | Ronald Kulnick | Großräschen | NaN | NaN | 71380534 | ... | 9 | False | 29000 | 3.0 | 65.0 | Ein Anfang ist gemacht | https://www.immobilienscout24.de/expose/71380534 | 1983 | 95307.304462 | -66307.304462 |
24 | NaN | 7532 | NaN | 0 | Elbe-Elster (Kreis) | Kathleen Golembieski | Röderland | NaN | NaN | 103489529 | ... | 3 | False | 19500 | 10.0 | 183.0 | Zwangsversteigerungsobjekt in Prösen | https://www.immobilienscout24.de/expose/103489529 | 4932 | 83717.107630 | -64217.107630 |
302 | dorfstrasse 17 | 1306 | NaN | 0 | Ostprignitz-Ruppin (Kreis) | calogero caci | Fehrbellin | NaN | NaN | 104290944 | ... | 0 | True | 20000 | 16.0 | 400.0 | Schönes Haus mit 16 - Zimmern in Ostprignitz-R... | https://www.immobilienscout24.de/expose/104290944 | 16818 | 83022.762419 | -63022.762419 |
274 | NaN | 528 | NaN | 1 | Spree-Neiße (Kreis) | Johanna Pabst | Forst (Lausitz) | NaN | NaN | 104920317 | ... | 5 | False | 29000 | 7.0 | 160.0 | Fleißige Handwerker gesucht! | https://www.immobilienscout24.de/expose/104920317 | 3149 | 86459.013624 | -57459.013624 |
91 | Jagow 9 | 1000 | NaN | 0 | Uckermark (Kreis) | Klaus Vehlow | Uckerland | NaN | NaN | 95465100 | ... | 13 | False | 15000 | 5.0 | 200.0 | Das denkmalgeschützte ehemalige Kutscherhaus a... | https://www.immobilienscout24.de/expose/95465100 | 17337 | 71150.466616 | -56150.466616 |
435 | NaN | 1400 | NaN | 1 | Prignitz (Kreis) | Alexander Rainer Kremser | Karstädt | NaN | NaN | 87093189 | ... | 17 | False | 19000 | 6.0 | 120.0 | Es kommt darauf an, was man daraus macht! | https://www.immobilienscout24.de/expose/87093189 | 19357 | 74496.725115 | -55496.725115 |
341 | NaN | 5517 | NaN | 0 | Prignitz (Kreis) | Jörg Rus | Plattenburg | NaN | NaN | 103176930 | ... | 12 | False | 75000 | 20.0 | 464.0 | ++ sanierungsbedürftiges Mehrfamilienhaus in G... | https://www.immobilienscout24.de/expose/103176930 | 19339 | 127925.109272 | -52925.109272 |
425 | NaN | 693 | NaN | 0 | Spree-Neiße (Kreis) | Mario Cierpka | Forst (Lausitz) | NaN | NaN | 91595478 | ... | 5 | False | 10000 | 5.0 | 116.0 | Einfamilienhaus mit Nebengebäuden in Forst/Lau... | https://www.immobilienscout24.de/expose/91595478 | 3149 | 62458.608291 | -52458.608291 |
377 | NaN | 258 | NaN | 0 | Märkisch-Oderland (Kreis) | Simon Soldanski | Lindendorf | NaN | NaN | 100598647 | ... | 15 | False | 29800 | 6.0 | 149.0 | Teilvermietetes Zweifamilienhaus in Lindendorf... | https://www.immobilienscout24.de/expose/100598647 | 15306 | 78986.197355 | -49186.197355 |
318 | NaN | 306 | NaN | 1 | Spree-Neiße (Kreis) | Sebastian Stelzig | Schenkendöbern | NaN | NaN | 103807603 | ... | 21 | False | 21500 | 2.0 | 30.0 | Bungalow am Deulowitzer See_2 Zimmer_Terrasse_... | https://www.immobilienscout24.de/expose/103807603 | 3172 | 70473.796832 | -48973.796832 |
419 | NaN | 174 | NaN | 0 | Prignitz (Kreis) | Germaine Schoek | Plattenburg | NaN | NaN | 93244599 | ... | 4 | False | 9900 | 3.0 | 49.0 | FAST GESCHENKT: Haus und Grundstück in dörflic... | https://www.immobilienscout24.de/expose/93244599 | 19339 | 58381.864239 | -48481.864239 |
411 | NaN | 1948 | NaN | 1 | Spree-Neiße (Kreis) | David Schwarz | Spremberg | NaN | NaN | 95444333 | ... | 6 | False | 32000 | 5.0 | 178.0 | Neubau oder Sanieren - Naturnahes Wohnen | https://www.immobilienscout24.de/expose/95444333 | 3130 | 79665.665997 | -47665.665997 |
305 | NaN | 414 | NaN | 1 | Spree-Neiße (Kreis) | Stefan Ganter | Schenkendöbern | NaN | NaN | 104143474 | ... | 18 | False | 24000 | 3.0 | 35.0 | Erholung am Pinnower See | https://www.immobilienscout24.de/expose/104143474 | 3172 | 70286.831176 | -46286.831176 |
294 | NaN | 1023 | NaN | 1 | Spree-Neiße (Kreis) | Dipl. Betriebswirt Gerald Schrick-Zerbinati | Forst (Lausitz) | NaN | NaN | 104477011 | ... | 8 | False | 62500 | 12.0 | 300.0 | DREIFAMILIENHAUS MIT DOPPELGARAGE+DOPPELCARPOR... | https://www.immobilienscout24.de/expose/104477011 | 3149 | 108419.965012 | -45919.965012 |
221 | Ihlower Weg 12 | 6700 | NaN | 0 | Märkisch-Oderland (Kreis) | Roland Menzel | Reichenow-Möglin | NaN | NaN | 105849936 | ... | 15 | False | 25000 | 3.0 | 100.0 | ZWANGSVERSTEIGERUNG - Sanierungsobjekt in idyl... | https://www.immobilienscout24.de/expose/105849936 | 15345 | 70672.046113 | -45672.046113 |
163 | Am Dammteich 8 | 649 | NaN | 0 | Prignitz (Kreis) | NaN | Groß Pankow (Prignitz) | NaN | NaN | 106593038 | ... | 7 | False | 5500 | 3.0 | 60.0 | Reihenendhaus - vermietet | https://www.immobilienscout24.de/expose/106593038 | 16928 | 48709.985220 | -43209.985220 |
252 | NaN | 1200 | NaN | 1 | Barnim (Kreis) | Uwe Eschert | Ziethen | NaN | NaN | 105345856 | ... | 28 | False | 48000 | 4.0 | 121.0 | Einfamilienhaus leerstehend | https://www.immobilienscout24.de/expose/105345856 | 16247 | 91151.639725 | -43151.639725 |
194 | NaN | 784 | NaN | 0 | Oberspreewald-Lausitz (Kreis) | Thorsten Nabiar | Schwarzheide | NaN | NaN | 106211287 | ... | 11 | False | 14599 | 4.0 | 134.0 | Super-Niedrigenergiehaus! Bauen Sie mit KfW-Fö... | https://www.immobilienscout24.de/expose/106211287 | 1987 | 57302.458618 | -42703.458618 |
393 | NaN | 105 | NaN | 0 | Elbe-Elster (Kreis) | Ina Ferchof | Doberlug-Kirchhain | NaN | NaN | 98798565 | ... | 5 | False | 17500 | 4.0 | 100.0 | In der Zwangsversteigerung am 07.08.2018 | https://www.immobilienscout24.de/expose/98798565 | 3253 | 59634.890275 | -42134.890275 |
7 | NaN | 1385 | NaN | 1 | Ostprignitz-Ruppin (Kreis) | Dipl.-Ing.Ralf Erich Hans Steinmann | Wittstock/Dosse | NaN | NaN | 106154426 | ... | 17 | False | 35000 | 10.0 | 200.0 | Ihr Restaurant mit Wohn- und Gästehaus! | https://www.immobilienscout24.de/expose/106154426 | 16909 | 76136.147789 | -41136.147789 |
431 | NaN | 137 | NaN | 0 | Spree-Neiße (Kreis) | André Schedler | Teichland | NaN | NaN | 89398047 | ... | 8 | False | 29000 | 2.0 | 120.0 | Flachbau (ehem. Landladen) prov.frei zu verkau... | https://www.immobilienscout24.de/expose/89398047 | 3185 | 70079.195703 | -41079.195703 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
23 | NaN | 287 | NaN | 0 | Elbe-Elster (Kreis) | Linda Buder | Falkenberg/Elster | NaN | NaN | 105627094 | ... | 18 | False | 29000 | 6.0 | 123.0 | Hier steckt mehr drin, als Sie denken! | https://www.immobilienscout24.de/expose/105627094 | 4895 | -3834.803844 | 32834.803844 |
13 | NaN | 300 | NaN | 0 | Havelland (Kreis) | Kurt Lewin | Ketzin | NaN | NaN | 103050860 | ... | 13 | False | 85555 | 2.0 | 50.0 | Die Freizeit in der Natur und am Wasser verbri... | https://www.immobilienscout24.de/expose/103050860 | 14669 | 52193.941087 | 33361.058913 |
110 | Prenzlauer Straße 34/35 | 1100 | NaN | 0 | Uckermark (Kreis) | Jürgen Baron | Boitzenburger Land | NaN | NaN | 103722329 | ... | 11 | False | 75000 | 5.0 | 126.0 | Doppelhaus mit 2 separaten Wohneinheiten in Ha... | https://www.immobilienscout24.de/expose/103722329 | 17268 | 41075.231559 | 33924.768441 |
322 | NaN | 1479 | NaN | 0 | Oberspreewald-Lausitz (Kreis) | Anja Dietz | Vetschau/Spreewald | NaN | NaN | 103722853 | ... | 16 | False | 59000 | 2.0 | 60.0 | Hier kann man was draus machen! | https://www.immobilienscout24.de/expose/103722853 | 3226 | 24391.733551 | 34608.266449 |
198 | NaN | 4200 | NaN | 1 | Uckermark (Kreis) | Agnieszka Horn | Hohenselchow-Groß Pinnow | NaN | NaN | 106153085 | ... | 24 | False | 98000 | 3.0 | 129.0 | HORN IMMOBILIEN ++ ein schönes Einfamilienhaus... | https://www.immobilienscout24.de/expose/106153085 | 16306 | 63366.966590 | 34633.033410 |
297 | NaN | 1576 | NaN | 0 | Uckermark (Kreis) | Eve Guske | Angermünde | NaN | NaN | 104413663 | ... | 5 | False | 96000 | 4.0 | 103.0 | Gartenfreunde willkommen! | https://www.immobilienscout24.de/expose/104413663 | 16278 | 61186.048298 | 34813.951702 |
137 | NaN | 700 | NaN | 0 | Uckermark (Kreis) | Tatiana Polewska | Schwedt/Oder | NaN | NaN | 106456335 | ... | 9 | True | 80000 | 3.0 | 60.0 | niedliches Haus mit drei Zimmern in Uckermark,... | https://www.immobilienscout24.de/expose/106456335 | 16303 | 45096.251775 | 34903.748225 |
406 | NaN | 455 | NaN | 0 | Oberspreewald-Lausitz (Kreis) | Claus-Peter Oehmcke | Lübbenau/Spreewald | NaN | NaN | 96497074 | ... | 19 | False | 85000 | 4.0 | 75.0 | Besonders reizvolle, ruhige Lage exklusives Wo... | https://www.immobilienscout24.de/expose/96497074 | 3222 | 49352.913504 | 35647.086496 |
421 | NaN | 550 | NaN | 0 | Barnim (Kreis) | Georgi Kanta | Oderberg | NaN | NaN | 93078982 | ... | 8 | False | 98600 | 5.0 | 131.0 | Wie lange willst Du noch warten??? | https://www.immobilienscout24.de/expose/93078982 | 16248 | 61913.589384 | 36686.410616 |
98 | Bahnhofstr. 7 | 620 | NaN | 0 | Uckermark (Kreis) | Verena Siewert | Passow | NaN | NaN | 104559094 | ... | 9 | False | 70000 | 4.0 | 111.0 | Passow - Einfamilienhaus zum Sofortbezug | https://www.immobilienscout24.de/expose/104559094 | 16306 | 33147.483120 | 36852.516880 |
136 | Annenstr. 10 | 641 | NaN | 0 | Elbe-Elster (Kreis) | Gerald Bleich | Lichterfeld-Schacksdorf | NaN | NaN | 106028875 | ... | 14 | True | 85000 | 2.0 | 60.0 | EFH mit zwei Zimmern in Finsterwalde, großes G... | https://www.immobilienscout24.de/expose/106028875 | 3231 | 46377.074192 | 38622.925808 |
267 | NaN | 817 | NaN | 0 | Prignitz (Kreis) | Silke Boldt | Pritzwalk | NaN | NaN | 105118792 | ... | 11 | False | 95000 | 4.5 | 103.0 | EFH in bevorzugter Wohnlage | https://www.immobilienscout24.de/expose/105118792 | 16928 | 55535.770982 | 39464.229018 |
35 | NaN | 2284 | NaN | 1 | Prignitz (Kreis) | Winfried Schultz | Putlitz | NaN | NaN | 105517038 | ... | 7 | False | 79000 | 3.0 | 90.0 | XXL-Garten-Grundstück trifft Doppelhaushälfte ... | https://www.immobilienscout24.de/expose/105517038 | 16949 | 38755.253119 | 40244.746881 |
165 | Rohlsdorf 10 | 500 | NaN | 0 | Prignitz (Kreis) | Heide Kretschmer | Groß Pankow (Prignitz) | NaN | NaN | 106585918 | ... | 3 | True | 100000 | 5.0 | 130.0 | Schönes Haus mit fünf Zimmern in Prignitz (Kre... | https://www.immobilienscout24.de/expose/106585918 | 16928 | 59622.259683 | 40377.740317 |
213 | NaN | 4598 | NaN | 0 | Märkisch-Oderland (Kreis) | Knut Bukowiecki | Küstriner Vorland | NaN | NaN | 105924959 | ... | 10 | False | 79890 | 4.0 | 125.0 | Kreative Nestbauer gesucht! - Grundstück mit L... | https://www.immobilienscout24.de/expose/105924959 | 15328 | 37740.851100 | 42149.148900 |
64 | NaN | 500 | NaN | 0 | Teltow-Fläming (Kreis) | Tim Lemke | Dahmetal | NaN | NaN | 104859683 | ... | 15 | False | 90999 | 4.0 | 134.0 | EIN HAUS ZUM VERLIEBEN !!!! MASSA HAUS 0174140... | https://www.immobilienscout24.de/expose/104859683 | 15936 | 48633.476327 | 42365.523673 |
265 | NaN | 254 | NaN | 1 | Potsdam-Mittelmark (Kreis) | Nadja Dillinger | Treuenbrietzen | NaN | NaN | 105130414 | ... | 20 | False | 90000 | 4.0 | 80.0 | Gemütliches Reihenhaus mit Garten! | https://www.immobilienscout24.de/expose/105130414 | 14929 | 47378.930283 | 42621.069717 |
418 | NaN | 493 | NaN | 0 | Potsdam-Mittelmark (Kreis) | Nadja Dillinger | Treuenbrietzen | NaN | NaN | 93416313 | ... | 15 | False | 76000 | 3.0 | 90.0 | Haus mit viel Charme und Potenzial! | https://www.immobilienscout24.de/expose/93416313 | 14929 | 32918.525804 | 43081.474196 |
270 | NaN | 579 | NaN | 1 | Uckermark (Kreis) | NaN | Milmersdorf | NaN | NaN | 105078442 | ... | 14 | False | 99000 | 4.0 | 100.0 | Sommerangebot! | https://www.immobilienscout24.de/expose/105078442 | 17268 | 54583.037304 | 44416.962696 |
117 | NaN | 996 | NaN | 0 | Elbe-Elster (Kreis) | NaN | Uebigau-Wahrenbrück | NaN | NaN | 102115774 | ... | 20 | False | 70000 | 3.0 | 75.0 | Gemütliche Doppelhaushälfte mit Terrasse, Priv... | https://www.immobilienscout24.de/expose/102115774 | 4924 | 23923.585927 | 46076.414073 |
268 | NaN | 770 | NaN | 0 | Oberhavel (Kreis) | Andreas Reichel | Zehdenick | NaN | NaN | 105101176 | ... | 15 | False | 89000 | 3.0 | 46.0 | Marienthal am Wentowsee | https://www.immobilienscout24.de/expose/105101176 | 16792 | 42456.166983 | 46543.833017 |
17 | Eggertsberg 3 | 1600 | NaN | 0 | Potsdam-Mittelmark (Kreis) | René Lindelaub | Wusterwitz | NaN | NaN | 106272555 | ... | 8 | False | 86000 | 2.0 | 55.0 | Wohlfühloase in Wusterwitz – ca. 150 m bis zum... | https://www.immobilienscout24.de/expose/106272555 | 14789 | 37921.721061 | 48078.278939 |
158 | Mentiner Straße xx | 3713 | NaN | 0 | Prignitz (Kreis) | NaN | Putlitz | NaN | NaN | 106599183 | ... | 5 | False | 91000 | 5.0 | 190.0 | WOHNHAUS MIT 190m² WOHNFLÄCHE | https://www.immobilienscout24.de/expose/106599183 | 16949 | 40394.741038 | 50605.258962 |
99 | NaN | 2147 | NaN | 0 | Dahme-Spreewald (Kreis) | Christine Bildstein | Unterspreewald | NaN | NaN | 93988424 | ... | 2 | False | 100000 | 5.0 | 135.0 | Grundstück im Unterspreewald - in Hauptstadtnähe! | https://www.immobilienscout24.de/expose/93988424 | 15910 | 48267.341535 | 51732.658465 |
180 | NaN | 682 | NaN | 0 | Spree-Neiße (Kreis) | Michael Jacobick | Neuhausen/Spree | NaN | NaN | 106435376 | ... | 8 | False | 99900 | 2.0 | 60.0 | Erholung pur in herrlicher Waldrandlage | https://www.immobilienscout24.de/expose/106435376 | 3058 | 46456.237979 | 53443.762021 |
337 | NaN | 682 | NaN | 0 | Spree-Neiße (Kreis) | Michael Jacobick | Neuhausen/Spree | NaN | NaN | 103335308 | ... | 8 | False | 99900 | 2.0 | 60.0 | Erholung pur in herrlicher Waldrandlage | https://www.immobilienscout24.de/expose/103335308 | 3058 | 46456.237979 | 53443.762021 |
248 | NaN | 330 | NaN | 1 | Elbe-Elster (Kreis) | Sven Mager | Falkenberg/Elster | NaN | NaN | 105470158 | ... | 15 | False | 59000 | 2.0 | 34.0 | Ein Traum am Kiebitzsee...\nDiese Immobilie in... | https://www.immobilienscout24.de/expose/105470158 | 4895 | 2355.705404 | 56644.294596 |
436 | NaN | 940 | NaN | 0 | Märkisch-Oderland (Kreis) | Weronika Rostropowicz | Letschin | NaN | NaN | 86178170 | ... | 24 | False | 95000 | 4.0 | 118.0 | Leben im Oderbruch - Nur 95.000.-€ und 10 Jahr... | https://www.immobilienscout24.de/expose/86178170 | 15324 | 36462.392551 | 58537.607449 |
128 | NaN | 600 | NaN | 1 | Elbe-Elster (Kreis) | Linda Buder | Uebigau-Wahrenbrück | NaN | NaN | 104833077 | ... | 8 | False | 85000 | 5.0 | 110.0 | Nette Nachbarn sind schon da ! | https://www.immobilienscout24.de/expose/104833077 | 4938 | 22854.603486 | 62145.396514 |
227 | NaN | 400 | NaN | 0 | Prignitz (Kreis) | Dieter Langer | Lenzen (Elbe) | NaN | NaN | 105750474 | ... | 15 | False | 73000 | 3.0 | 32.0 | Erholung pur - Bungalow in erster Reihe am Rud... | https://www.immobilienscout24.de/expose/105750474 | 19309 | 2812.117173 | 70187.882827 |
444 rows × 23 columns
from tpot import TPOTRegressor
from sklearn.model_selection import train_test_split
# Hold out 25% of the listings for evaluation. The split is seeded so the
# reported test score is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.75, test_size=0.25, random_state=42)
# Small evolutionary pipeline search (5 generations of 20 pipelines), scored
# by R² and seeded for reproducibility.
tpot = TPOTRegressor(generations=5, population_size=20, verbosity=2,
                     scoring="r2", random_state=42)
# np.ravel() passes the target as a 1-D array whether y is a Series or a
# one-column DataFrame, which avoids scikit-learn's DataConversionWarning.
tpot.fit(X_train, np.ravel(y_train))
print(tpot.score(X_test, np.ravel(y_test)))
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release. from numpy.core.umath_tests import inner1d /Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/sklearn/utils/validation.py:578: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel(). y = column_or_1d(y, warn=True)
Warning: xgboost.XGBRegressor is not available and will not be used by TPOT.
Optimization Progress: 33%|███▎ | 40/120 [00:21<00:51, 1.55pipeline/s]
Generation 1 - Current best internal CV score: 0.14364768400252653
Optimization Progress: 50%|█████ | 60/120 [00:35<00:43, 1.37pipeline/s]
Generation 2 - Current best internal CV score: 0.14364768400252653
Optimization Progress: 67%|██████▋ | 80/120 [00:53<00:25, 1.56pipeline/s]
Generation 3 - Current best internal CV score: 0.14383612624991568
Optimization Progress: 83%|████████▎ | 100/120 [01:14<00:15, 1.30pipeline/s]
Generation 4 - Current best internal CV score: 0.14860805291607176
Generation 5 - Current best internal CV score: 0.14860805291607176 Best pipeline: RandomForestRegressor(OneHotEncoder(input_matrix, minimum_fraction=0.2, sparse=False), bootstrap=False, max_features=0.35000000000000003, min_samples_leaf=15, min_samples_split=6, n_estimators=100) 0.12595318630220242
# Predictions of the best TPOT pipeline on the held-out listings ...
y_pred_test = tpot.predict(X_test)
# ... and on the full data set.
y_pred = tpot.predict(X)
# Out-of-sample R² of the pipeline.
r2_score(y_true=y_test, y_pred=y_pred_test)
0.12595318630220242