In [5]:
import pandas as pd
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import seaborn as sns
In [6]:
%matplotlib inline
In [8]:
df = pd.read_csv('houses.csv')
len(df)
Out[8]:
452
In [9]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 452 entries, 0 to 451
Data columns (total 24 columns):
address         85 non-null object
area            452 non-null int64
balcony         0 non-null float64
cellar          452 non-null bool
city            452 non-null object
contact_name    391 non-null object
district        452 non-null object
extra_costs     0 non-null float64
garden          0 non-null float64
immo_id         452 non-null int64
kitchen         0 non-null float64
lat             81 non-null float64
lng             81 non-null float64
media_count     452 non-null int64
private         452 non-null bool
rent            452 non-null int64
rooms           452 non-null float64
sqm             452 non-null float64
time_dest       0 non-null float64
time_dest2      0 non-null float64
time_dest3      0 non-null float64
title           452 non-null object
url             452 non-null object
zip_code        452 non-null int64
dtypes: bool(2), float64(11), int64(5), object(6)
memory usage: 78.6+ KB
In [10]:
df.head()
Out[10]:
address area balcony cellar city contact_name district extra_costs garden immo_id ... private rent rooms sqm time_dest time_dest2 time_dest3 title url zip_code
0 NaN 100 NaN False Ostprignitz-Ruppin (Kreis) Dipl.-Ing.Ralf Erich Hans Steinmann Kyritz NaN NaN 94113154 ... False 39000 5.0 100.0 NaN NaN NaN Kyritz-Citylage! Auch in zwei Wohneinheiten te... https://www.immobilienscout24.de/expose/94113154 16866
1 NaN 1060 NaN False Dahme-Spreewald (Kreis) Bernhard Büttner Golßen NaN NaN 89713144 ... False 50000 4.0 187.0 NaN NaN NaN ***Handwerkerobjekt in Golßen im idyllischen G... https://www.immobilienscout24.de/expose/89713144 15938
2 Am Weinberg II, Hausnummer 501 306 NaN False Dahme-Spreewald (Kreis) Plettner & Brecht Immobilien GmbH Schwielochsee NaN NaN 106219505 ... False 14900 2.0 40.0 NaN NaN NaN Bezugsfreies Wochenendbungalow nahe dem Kleine... https://www.immobilienscout24.de/expose/106219505 15913
3 NaN 627 NaN False Potsdam-Mittelmark (Kreis) Sibylle Heide Havelsee NaN NaN 106213085 ... False 16000 1.0 25.0 NaN NaN NaN Den Sommer genießen https://www.immobilienscout24.de/expose/106213085 14798
4 NaN 1302 NaN False Dahme-Spreewald (Kreis) Bernhard Büttner Münchehofe NaN NaN 102951766 ... False 49000 2.0 40.0 NaN NaN NaN ***Erholungsgrundstück mit kleiner Doppelhaush... https://www.immobilienscout24.de/expose/102951766 15748

5 rows × 24 columns

Data Cleaning

In [11]:
df.immo_id = df.immo_id.astype(str)
df.zip_code = df.zip_code.astype(str)
df2=df.drop(columns=["time_dest", "time_dest2", "time_dest3"])
df2.describe()
Out[11]:
area balcony extra_costs garden kitchen lat lng media_count rent rooms sqm
count 452.000000 0.0 0.0 0.0 0.0 81.000000 81.000000 452.000000 452.000000 452.000000 452.000000
mean 1670.946903 NaN NaN NaN NaN 52.545090 13.556852 13.889381 59969.533186 4.775442 127.200509
std 2902.259936 NaN NaN NaN NaN 0.640315 0.808090 13.360138 26265.177687 4.038605 133.685552
min 0.000000 NaN NaN NaN NaN 51.388860 11.586090 0.000000 1.000000 1.000000 0.000000
25% 433.750000 NaN NaN NaN NaN 52.015330 13.161210 6.000000 37375.000000 3.000000 70.000000
50% 816.500000 NaN NaN NaN NaN 52.651200 13.692740 11.000000 60000.000000 4.000000 105.000000
75% 1659.000000 NaN NaN NaN NaN 53.150370 14.150960 17.000000 82000.000000 5.000000 143.250000
max 35393.000000 NaN NaN NaN NaN 53.537940 14.714360 137.000000 100000.000000 40.000000 1550.000000

Interesting statistics

Providers with most apartment offers

In [12]:
df2.groupby("contact_name").size().sort_values(ascending=False)
Out[12]:
contact_name
Sebastian Stelzig                                      15
Linda Buder                                            11
Anja Emmrich                                           11
Ina Ferchof                                            11
Kathleen Golembieski                                   10
Stefan Ganter                                           9
Maik Neumann                                            7
Anne Koplin                                             7
Silke Boldt                                             7
Maik Mettke                                             7
Haldor Kahmann                                          6
Johanna Pabst                                           6
Alexander Rainer Kremser                                5
Christian Moll                                          5
Rebecca Steinhardt                                      5
Jens Madsen                                             5
Marco Klewe                                             5
Uwe Eschert                                             5
Heike Lemcke                                            5
Dipl.-Ing.Ralf Erich Hans Steinmann                     5
Mario Cierpka                                           4
Klaus Vehlow                                            4
Frank Sack                                              4
Robert Juckel                                           4
Ronald Kulnick                                          4
Stephan Hikel                                           4
Sarah-Sophie Derling                                    4
Dietmar Grünberg                                        3
Dieter Langer                                           3
Detlef Horn                                             3
                                                       ..
Kay Bobach                                              1
beimco Bestensee Immobilien Consulting GmbH             1
Karola Fleischer                                        1
Jörn Hilpert                                            1
Kurt Lewin                                              1
Jörg Stephan                                            1
Jörg Schröter                                           1
Jörg Rus                                                1
Jörg Hoffmann                                           1
Jessica Hofmann                                         1
Jens Renk                                               1
Knut Bukowiecki                                         1
König Immobilien Mühlhausen UG (haftungsbeschränkt)     1
Monika Keilholz                                         1
Marco Dülgerow                                          1
Matthias Pilz                                           1
Martin Becker                                           1
Marta Rohe                                              1
Mario Todtmann                                          1
Marina  Haseloff                                        1
Marcus Mukrasch                                         1
Marcel Multhauf                                         1
L. Schwendy                                             1
Marcel Buhse                                            1
Marc Märtens                                            1
Manuela Medding                                         1
Maik Lietz                                              1
Madeleine Waldmann                                      1
Lothar Stenzel                                          1
A4RES Advisory GmbH Vermarktung                         1
Length: 187, dtype: int64

Districts with most apartment offers

In [13]:
df2.groupby("district").size().sort_values(ascending=False)
Out[13]:
district
Forst (Lausitz)            15
Schenkendöbern             12
Spremberg                  11
Karstädt                   10
Doberlug-Kirchhain          9
Belzig                      7
Elsterwerda                 7
Mühlberg/Elbe               7
Lauchhammer                 7
Oderberg                    6
Lenzen (Elbe)               6
Angermünde                  6
Nordwestuckermark           6
Uebigau-Wahrenbrück         5
Neuhausen/Spree             5
Oranienburg                 5
Treuenbrietzen              5
Pritzwalk                   5
Sonnewalde                  5
Großräschen                 5
Guben                       5
Sallgast                    5
Letschin                    5
Uckerland                   4
Dahme/Mark                  4
Welzow                      4
Groß Pankow (Prignitz)      4
Wittstock/Dosse             4
Plattenburg                 4
Herzberg/Elster             4
                           ..
Lanz                        1
Lebusa                      1
Lichterfeld-Schacksdorf     1
Lieberose                   1
Liepe                       1
Lindenau                    1
Lindendorf                  1
Linthe                      1
Marienwerder                1
Meyenburg                   1
Milower Land                1
Märkisch Luch               1
Müllrose                    1
Neißemünde                  1
Rietz-Neuendorf             1
Neuhardenberg               1
Neulewin                    1
Neupetershain               1
Nord                        1
Oberuckersee                1
Perleberg                   1
Pinnow                      1
Plaue                       1
Plessa                      1
Prötzel                     1
Päwesin                     1
Randowtal                   1
Rauen                       1
Reichenow-Möglin            1
Alt Zauche-Wußwerk          1
Length: 203, dtype: int64
In [24]:
df3=df2[(df2.sqm<600) & (df2.area < 15000)]
len(df3)
Out[24]:
444
In [27]:
df3.plot(x="sqm", y="rent", c="area", kind="scatter",colormap=cm.Set1, figsize=(15,10))
Out[27]:
<matplotlib.axes._subplots.AxesSubplot at 0x124da1128>
In [28]:
df3.plot(x="lng", y="lat", c="rent", kind="scatter", figsize=(15,10), colormap=cm.Blues)
Out[28]:
<matplotlib.axes._subplots.AxesSubplot at 0x125027550>
In [29]:
df3.hist(bins=20,figsize=(15,10), column=["area","lat","lng","media_count","rent","rooms","sqm"])
Out[29]:
array([[<matplotlib.axes._subplots.AxesSubplot object at 0x1252a9ef0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1254c44a8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1254eab38>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x12551c208>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x125542898>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1255428d0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x12559e5f8>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1255c5c88>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x1255f6358>]],
      dtype=object)
In [30]:
corr=df3.corr()
In [31]:
fig, ax = plt.subplots(figsize=(15,10)) 
sns.heatmap(corr, 
        xticklabels=corr.columns,
        yticklabels=corr.columns, center=0.0, 
            cmap=sns.diverging_palette(5, 250, as_cmap=True), annot=True, fmt=".2f", ax=ax)
Out[31]:
<matplotlib.axes._subplots.AxesSubplot at 0x1256a5320>
In [38]:
sns.pairplot(df3, vars=["sqm","rooms","rent","media_count", "area"], kind="reg")
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
Out[38]:
<seaborn.axisgrid.PairGrid at 0x128c519e8>
In [39]:
import numpy as np
from sklearn import linear_model
from sklearn.metrics import r2_score
In [41]:
# One-hot encoding of zip_codes
hot_zip = pd.get_dummies(df3.zip_code)
#df3.balcony = df3.balcony.astype(int)
#df3.garden = df3.garden.astype(int)
#df3.kitchen = df3.kitchen.astype(int)
#df3.private = df3.private.astype(int)
df3.cellar = df3.cellar.astype(int)
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/pandas/core/generic.py:4405: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self[name] = value
In [42]:
X = pd.concat((df3[["sqm","rooms","area","cellar"]], hot_zip), axis=1)
y = df3[["rent"]]
In [43]:
X.head()
Out[43]:
sqm rooms area cellar 14542 14547 14550 14641 14669 14712 ... 4910 4916 4924 4928 4931 4932 4934 4936 4938 7554
0 100.0 5.0 100 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 187.0 4.0 1060 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 40.0 2.0 306 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
3 25.0 1.0 627 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
4 40.0 2.0 1302 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

5 rows × 136 columns

In [44]:
# a simple linear model doesn't perform too well
simple = linear_model.LinearRegression()
simple.fit(X,y)
y_pred = simple.predict(X)
r2_score(y, y_pred)
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/sklearn/linear_model/base.py:509: RuntimeWarning: internal gelsd driver lwork query error, required iwork dimension not returned. This is likely the result of LAPACK bug 0038, fixed in LAPACK 3.2.2 (released July 21, 2010). Falling back to 'gelss' driver.
  linalg.lstsq(X, y)
Out[44]:
0.4460684685260624
In [45]:
# Robustly fit linear model with RANSAC (RANdom SAmple Consensus) algorithm 
regressor = linear_model.RANSACRegressor(linear_model.LinearRegression())
In [46]:
regressor.fit(X,y)
Out[46]:
RANSACRegressor(base_estimator=LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False),
        is_data_valid=None, is_model_valid=None, loss='absolute_loss',
        max_skips=inf, max_trials=100, min_samples=None, random_state=None,
        residual_metric=None, residual_threshold=None, stop_n_inliers=inf,
        stop_probability=0.99, stop_score=inf)
In [47]:
inlier_mask = regressor.inlier_mask_
outlier_mask = np.logical_not(inlier_mask)
print(u'%.1f%% der Wohnungen als Ausreißer identifiziert' % (sum(outlier_mask)*100.0/(sum(outlier_mask)+sum(inlier_mask))))
32.4% der Wohnungen als Ausreißer identifiziert
In [48]:
y_pred = regressor.predict(X)
df3["rent_predicted"] = y_pred
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/ipykernel_launcher.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
In [49]:
r2_ransac = r2_score(y[inlier_mask], y_pred[inlier_mask])
In [51]:
fig, ax = plt.subplots(figsize=(15,10)) 
sns.regplot(df3['rent'][inlier_mask],
            df3['rent_predicted'][inlier_mask])
plt.scatter(df3['rent'][outlier_mask],
            df3['rent_predicted'][outlier_mask],
            alpha=0.4,
            c='r')

plt.title('Prediction of price for houses in Brandenburg\n(zip_code, rooms, sqm)')
plt.text(250, 2000, r'$R^2=%.2f$' % r2_ransac)
plt.tight_layout()
# plt.savefig('LinReg-rent-apartment.png', dpi=150)
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/scipy/stats/stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
In [52]:
df3["rent_error"] = df3["rent"] - df3["rent_predicted"]
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
In [53]:
top = df3.sort_values("rent_error")
In [54]:
top
Out[54]:
address area balcony cellar city contact_name district extra_costs garden immo_id ... media_count private rent rooms sqm title url zip_code rent_predicted rent_error
432 NaN 1487 NaN 0 Elbe-Elster (Kreis) Jens Madsen Herzberg/Elster NaN NaN 88000685 ... 3 False 19000 3.0 62.0 Bauernhaus für Bastler https://www.immobilienscout24.de/expose/88000685 4916 115935.942336 -96935.942336
45 NaN 795 NaN 1 Elbe-Elster (Kreis) Linda Buder Herzberg/Elster NaN NaN 104049545 ... 23 False 75000 7.5 250.0 Groß und gemütlich https://www.immobilienscout24.de/expose/104049545 4916 164210.246541 -89210.246541
84 NaN 363 NaN 0 Elbe-Elster (Kreis) Linda Buder Herzberg/Elster NaN NaN 104049933 ... 10 False 29000 3.0 80.0 Jetzt schnell sein! https://www.immobilienscout24.de/expose/104049933 4916 117362.983034 -88362.983034
249 NaN 6900 NaN 0 Uckermark (Kreis) Dorit Wegner Nordwestuckermark NaN NaN 105468259 ... 12 False 29000 10.0 300.0 RESERVIERT!! - EIN GROSSES GRUNDSTÜCK ZUM KLEI... https://www.immobilienscout24.de/expose/105468259 17291 106225.125006 -77225.125006
229 NaN 213 NaN 1 Oberspreewald-Lausitz (Kreis) Sarah-Sophie Derling Altdöbern NaN NaN 105723058 ... 3 False 19998 5.0 226.0 Einfamilienhaus_ ca. 226 m²_sanierungsbedürftig https://www.immobilienscout24.de/expose/105723058 3229 92856.543438 -72858.543438
423 NaN 550 NaN 0 Havelland (Kreis) Dirk Braunschweig Wustermark NaN NaN 92816206 ... 11 False 220 5.0 146.0 Nicht länger warten ... jetzt BAUEN! KFW 55 im... https://www.immobilienscout24.de/expose/92816206 14641 72681.742078 -72461.742078
254 NaN 240 NaN 1 Barnim (Kreis) Uwe Eschert Oderberg NaN NaN 105320555 ... 15 False 20000 6.0 200.0 Wohn- und Geschäftshaus in Oderberg (Leerstand) https://www.immobilienscout24.de/expose/105320555 16248 92024.998165 -72024.998165
65 NaN 663 NaN 0 Elbe-Elster (Kreis) Kathleen Golembieski Elsterwerda NaN NaN 94589870 ... 3 False 27360 15.0 391.0 Zwangsversteigerung am 07.08.2018 https://www.immobilienscout24.de/expose/94589870 4910 96606.478731 -69246.478731
195 NaN 600 NaN 0 Brandenburg an der Havel Marina Haseloff Neustadt NaN NaN 106169318 ... 3 False 25000 2.0 48.0 Ferienbungalow auf sicherem Pachtland unweit B... https://www.immobilienscout24.de/expose/106169318 14776 91956.592176 -66956.592176
443 NaN 300 NaN 1 Oberspreewald-Lausitz (Kreis) Ronald Kulnick Großräschen NaN NaN 71380534 ... 9 False 29000 3.0 65.0 Ein Anfang ist gemacht https://www.immobilienscout24.de/expose/71380534 1983 95307.304462 -66307.304462
24 NaN 7532 NaN 0 Elbe-Elster (Kreis) Kathleen Golembieski Röderland NaN NaN 103489529 ... 3 False 19500 10.0 183.0 Zwangsversteigerungsobjekt in Prösen https://www.immobilienscout24.de/expose/103489529 4932 83717.107630 -64217.107630
302 dorfstrasse 17 1306 NaN 0 Ostprignitz-Ruppin (Kreis) calogero caci Fehrbellin NaN NaN 104290944 ... 0 True 20000 16.0 400.0 Schönes Haus mit 16 - Zimmern in Ostprignitz-R... https://www.immobilienscout24.de/expose/104290944 16818 83022.762419 -63022.762419
274 NaN 528 NaN 1 Spree-Neiße (Kreis) Johanna Pabst Forst (Lausitz) NaN NaN 104920317 ... 5 False 29000 7.0 160.0 Fleißige Handwerker gesucht! https://www.immobilienscout24.de/expose/104920317 3149 86459.013624 -57459.013624
91 Jagow 9 1000 NaN 0 Uckermark (Kreis) Klaus Vehlow Uckerland NaN NaN 95465100 ... 13 False 15000 5.0 200.0 Das denkmalgeschützte ehemalige Kutscherhaus a... https://www.immobilienscout24.de/expose/95465100 17337 71150.466616 -56150.466616
435 NaN 1400 NaN 1 Prignitz (Kreis) Alexander Rainer Kremser Karstädt NaN NaN 87093189 ... 17 False 19000 6.0 120.0 Es kommt darauf an, was man daraus macht! https://www.immobilienscout24.de/expose/87093189 19357 74496.725115 -55496.725115
341 NaN 5517 NaN 0 Prignitz (Kreis) Jörg Rus Plattenburg NaN NaN 103176930 ... 12 False 75000 20.0 464.0 ++ sanierungsbedürftiges Mehrfamilienhaus in G... https://www.immobilienscout24.de/expose/103176930 19339 127925.109272 -52925.109272
425 NaN 693 NaN 0 Spree-Neiße (Kreis) Mario Cierpka Forst (Lausitz) NaN NaN 91595478 ... 5 False 10000 5.0 116.0 Einfamilienhaus mit Nebengebäuden in Forst/Lau... https://www.immobilienscout24.de/expose/91595478 3149 62458.608291 -52458.608291
377 NaN 258 NaN 0 Märkisch-Oderland (Kreis) Simon Soldanski Lindendorf NaN NaN 100598647 ... 15 False 29800 6.0 149.0 Teilvermietetes Zweifamilienhaus in Lindendorf... https://www.immobilienscout24.de/expose/100598647 15306 78986.197355 -49186.197355
318 NaN 306 NaN 1 Spree-Neiße (Kreis) Sebastian Stelzig Schenkendöbern NaN NaN 103807603 ... 21 False 21500 2.0 30.0 Bungalow am Deulowitzer See_2 Zimmer_Terrasse_... https://www.immobilienscout24.de/expose/103807603 3172 70473.796832 -48973.796832
419 NaN 174 NaN 0 Prignitz (Kreis) Germaine Schoek Plattenburg NaN NaN 93244599 ... 4 False 9900 3.0 49.0 FAST GESCHENKT: Haus und Grundstück in dörflic... https://www.immobilienscout24.de/expose/93244599 19339 58381.864239 -48481.864239
411 NaN 1948 NaN 1 Spree-Neiße (Kreis) David Schwarz Spremberg NaN NaN 95444333 ... 6 False 32000 5.0 178.0 Neubau oder Sanieren - Naturnahes Wohnen https://www.immobilienscout24.de/expose/95444333 3130 79665.665997 -47665.665997
305 NaN 414 NaN 1 Spree-Neiße (Kreis) Stefan Ganter Schenkendöbern NaN NaN 104143474 ... 18 False 24000 3.0 35.0 Erholung am Pinnower See https://www.immobilienscout24.de/expose/104143474 3172 70286.831176 -46286.831176
294 NaN 1023 NaN 1 Spree-Neiße (Kreis) Dipl. Betriebswirt Gerald Schrick-Zerbinati Forst (Lausitz) NaN NaN 104477011 ... 8 False 62500 12.0 300.0 DREIFAMILIENHAUS MIT DOPPELGARAGE+DOPPELCARPOR... https://www.immobilienscout24.de/expose/104477011 3149 108419.965012 -45919.965012
221 Ihlower Weg 12 6700 NaN 0 Märkisch-Oderland (Kreis) Roland Menzel Reichenow-Möglin NaN NaN 105849936 ... 15 False 25000 3.0 100.0 ZWANGSVERSTEIGERUNG - Sanierungsobjekt in idyl... https://www.immobilienscout24.de/expose/105849936 15345 70672.046113 -45672.046113
163 Am Dammteich 8 649 NaN 0 Prignitz (Kreis) NaN Groß Pankow (Prignitz) NaN NaN 106593038 ... 7 False 5500 3.0 60.0 Reihenendhaus - vermietet https://www.immobilienscout24.de/expose/106593038 16928 48709.985220 -43209.985220
252 NaN 1200 NaN 1 Barnim (Kreis) Uwe Eschert Ziethen NaN NaN 105345856 ... 28 False 48000 4.0 121.0 Einfamilienhaus leerstehend https://www.immobilienscout24.de/expose/105345856 16247 91151.639725 -43151.639725
194 NaN 784 NaN 0 Oberspreewald-Lausitz (Kreis) Thorsten Nabiar Schwarzheide NaN NaN 106211287 ... 11 False 14599 4.0 134.0 Super-Niedrigenergiehaus! Bauen Sie mit KfW-Fö... https://www.immobilienscout24.de/expose/106211287 1987 57302.458618 -42703.458618
393 NaN 105 NaN 0 Elbe-Elster (Kreis) Ina Ferchof Doberlug-Kirchhain NaN NaN 98798565 ... 5 False 17500 4.0 100.0 In der Zwangsversteigerung am 07.08.2018 https://www.immobilienscout24.de/expose/98798565 3253 59634.890275 -42134.890275
7 NaN 1385 NaN 1 Ostprignitz-Ruppin (Kreis) Dipl.-Ing.Ralf Erich Hans Steinmann Wittstock/Dosse NaN NaN 106154426 ... 17 False 35000 10.0 200.0 Ihr Restaurant mit Wohn- und Gästehaus! https://www.immobilienscout24.de/expose/106154426 16909 76136.147789 -41136.147789
431 NaN 137 NaN 0 Spree-Neiße (Kreis) André Schedler Teichland NaN NaN 89398047 ... 8 False 29000 2.0 120.0 Flachbau (ehem. Landladen) prov.frei zu verkau... https://www.immobilienscout24.de/expose/89398047 3185 70079.195703 -41079.195703
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
23 NaN 287 NaN 0 Elbe-Elster (Kreis) Linda Buder Falkenberg/Elster NaN NaN 105627094 ... 18 False 29000 6.0 123.0 Hier steckt mehr drin, als Sie denken! https://www.immobilienscout24.de/expose/105627094 4895 -3834.803844 32834.803844
13 NaN 300 NaN 0 Havelland (Kreis) Kurt Lewin Ketzin NaN NaN 103050860 ... 13 False 85555 2.0 50.0 Die Freizeit in der Natur und am Wasser verbri... https://www.immobilienscout24.de/expose/103050860 14669 52193.941087 33361.058913
110 Prenzlauer Straße 34/35 1100 NaN 0 Uckermark (Kreis) Jürgen Baron Boitzenburger Land NaN NaN 103722329 ... 11 False 75000 5.0 126.0 Doppelhaus mit 2 separaten Wohneinheiten in Ha... https://www.immobilienscout24.de/expose/103722329 17268 41075.231559 33924.768441
322 NaN 1479 NaN 0 Oberspreewald-Lausitz (Kreis) Anja Dietz Vetschau/Spreewald NaN NaN 103722853 ... 16 False 59000 2.0 60.0 Hier kann man was draus machen! https://www.immobilienscout24.de/expose/103722853 3226 24391.733551 34608.266449
198 NaN 4200 NaN 1 Uckermark (Kreis) Agnieszka Horn Hohenselchow-Groß Pinnow NaN NaN 106153085 ... 24 False 98000 3.0 129.0 HORN IMMOBILIEN ++ ein schönes Einfamilienhaus... https://www.immobilienscout24.de/expose/106153085 16306 63366.966590 34633.033410
297 NaN 1576 NaN 0 Uckermark (Kreis) Eve Guske Angermünde NaN NaN 104413663 ... 5 False 96000 4.0 103.0 Gartenfreunde willkommen! https://www.immobilienscout24.de/expose/104413663 16278 61186.048298 34813.951702
137 NaN 700 NaN 0 Uckermark (Kreis) Tatiana Polewska Schwedt/Oder NaN NaN 106456335 ... 9 True 80000 3.0 60.0 niedliches Haus mit drei Zimmern in Uckermark,... https://www.immobilienscout24.de/expose/106456335 16303 45096.251775 34903.748225
406 NaN 455 NaN 0 Oberspreewald-Lausitz (Kreis) Claus-Peter Oehmcke Lübbenau/Spreewald NaN NaN 96497074 ... 19 False 85000 4.0 75.0 Besonders reizvolle, ruhige Lage exklusives Wo... https://www.immobilienscout24.de/expose/96497074 3222 49352.913504 35647.086496
421 NaN 550 NaN 0 Barnim (Kreis) Georgi Kanta Oderberg NaN NaN 93078982 ... 8 False 98600 5.0 131.0 Wie lange willst Du noch warten??? https://www.immobilienscout24.de/expose/93078982 16248 61913.589384 36686.410616
98 Bahnhofstr. 7 620 NaN 0 Uckermark (Kreis) Verena Siewert Passow NaN NaN 104559094 ... 9 False 70000 4.0 111.0 Passow - Einfamilienhaus zum Sofortbezug https://www.immobilienscout24.de/expose/104559094 16306 33147.483120 36852.516880
136 Annenstr. 10 641 NaN 0 Elbe-Elster (Kreis) Gerald Bleich Lichterfeld-Schacksdorf NaN NaN 106028875 ... 14 True 85000 2.0 60.0 EFH mit zwei Zimmern in Finsterwalde, großes G... https://www.immobilienscout24.de/expose/106028875 3231 46377.074192 38622.925808
267 NaN 817 NaN 0 Prignitz (Kreis) Silke Boldt Pritzwalk NaN NaN 105118792 ... 11 False 95000 4.5 103.0 EFH in bevorzugter Wohnlage https://www.immobilienscout24.de/expose/105118792 16928 55535.770982 39464.229018
35 NaN 2284 NaN 1 Prignitz (Kreis) Winfried Schultz Putlitz NaN NaN 105517038 ... 7 False 79000 3.0 90.0 XXL-Garten-Grundstück trifft Doppelhaushälfte ... https://www.immobilienscout24.de/expose/105517038 16949 38755.253119 40244.746881
165 Rohlsdorf 10 500 NaN 0 Prignitz (Kreis) Heide Kretschmer Groß Pankow (Prignitz) NaN NaN 106585918 ... 3 True 100000 5.0 130.0 Schönes Haus mit fünf Zimmern in Prignitz (Kre... https://www.immobilienscout24.de/expose/106585918 16928 59622.259683 40377.740317
213 NaN 4598 NaN 0 Märkisch-Oderland (Kreis) Knut Bukowiecki Küstriner Vorland NaN NaN 105924959 ... 10 False 79890 4.0 125.0 Kreative Nestbauer gesucht! - Grundstück mit L... https://www.immobilienscout24.de/expose/105924959 15328 37740.851100 42149.148900
64 NaN 500 NaN 0 Teltow-Fläming (Kreis) Tim Lemke Dahmetal NaN NaN 104859683 ... 15 False 90999 4.0 134.0 EIN HAUS ZUM VERLIEBEN !!!! MASSA HAUS 0174140... https://www.immobilienscout24.de/expose/104859683 15936 48633.476327 42365.523673
265 NaN 254 NaN 1 Potsdam-Mittelmark (Kreis) Nadja Dillinger Treuenbrietzen NaN NaN 105130414 ... 20 False 90000 4.0 80.0 Gemütliches Reihenhaus mit Garten! https://www.immobilienscout24.de/expose/105130414 14929 47378.930283 42621.069717
418 NaN 493 NaN 0 Potsdam-Mittelmark (Kreis) Nadja Dillinger Treuenbrietzen NaN NaN 93416313 ... 15 False 76000 3.0 90.0 Haus mit viel Charme und Potenzial! https://www.immobilienscout24.de/expose/93416313 14929 32918.525804 43081.474196
270 NaN 579 NaN 1 Uckermark (Kreis) NaN Milmersdorf NaN NaN 105078442 ... 14 False 99000 4.0 100.0 Sommerangebot! https://www.immobilienscout24.de/expose/105078442 17268 54583.037304 44416.962696
117 NaN 996 NaN 0 Elbe-Elster (Kreis) NaN Uebigau-Wahrenbrück NaN NaN 102115774 ... 20 False 70000 3.0 75.0 Gemütliche Doppelhaushälfte mit Terrasse, Priv... https://www.immobilienscout24.de/expose/102115774 4924 23923.585927 46076.414073
268 NaN 770 NaN 0 Oberhavel (Kreis) Andreas Reichel Zehdenick NaN NaN 105101176 ... 15 False 89000 3.0 46.0 Marienthal am Wentowsee https://www.immobilienscout24.de/expose/105101176 16792 42456.166983 46543.833017
17 Eggertsberg 3 1600 NaN 0 Potsdam-Mittelmark (Kreis) René Lindelaub Wusterwitz NaN NaN 106272555 ... 8 False 86000 2.0 55.0 Wohlfühloase in Wusterwitz – ca. 150 m bis zum... https://www.immobilienscout24.de/expose/106272555 14789 37921.721061 48078.278939
158 Mentiner Straße xx 3713 NaN 0 Prignitz (Kreis) NaN Putlitz NaN NaN 106599183 ... 5 False 91000 5.0 190.0 WOHNHAUS MIT 190m² WOHNFLÄCHE https://www.immobilienscout24.de/expose/106599183 16949 40394.741038 50605.258962
99 NaN 2147 NaN 0 Dahme-Spreewald (Kreis) Christine Bildstein Unterspreewald NaN NaN 93988424 ... 2 False 100000 5.0 135.0 Grundstück im Unterspreewald - in Hauptstadtnähe! https://www.immobilienscout24.de/expose/93988424 15910 48267.341535 51732.658465
180 NaN 682 NaN 0 Spree-Neiße (Kreis) Michael Jacobick Neuhausen/Spree NaN NaN 106435376 ... 8 False 99900 2.0 60.0 Erholung pur in herrlicher Waldrandlage https://www.immobilienscout24.de/expose/106435376 3058 46456.237979 53443.762021
337 NaN 682 NaN 0 Spree-Neiße (Kreis) Michael Jacobick Neuhausen/Spree NaN NaN 103335308 ... 8 False 99900 2.0 60.0 Erholung pur in herrlicher Waldrandlage https://www.immobilienscout24.de/expose/103335308 3058 46456.237979 53443.762021
248 NaN 330 NaN 1 Elbe-Elster (Kreis) Sven Mager Falkenberg/Elster NaN NaN 105470158 ... 15 False 59000 2.0 34.0 Ein Traum am Kiebitzsee...\nDiese Immobilie in... https://www.immobilienscout24.de/expose/105470158 4895 2355.705404 56644.294596
436 NaN 940 NaN 0 Märkisch-Oderland (Kreis) Weronika Rostropowicz Letschin NaN NaN 86178170 ... 24 False 95000 4.0 118.0 Leben im Oderbruch - Nur 95.000.-€ und 10 Jahr... https://www.immobilienscout24.de/expose/86178170 15324 36462.392551 58537.607449
128 NaN 600 NaN 1 Elbe-Elster (Kreis) Linda Buder Uebigau-Wahrenbrück NaN NaN 104833077 ... 8 False 85000 5.0 110.0 Nette Nachbarn sind schon da ! https://www.immobilienscout24.de/expose/104833077 4938 22854.603486 62145.396514
227 NaN 400 NaN 0 Prignitz (Kreis) Dieter Langer Lenzen (Elbe) NaN NaN 105750474 ... 15 False 73000 3.0 32.0 Erholung pur - Bungalow in erster Reihe am Rud... https://www.immobilienscout24.de/expose/105750474 19309 2812.117173 70187.882827

444 rows × 23 columns

In [55]:
from tpot import TPOTRegressor
from sklearn.model_selection import train_test_split
In [56]:
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    train_size=0.75, test_size=0.25)
In [57]:
tpot = TPOTRegressor(generations=5, population_size=20, verbosity=2, scoring="r2")
tpot.fit(X_train, y_train)
print(tpot.score(X_test, y_test))
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/sklearn/ensemble/weight_boosting.py:29: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.
  from numpy.core.umath_tests import inner1d
/Users/andi/.local/share/virtualenvs/ImmoSpider-M0SYrQN4/lib/python3.6/site-packages/sklearn/utils/validation.py:578: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().
  y = column_or_1d(y, warn=True)
Warning: xgboost.XGBRegressor is not available and will not be used by TPOT.
Optimization Progress:  33%|███▎      | 40/120 [00:21<00:51,  1.55pipeline/s]
Generation 1 - Current best internal CV score: 0.14364768400252653
Optimization Progress:  50%|█████     | 60/120 [00:35<00:43,  1.37pipeline/s]
Generation 2 - Current best internal CV score: 0.14364768400252653
Optimization Progress:  67%|██████▋   | 80/120 [00:53<00:25,  1.56pipeline/s]
Generation 3 - Current best internal CV score: 0.14383612624991568
Optimization Progress:  83%|████████▎ | 100/120 [01:14<00:15,  1.30pipeline/s]
Generation 4 - Current best internal CV score: 0.14860805291607176
                                                                              
Generation 5 - Current best internal CV score: 0.14860805291607176

Best pipeline: RandomForestRegressor(OneHotEncoder(input_matrix, minimum_fraction=0.2, sparse=False), bootstrap=False, max_features=0.35000000000000003, min_samples_leaf=15, min_samples_split=6, n_estimators=100)
0.12595318630220242
In [58]:
y_pred = tpot.predict(X)
In [59]:
y_pred_test = tpot.predict(X_test)
r2_score(y_test, y_pred_test)
Out[59]:
0.12595318630220242
In [ ]: