2017, Александр Дьяконов
# load all the required packages
import pandas as pd
import numpy as np
# for inline figures; this IPython magic also pulls numpy and matplotlib
# names into the interactive namespace
%pylab inline
# slightly prettier figures:
# NOTE(review): the 'display.mpl_style' option has been removed from newer
# pandas releases, where this call raises - confirm the pandas version in use
pd.set_option('display.mpl_style', 'default')
# NOTE(review): figsize is not imported explicitly here; presumably it is
# provided by %pylab - confirm
figsize(12, 9)
import warnings
# silence every warning for the rest of the session
warnings.filterwarnings("ignore")
#plt.rcParams['figure.figsize'] = 10, 7.5
#plt.rcParams['axes.grid'] = True
# do not truncate the column list when a DataFrame is displayed
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['font.family'] = 'Ubuntu'
plt.rc('text', usetex=False)
plt.rc('font', family='serif')
plt.rc('font', weight='bold')
plt.rc('xtick', labelsize=14)
plt.rc('ytick', labelsize=14)
# use a font that can render Russian text
# (this sets the font family and weight once more, after the settings above)
from matplotlib import rc
font = {'family': 'Droid Sans',
'weight': 'normal'}
rc('font', **font)
Populating the interactive namespace from numpy and matplotlib
# Read the three tables from CSV files in the working directory.
# The commented-out lines read the same three names from Excel workbooks
# instead (presumably the original source files - confirm).
# data = pd.read_excel('Training Part.xlsx')
# day1 = pd.read_excel('day1.xlsx')
# day2 = pd.read_excel('day2.xlsx')
data = pd.read_csv('data.csv')
day1 = pd.read_csv('day1.csv')
day2 = pd.read_csv('day2.csv')
# report (rows, columns) of each table
print (data.shape, day1.shape, day2.shape)
(390000, 197) (130000, 197) (132016, 197)
# show the first five rows of `data`
data[:5]
ID | NetSales | gender | ActivityIndex | Alcohol | AverageFriendsRegMonthDelta | CanSeeAllPosts | CanSeeAllPosts_CanPost | EducationType | EducationType_VK | FriendsAverageDeletedAccounts | FriendsAverageHasHighSchools | FriendsAverageHasJobs | FriendsPerDay | HasPhone | HasSkype | HasTwitter | LastActivity | LifeTime | MaxRegDate | MinRegDate | MobileUsageAll | MobileUsageAndroid | MobileUsageIPad | MobileUsageIphone | MobileUsageWinPhone | MonthsFromMaxRegDate | MonthsFromMinRegDate | NumberOfAccounts | NumberOfAccountsMOIMIR | NumberOfAccountsODKL | NumberOfAccountsVK | NumberOfAdvancedSchools | NumberOfChilds | NumberOfChilds_VK | NumberOfCompanies | NumberOfDeletedAccounts | NumberOfDeletedAccounts_VK | NumberOfEntrance | NumberOfFollowers | NumberOfFriendsMax | NumberOfFriendsMax_VK | NumberOfFriendsMin | NumberOfFriendsSum | NumberOfGroupsMax | NumberOfGroupsSum_OK | NumberOfHighSchools | NumberOfNotesMax | NumberOfPhotosMax | NumberOfPhotosMin_VK | NumberOfPrivateAccounts | NumberOfPrivateAccounts_OK | NumberOfRelatives | NumberOfRelatives_OK | NumberOfSchools | NumberOfSubscriptions | NumberOfVideos | Relation | SaScore1 | SaScore2 | SaScore5 | SaScore6 | SaScore7 | SaScoreFraud | SaScoreSocial | Smoking | UseScreenName | Worldviews | YearsSinceMinRegDate | YearsSinceMinRegDate_OK | YearsSinceMinRegDate_VK | NumberOfAccountsFB | Interest_1 | Interest_2 | Interest_3 | Interest_4 | Interest_5 | Interest_6 | Interest_7 | Interest_8 | Interest_9 | Interest_10 | Interest_11 | Interest_12 | Interest_13 | Interest_14 | Interest_15 | Interest_16 | Interest_17 | Interest_18 | Interest_19 | Interest_20 | Interest_21 | Interest_22 | Interest_23 | Interest_24 | Interest_25 | TD_acquaint_communic | TD_acquaintances | TD_active_rest | TD_ad | TD_animals | TD_anime_hentai | TD_architecture | TD_art_design | TD_beautiful_girls | TD_beauty | TD_books | TD_business | TD_cars | TD_cartoons | TD_caucasian | TD_celebrities | TD_children | TD_cognitive | TD_companies | 
TD_computers | TD_cookery | TD_design | TD_design_renovation | TD_diets | TD_do_yourself | TD_electronics_electrappliances | TD_entertainment | TD_family_home | TD_fashion | TD_finance | TD_fitness | TD_football | TD_foreign_lang | TD_gadgets | TD_games | TD_geopolitics_economy | TD_gif | TD_goods_services | TD_hobbies | TD_horoscope | TD_horror | TD_humour | TD_images | TD_insurance | TD_interior | TD_kazakhstan | TD_landscape_design | TD_literature_poetry | TD_mass_media | TD_mass_media_ad_PR | TD_men_communities | TD_mobile_internet | TD_motivation | TD_moto | TD_movies | TD_music | TD_names | TD_nature | TD_nature_and_travel | TD_nostalgia | TD_other_services | TD_parents_communities | TD_philosophy_esoterics | TD_photo | TD_poetry | TD_politics | TD_professions | TD_proposed_news | TD_real_estate | TD_regional_communities | TD_relations | TD_religion | TD_rest | TD_russia | TD_science | TD_science_education | TD_shops | TD_slimming | TD_society | TD_soft | TD_softporno_porno | TD_sport_and_health | TD_sport_diet | TD_sport_other | TD_start_ups | TD_tech_IT | TD_technologies | TD_thoughts_ideas | TD_tourism | TD_travel | TD_TV | TD_ukraine | TD_video | TD_way_of_life | TD_wedding_communities | TD_women_communities | TD_workout | TD_youth_communities | TD_sum | animal_owner | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0.0 | F | - | 0.0 | 92 | - | - | 12 | - | 0.06667 | 0.33333 | 0.06667 | 0.01768 | 0.0 | 0.0 | 0.0 | 113 | 2623 | 2012-10-19 00:00:00 | 2008-01-20 00:00:00 | - | - | - | - | - | 51 | 108 | 2.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0 | - | 0.0 | 0.0 | - | - | - | 14 | - | 1 | 15 | - | - | 1.0 | 0 | 1 | - | 0.0 | 0 | 4.0 | 4 | 1.0 | - | - | 0.0 | -3.82756 | -1.67380 | -5.15525 | -6.52538 | 113.0 | -4.14635 | -4.86583 | 0.0 | 0.0 | 0.0 | 9.0197 | 9.0197 | - | NaN | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 7 | 0.0 | F | - | 0.0 | - | - | - | - | - | 0 | 0.33333 | 0.22222 | - | 0.0 | 0.0 | 0.0 | 79 | - | 2008-06-22 00:00:00 | 2008-06-22 00:00:00 | - | - | - | - | - | 103 | 103 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0 | - | 0.0 | 0.0 | - | - | - | 8 | - | 8 | 8 | - | - | 0.0 | - | 4 | - | 1.0 | - | 0.0 | - | 0.0 | - | - | 0.0 | -4.06040 | -1.92671 | -4.93723 | -6.52939 | 110.0 | -3.98769 | -5.21042 | 0.0 | 0.0 | 0.0 | 8.59808 | - | - | NaN | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | 13 | 0.0 | F | - | 0.0 | 96 | - | - | 12 | - | 0.11579 | 0.45263 | 0.06316 | 0.03699 | 0.0 | 0.0 | 0.0 | 145 | 2514 | 2008-02-19 00:00:00 | 2008-02-19 00:00:00 | - | - | - | - | - | 107 | 107 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0 | - | 0.0 | 1.0 | - | - | - | 93 | - | 93 | 93 | 3 | 3 | 1.0 | 7 | 11 | - | 0.0 | 0 | 0.0 | 0 | 1.0 | - | - | 0.0 | -4.00498 | -1.88295 | -5.82966 | -7.60862 | 108.0 | -4.59382 | -4.86583 | 0.0 | 0.0 | 0.0 | 8.93898 | 8.93898 | - | 1.0 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | 19 | 180.0 | M | 4.58 | 0.0 | 89 | 0 | 1 | 0 | 0 | 0.02691 | 0.72197 | 0.10762 | 0.07254 | 0.0 | 0.0 | 0.0 | 3 | 2578 | 2008-06-26 00:00:00 | 2008-06-09 00:00:00 | 0.93583 | 0 | 0.65714 | 0.10857 | 0 | 103 | 103 | 2.0 | 1.0 | 0.0 | 1.0 | 2.0 | 0 | 0 | 0.0 | 0.0 | 0 | 123 | 105 | 187 | 187 | 16 | 203 | 28 | - | 1.0 | - | 96 | 96 | 0.0 | - | 0.0 | - | 2.0 | 31 | 246 | 0.0 | -3.97826 | -2.78780 | -5.45857 | -7.31933 | 91.0 | -4.87099 | -4.90845 | 0.0 | 1.0 | 0.0 | 8.63466 | - | 8.58786 | 1.0 | 0 | 0 | 0 | 4 | 0 | 6 | 1 | 0 | 8 | 5 | 8 | 0 | 7 | 0 | 6 | 0 | 10 | 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.00000 | 0.0 | 0.026316 | 0.00000 | 0.008772 | 0.0 | 0.0 | 0.035088 | 0.0 | 0.00000 | 0.035088 | 0.008772 | 0.008772 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.035088 | 0.0 | 0.0 | 0.00000 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.122807 | 0.000000 | 0.00000 | 0.008772 | 0.00000 | 0.0 | 0.026316 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.043860 | 0.035088 | 0.0 | 0.0 | 0.008772 | 0.026316 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.052632 | 0.00000 | 0.000000 | 0.008772 | 0.0 | 0.00000 | 0.0 | 0.008772 | 0.087719 | 0.0 | 0.0 | 0.017544 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.008772 | 0.017544 | 0.0 | 0.0 | 0.00000 | 0.00000 | 0.026316 | 0.00000 | 0.0 | 0.061404 | 0.008772 | 0.008772 | 0.070175 | 0.043860 | 0.00000 | 0.0 | 0.00000 | 0.0 | 0.043860 | 0.017544 | 0.017544 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.008772 | 0.026316 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.008772 | 0.026316 | 114.0 | 0.0 |
4 | 25 | 4876.4 | F | 5.74 | 0.0 | 97 | 0 | 1 | - | - | 0.01754 | 0.625 | 0.33114 | 0.15117 | 0.0 | 0.0 | 0.0 | 0 | 2785 | 2007-11-29 00:00:00 | 2007-11-29 00:00:00 | 0.51657 | 0 | 0 | 0.9893 | 0 | 110 | 110 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0 | 0 | 1.0 | 0.0 | 0 | 253 | 105 | 421 | 421 | 421 | 421 | - | - | 0.0 | - | 128 | 128 | 0.0 | - | 0.0 | - | 0.0 | 59 | 22 | 0.0 | -4.45843 | -3.21992 | -5.08907 | -6.78496 | 99.0 | -4.95631 | -5.04582 | 0.0 | 1.0 | 0.0 | 9.16308 | - | 9.16308 | 1.0 | 0 | 1 | 1 | 0 | 2 | 0 | 4 | 7 | 8 | 0 | 2 | 8 | 0 | 5 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.00641 | 0.0 | 0.000000 | 0.00641 | 0.000000 | 0.0 | 0.0 | 0.121795 | 0.0 | 0.00641 | 0.000000 | 0.006410 | 0.000000 | 0.0 | 0.0 | 0.00641 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.00641 | 0.038462 | 0.0 | 0.0 | 0.019231 | 0.0 | 0.025641 | 0.083333 | 0.00641 | 0.000000 | 0.00641 | 0.0 | 0.000000 | 0.00641 | 0.0 | 0.0 | 0.0 | 0.121795 | 0.006410 | 0.0 | 0.0 | 0.012821 | 0.038462 | 0.0 | 0.019231 | 0.0 | 0.0 | 0.000000 | 0.00641 | 0.019231 | 0.006410 | 0.0 | 0.00641 | 0.0 | 0.012821 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.032051 | 0.0 | 0.0 | 0.025641 | 0.000000 | 0.0 | 0.0 | 0.00641 | 0.00641 | 0.032051 | 0.00641 | 0.0 | 0.025641 | 0.012821 | 0.000000 | 0.000000 | 0.083333 | 0.00641 | 0.0 | 0.00641 | 0.0 | 0.019231 | 0.006410 | 0.000000 | 0.0 | 0.012821 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.012821 | 0.0 | 0.076923 | 0.012821 | 0.000000 | 0.019231 | 156.0 | 1.0 |
# show the first five rows of `day1`
day1[:5]
ID | NetSales | gender | ActivityIndex | Alcohol | AverageFriendsRegMonthDelta | CanSeeAllPosts | CanSeeAllPosts_CanPost | EducationType | EducationType_VK | FriendsAverageDeletedAccounts | FriendsAverageHasHighSchools | FriendsAverageHasJobs | FriendsPerDay | HasPhone | HasSkype | HasTwitter | LastActivity | LifeTime | MaxRegDate | MinRegDate | MobileUsageAll | MobileUsageAndroid | MobileUsageIPad | MobileUsageIphone | MobileUsageWinPhone | MonthsFromMaxRegDate | MonthsFromMinRegDate | NumberOfAccounts | NumberOfAccountsMOIMIR | NumberOfAccountsODKL | NumberOfAccountsVK | NumberOfAdvancedSchools | NumberOfChilds | NumberOfChilds_VK | NumberOfCompanies | NumberOfDeletedAccounts | NumberOfDeletedAccounts_VK | NumberOfEntrance | NumberOfFollowers | NumberOfFriendsMax | NumberOfFriendsMax_VK | NumberOfFriendsMin | NumberOfFriendsSum | NumberOfGroupsMax | NumberOfGroupsSum_OK | NumberOfHighSchools | NumberOfNotesMax | NumberOfPhotosMax | NumberOfPhotosMin_VK | NumberOfPrivateAccounts | NumberOfPrivateAccounts_OK | NumberOfRelatives | NumberOfRelatives_OK | NumberOfSchools | NumberOfSubscriptions | NumberOfVideos | Relation | SaScore1 | SaScore2 | SaScore5 | SaScore6 | SaScore7 | SaScoreFraud | SaScoreSocial | Smoking | UseScreenName | Worldviews | YearsSinceMinRegDate | YearsSinceMinRegDate_OK | YearsSinceMinRegDate_VK | NumberOfAccountsFB | Interest_1 | Interest_2 | Interest_3 | Interest_4 | Interest_5 | Interest_6 | Interest_7 | Interest_8 | Interest_9 | Interest_10 | Interest_11 | Interest_12 | Interest_13 | Interest_14 | Interest_15 | Interest_16 | Interest_17 | Interest_18 | Interest_19 | Interest_20 | Interest_21 | Interest_22 | Interest_23 | Interest_24 | Interest_25 | TD_acquaint_communic | TD_acquaintances | TD_active_rest | TD_ad | TD_animals | TD_anime_hentai | TD_architecture | TD_art_design | TD_beautiful_girls | TD_beauty | TD_books | TD_business | TD_cars | TD_cartoons | TD_caucasian | TD_celebrities | TD_children | TD_cognitive | TD_companies | 
TD_computers | TD_cookery | TD_design | TD_design_renovation | TD_diets | TD_do_yourself | TD_electronics_electrappliances | TD_entertainment | TD_family_home | TD_fashion | TD_finance | TD_fitness | TD_football | TD_foreign_lang | TD_gadgets | TD_games | TD_geopolitics_economy | TD_gif | TD_goods_services | TD_hobbies | TD_horoscope | TD_horror | TD_humour | TD_images | TD_insurance | TD_interior | TD_kazakhstan | TD_landscape_design | TD_literature_poetry | TD_mass_media | TD_mass_media_ad_PR | TD_men_communities | TD_mobile_internet | TD_motivation | TD_moto | TD_movies | TD_music | TD_names | TD_nature | TD_nature_and_travel | TD_nostalgia | TD_other_services | TD_parents_communities | TD_philosophy_esoterics | TD_photo | TD_poetry | TD_politics | TD_professions | TD_proposed_news | TD_real_estate | TD_regional_communities | TD_relations | TD_religion | TD_rest | TD_russia | TD_science | TD_science_education | TD_shops | TD_slimming | TD_society | TD_soft | TD_softporno_porno | TD_sport_and_health | TD_sport_diet | TD_sport_other | TD_start_ups | TD_tech_IT | TD_technologies | TD_thoughts_ideas | TD_tourism | TD_travel | TD_TV | TD_ukraine | TD_video | TD_way_of_life | TD_wedding_communities | TD_women_communities | TD_workout | TD_youth_communities | TD_sum | animal_owner | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 390004 | NaN | F | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1 | 390010 | NaN | NaN | 4.5 | 0.0 | 60 | 1 | 1 | - | - | 0.04138 | 0.18506 | 0.07356 | 0.14374 | 1.0 | 0.0 | 0.0 | 1 | 2261 | 2014-03-12 00:00:00 | 2009-06-30 00:00:00 | 0.95402 | 0.91429 | 0 | 0.59036 | 0 | 34 | 91 | 7.0 | 1.0 | 3.0 | 3.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0 | 151 | 2 | 270 | 111 | 0 | 826 | 12 | 12 | 0.0 | 79 | 20 | 1 | 1.0 | 1 | 16.0 | 28 | 2.0 | 6 | - | 6.0 | -1.90297 | -1.72063 | -3.57124 | -5.03950 | 155.0 | -2.39785 | -2.97663 | 0.0 | 1.0 | 0.0 | 7.5767 | 7.57379 | 7.5767 | 1.0 | 0 | 5 | 0 | 0 | 5 | 0 | 6 | 0 | 5 | 2 | 6 | 0 | 5 | 0 | 7 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0.011765 | 0.0 | 0.0000 | 0.011765 | 0.0000 | 0.011765 | 0.0 | 0.047059 | 0.0 | 0.000000 | 0.000000 | 0.023529 | 0.000000 | 0.0000 | 0.0 | 0.000000 | 0.0 | 0.023529 | 0.0 | 0.0000 | 0.011765 | 0.011765 | 0.0000 | 0.000000 | 0.000000 | 0.0 | 0.047059 | 0.000000 | 0.011765 | 0.023529 | 0.000000 | 0.011765 | 0.011765 | 0.0 | 0.0 | 0.0 | 0.0 | 0.105882 | 0.000000 | 0.000000 | 0.0 | 0.035294 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.023529 | 0.000000 | 0.0000 | 0.011765 | 0.011765 | 0.011765 | 0.000000 | 0.0000 | 0.035294 | 0.000000 | 0.0 | 0.011765 | 0.011765 | 0.0000 | 0.011765 | 0.000000 | 0.000000 | 0.011765 | 0.000000 | 0.0 | 0.023529 | 0.011765 | 0.011765 | 0.082353 | 0.000000 | 0.0 | 0.000000 | 0.035294 | 0.000000 | 0.058824 | 0.082353 | 0.000000 | 0.023529 | 0.023529 | 0.000000 | 0.023529 | 0.0000 | 0.0000 | 0.0000 | 0.035294 | 0.023529 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.011765 | 0.011765 | 0.000000 | 85.0 | 0.0 |
2 | 390016 | NaN | F | - | 0.0 | 87 | - | - | 12 | - | 0.03125 | 0.17188 | 0.03125 | 0.01774 | 0.0 | 0.0 | 0.0 | 21 | 1003 | 2012-07-18 00:00:00 | 2008-03-26 00:00:00 | - | - | - | - | - | 54 | 106 | 3.0 | 1.0 | 2.0 | 0.0 | 0.0 | 0.0 | - | 0.0 | 0.0 | - | - | - | 44 | - | 1 | 61 | - | - | 2.0 | 1 | 32 | - | 0.0 | 0 | 0.0 | 0 | 1.0 | - | - | 0.0 | -3.57804 | -1.89699 | -4.78375 | -6.30990 | 124.0 | -3.51037 | -4.46552 | 0.0 | 0.0 | 0.0 | 8.83984 | 4.80161 | - | NaN | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | 390022 | NaN | F | 5.79 | 0.0 | 85 | 0 | 1 | 0 | 0 | 0.02874 | 0.56897 | 0.21839 | 0.04461 | 0.0 | 0.0 | 0.0 | 0 | 2869 | 2008-03-26 00:00:00 | 2007-08-30 00:00:00 | 0.85795 | 0 | 0 | 0.99338 | 0 | 106 | 113 | 2.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0 | 137 | 121 | 128 | 128 | 6 | 134 | 1 | - | 2.0 | - | 371 | 371 | 0.0 | - | 2.0 | - | 2.0 | 167 | 1012 | 0.0 | -4.58583 | -3.21992 | -5.37974 | -7.15375 | 76.0 | -4.51827 | -5.15266 | 0.0 | 1.0 | 0.0 | 9.4124 | - | 9.4124 | NaN | 1 | 3 | 2 | 3 | 4 | 3 | 6 | 6 | 7 | 8 | 4 | 2 | 5 | 2 | 6 | 0 | 7 | 5 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.006502 | 0.0 | 0.0013 | 0.002601 | 0.0013 | 0.001300 | 0.0 | 0.049415 | 0.0 | 0.019506 | 0.016905 | 0.046814 | 0.003901 | 0.0013 | 0.0 | 0.010403 | 0.0 | 0.046814 | 0.0 | 0.0013 | 0.006502 | 0.013004 | 0.0013 | 0.002601 | 0.007802 | 0.0 | 0.074122 | 0.015605 | 0.009103 | 0.006502 | 0.003901 | 0.000000 | 0.014304 | 0.0 | 0.0 | 0.0 | 0.0 | 0.016905 | 0.011704 | 0.002601 | 0.0 | 0.015605 | 0.020806 | 0.0 | 0.007802 | 0.0 | 0.000000 | 0.023407 | 0.0013 | 0.014304 | 0.019506 | 0.000000 | 0.039012 | 0.0013 | 0.040312 | 0.014304 | 0.0 | 0.002601 | 0.018205 | 0.0013 | 0.003901 | 0.005202 | 0.016905 | 0.007802 | 0.006502 | 0.0 | 0.009103 | 0.006502 | 0.002601 | 0.045514 | 0.009103 | 0.0 | 0.020806 | 0.016905 | 0.002601 | 0.066320 | 0.006502 | 0.003901 | 0.009103 | 0.005202 | 0.002601 | 0.014304 | 0.0013 | 0.0013 | 0.0013 | 0.006502 | 0.002601 | 0.005202 | 0.003901 | 0.014304 | 0.010403 | 0.0 | 0.005202 | 0.014304 | 0.0 | 0.033810 | 0.003901 | 0.019506 | 769.0 | 1.0 |
4 | 390028 | NaN | F | - | 0.0 | - | - | - | 13 | - | 0 | 0.47059 | 0.11765 | - | 0.0 | 0.0 | 0.0 | 74 | - | 2009-04-19 00:00:00 | 2009-04-19 00:00:00 | - | - | - | - | - | 93 | 93 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | - | 0.0 | 0.0 | - | - | - | 15 | - | 15 | 15 | 20 | - | 2.0 | - | 1 | - | 0.0 | - | 0.0 | - | 1.0 | - | 2 | 0.0 | -4.70057 | -2.19835 | -5.85025 | -7.95237 | 91.0 | -5.07033 | -4.69854 | 0.0 | 0.0 | 0.0 | 7.7748 | - | - | NaN | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
# show the first five rows of `day2`
day2[:5]
ID | Net Sales | gender | ActivityIndex | Alcohol | AverageFriendsRegMonthDelta | CanSeeAllPosts | CanSeeAllPosts_CanPost | EducationType | EducationType_VK | FriendsAverageDeletedAccounts | FriendsAverageHasHighSchools | FriendsAverageHasJobs | FriendsPerDay | HasPhone | HasSkype | HasTwitter | LastActivity | LifeTime | MaxRegDate | MinRegDate | MobileUsageAll | MobileUsageAndroid | MobileUsageIPad | MobileUsageIphone | MobileUsageWinPhone | MonthsFromMaxRegDate | MonthsFromMinRegDate | NumberOfAccounts | NumberOfAccountsMOIMIR | NumberOfAccountsODKL | NumberOfAccountsVK | NumberOfAdvancedSchools | NumberOfChilds | NumberOfChilds_VK | NumberOfCompanies | NumberOfDeletedAccounts | NumberOfDeletedAccounts_VK | NumberOfEntrance | NumberOfFollowers | NumberOfFriendsMax | NumberOfFriendsMax_VK | NumberOfFriendsMin | NumberOfFriendsSum | NumberOfGroupsMax | NumberOfGroupsSum_OK | NumberOfHighSchools | NumberOfNotesMax | NumberOfPhotosMax | NumberOfPhotosMin_VK | NumberOfPrivateAccounts | NumberOfPrivateAccounts_OK | NumberOfRelatives | NumberOfRelatives_OK | NumberOfSchools | NumberOfSubscriptions | NumberOfVideos | Relation | SaScore1 | SaScore2 | SaScore5 | SaScore6 | SaScore7 | SaScoreFraud | SaScoreSocial | Smoking | UseScreenName | Worldviews | YearsSinceMinRegDate | YearsSinceMinRegDate_OK | YearsSinceMinRegDate_VK | NumberOfAccountsFB | Interest_1 | Interest_2 | Interest_3 | Interest_4 | Interest_5 | Interest_6 | Interest_7 | Interest_8 | Interest_9 | Interest_10 | Interest_11 | Interest_12 | Interest_13 | Interest_14 | Interest_15 | Interest_16 | Interest_17 | Interest_18 | Interest_19 | Interest_20 | Interest_21 | Interest_22 | Interest_23 | Interest_24 | Interest_25 | TD_acquaint_communic | TD_acquaintances | TD_active_rest | TD_ad | TD_animals | TD_anime_hentai | TD_architecture | TD_art_design | TD_beautiful_girls | TD_beauty | TD_books | TD_business | TD_cars | TD_cartoons | TD_caucasian | TD_celebrities | TD_children | TD_cognitive | TD_companies | 
TD_computers | TD_cookery | TD_design | TD_design_renovation | TD_diets | TD_do_yourself | TD_electronics_electrappliances | TD_entertainment | TD_family_home | TD_fashion | TD_finance | TD_fitness | TD_football | TD_foreign_lang | TD_gadgets | TD_games | TD_geopolitics_economy | TD_gif | TD_goods_services | TD_hobbies | TD_horoscope | TD_horror | TD_humour | TD_images | TD_insurance | TD_interior | TD_kazakhstan | TD_landscape_design | TD_literature_poetry | TD_mass_media | TD_mass_media_ad_PR | TD_men_communities | TD_mobile_internet | TD_motivation | TD_moto | TD_movies | TD_music | TD_names | TD_nature | TD_nature_and_travel | TD_nostalgia | TD_other_services | TD_parents_communities | TD_philosophy_esoterics | TD_photo | TD_poetry | TD_politics | TD_professions | TD_proposed_news | TD_real_estate | TD_regional_communities | TD_relations | TD_religion | TD_rest | TD_russia | TD_science | TD_science_education | TD_shops | TD_slimming | TD_society | TD_soft | TD_softporno_porno | TD_sport_and_health | TD_sport_diet | TD_sport_other | TD_start_ups | TD_tech_IT | TD_technologies | TD_thoughts_ideas | TD_tourism | TD_travel | TD_TV | TD_ukraine | TD_video | TD_way_of_life | TD_wedding_communities | TD_women_communities | TD_workout | TD_youth_communities | TD_sum | animal_owner | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 520001 | NaN | M | 0.9 | 0.0 | 78 | 1 | 2 | 0 | 0 | 0 | 0.24 | 0.12 | 0.00338 | 0.0 | 0.0 | 0.0 | 1 | 1478 | 2011-08-23 00:00:00 | 2008-12-25 00:00:00 | 0.75 | 1 | 0 | 0 | 0 | 65 | 97 | 2.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0 | 3 | 1 | 18 | 5 | 5 | 23 | 9 | - | 1.0 | - | 4 | 1 | 0.0 | - | 0.0 | - | 1.0 | 6 | 8 | 7.0 | -4.07950 | -2.42172 | -4.79584 | -6.03038 | 121.0 | -3.95212 | -4.19340 | 0.0 | 1.0 | 0.0 | 8.08853 | - | 5.42952 | 1.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 10 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.133333 | 0.0 | 0.0 | 0.000000 | 0.133333 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.133333 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.133333 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.000 | 0.0 | 0.000000 | 0.0 | 0.066667 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.066667 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.066667 | 0.0 | 0.0 | 0.000 | 0.000000 | 0.0 | 0.0 | 0.066667 | 0.0 | 0.000000 | 0.0 | 0.2 | 15.0 | 0.0 |
1 | 520007 | NaN | M | - | 2.0 | 92 | 1 | 2 | 0 | 0 | 0.09053 | 0.39506 | 0.16049 | 0.33846 | 0.0 | 0.0 | 0.0 | 39 | 2288 | 2012-01-10 00:00:00 | 2009-03-02 00:00:00 | 0 | 0 | 0 | 0 | 0 | 60 | 94 | 3.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0 | 1.0 | 0.0 | 0 | - | 6 | 209 | 22 | 3 | 234 | 19 | 16 | 1.0 | 10 | 14 | 4 | 0.0 | 0 | 3.0 | 3 | 2.0 | - | 6 | 7.0 | -3.57355 | -1.58798 | -4.35452 | -5.16231 | 132.0 | -4.28645 | -4.61878 | 2.0 | 0.0 | 1.0 | 7.90421 | 7.90421 | 5.04392 | 1.0 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
2 | 520013 | NaN | F | - | 0.0 | 80 | - | - | - | - | 0.03356 | 0.44295 | 0.15436 | 0.09244 | 0.0 | 0.0 | 0.0 | 0 | 2495 | 2014-05-14 00:00:00 | 2008-08-19 00:00:00 | - | - | - | - | - | 32 | 101 | 3.0 | 1.0 | 2.0 | 0.0 | 0.0 | 0.0 | - | 0.0 | 0.0 | - | - | - | 70 | - | 44 | 181 | 28 | 28 | 0.0 | 48 | 238 | - | 2.0 | 1 | 13.0 | 15 | 0.0 | - | - | 0.0 | -3.98220 | -1.75361 | -5.49793 | -6.42880 | 143.0 | -4.40719 | -4.61141 | 0.0 | 0.0 | 0.0 | 8.44049 | 8.36627 | - | 1.0 | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | - | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3 | 520019 | NaN | F | 2.99 | 0.0 | 79 | 0 | 1 | 0 | 0 | 0.07826 | 0.58261 | 0.10435 | 0.02885 | 0.0 | 0.0 | 0.0 | 0 | 1976 | 2010-04-04 00:00:00 | 2010-04-04 00:00:00 | 0.95062 | 0 | 0.2987 | 0.68831 | 0 | 81 | 81 | 2.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0 | 0.0 | 0.0 | 0 | 54 | 183 | 57 | 57 | 0 | 57 | - | - | 1.0 | - | 10 | 10 | 0.0 | - | 0.0 | - | 0.0 | 17 | 2 | 0.0 | -3.97585 | -2.85935 | -5.49487 | -7.18308 | 106.0 | -4.57233 | -4.28606 | 0.0 | 1.0 | 0.0 | 6.81516 | - | 6.81452 | 1.0 | 6 | 7 | 0 | 0 | 5 | 0 | 4 | 0 | 7 | 0 | 0 | 0 | 8 | 0 | 0 | 0 | 8 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.048193 | 0.0 | 0.0 | 0.012048 | 0.0 | 0.0 | 0.0 | 0.048193 | 0.0 | 0.0 | 0.024096 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.012048 | 0.0 | 0.0 | 0.012048 | 0.0 | 0.036145 | 0.012048 | 0.0 | 0.0 | 0.0 | 0.0 | 0.084337 | 0.0 | 0.024096 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00 | 0.0 | 0.0 | 0.084337 | 0.012048 | 0.012048 | 0.0 | 0.072289 | 0.024096 | 0.0 | 0.012048 | 0.0 | 0.0 | 0.024096 | 0.0 | 0.024096 | 0.036145 | 0.0 | 0.000000 | 0.0 | 0.024096 | 0.036145 | 0.0 | 0.0 | 0.000 | 0.0 | 0.012048 | 0.0 | 0.012048 | 0.0 | 0.0 | 0.012048 | 0.0 | 0.048193 | 0.0 | 0.072289 | 0.0 | 0.0 | 0.012048 | 0.024096 | 0.0 | 0.000000 | 0.060241 | 0.0 | 0.012048 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000 | 0.024096 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.036145 | 0.0 | 0.0 | 83.0 | 0.0 |
4 | 520025 | NaN | M | 2.36 | 0.0 | 64 | 1 | 2 | - | - | 0 | 0.36842 | 0.21053 | 0.01576 | 0.0 | 0.0 | 0.0 | 0 | 1079 | 2012-07-09 00:00:00 | 2008-09-11 00:00:00 | 0.17391 | 1 | 0 | 0 | 0 | 54 | 100 | 2.0 | 0.0 | 1.0 | 1.0 | 1.0 | 0.0 | 0 | 0.0 | 0.0 | 0 | 15 | - | 17 | 17 | 0 | 17 | - | - | 0.0 | 0 | - | - | 0.0 | 0 | 0.0 | 0 | 2.0 | 1 | - | 7.0 | -3.77161 | -2.84637 | -4.91313 | -6.43954 | 120.0 | -4.10089 | -4.60750 | 0.0 | 0.0 | 0.0 | 8.37664 | 8.37664 | 4.54872 | 1.0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.250000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.25 | 0.0 | 0.0 | 0.125000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.0 | 0.125 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.125000 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.125 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.0 | 8.0 | 0.0 |
from time import time
def make_feature_matrix(data):
    """Convert a raw data frame into an all-float feature matrix.

    Steps performed:
      * missing values (NaN) become -2 and the '-' placeholder becomes -1;
      * ``gender`` is encoded as 'F' -> -1, 'M' -> +1, missing (-2) -> 0
        (any other value ends up as NaN, as ``Series.map`` does for
        unmapped keys);
      * ``MaxRegDate`` / ``MinRegDate`` are parsed as dates and expressed in
        days elapsed since 2006-01-01; the -1 / -2 markers are replaced by
        fixed stand-in dates first;
      * a new ``deltatime`` column holds MaxRegDate - MinRegDate in days;
      * finally every column is cast to float.

    The input frame is NOT modified; a new frame is returned.  The elapsed
    wall-clock time is printed as a side effect.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``gender``, ``MaxRegDate`` and
        ``MinRegDate``; all remaining columns must be convertible to float
        once the placeholders have been substituted.

    Returns
    -------
    pandas.DataFrame
        Float-valued frame with the same columns plus ``deltatime``.
    """
    # Imported locally so the function does not depend on ``datetime`` having
    # been put into the interactive namespace by ``%pylab``.
    import datetime

    tm = time()
    seconds_per_day = 60 * 60 * 24
    origin = pd.to_datetime('2006-01-01T00:00:00.000000000')
    # Stand-in dates for the two kinds of "no value" markers.
    date_for_dash = datetime.datetime(2010, 6, 6, 0, 0)     # marker -1 (was '-')
    date_for_missing = datetime.datetime(2009, 7, 7, 0, 0)  # marker -2 (was NaN)
    date_columns = ('MaxRegDate', 'MinRegDate')

    # Without inplace=True these calls return new frames, so the caller's
    # frame is never touched by the assignments below.
    data = data.fillna(-2).replace('-', -1)

    data['gender'] = data['gender'].map({'F': -1, 'M': +1, -2: 0})

    for col in date_columns:
        data[col] = pd.to_datetime(
            data[col].replace(-1, date_for_dash).replace(-2, date_for_missing))

    # Author's notes (commented-out prints in the original) record maxima of
    # 2015-09-08 / 2015-08-17 and minima of 2006-03-06 / 2006-03-04,
    # presumably for these two date columns -- TODO confirm.
    data['deltatime'] = (
        (data['MaxRegDate'] - data['MinRegDate']).dt.total_seconds() / seconds_per_day)
    for col in date_columns:
        data[col] = (data[col] - origin).dt.total_seconds() / seconds_per_day

    data = data.astype(float)
    print ('time = ' + str(time() - tm))
    return (data)
# Build the float feature matrices for the training set and both contest days
# (processed in the order data, day1, day2).
data, day1, day2 = [make_feature_matrix(frame) for frame in (data, day1, day2)]
time = 6.3446221351623535 time = 2.0537281036376953 time = 2.0606093406677246
print (data.shape, day1.shape, day2.shape)
(390000, 198) (130000, 198) (132016, 198)
# build the matrix used for training
def make_trainmatrix(data):
    """Split a feature frame into ``(X, y, ids)``.

    X   -- ``data`` without the ``ID`` and ``NetSales`` columns (and without
           ``y`` when such a column is present);
    y   -- ``log(NetSales + 1)`` as a NumPy array;
    ids -- the values of the ``ID`` column as a NumPy array.
    """
    non_features = ['ID', 'NetSales']
    if 'y' in data.columns:
        non_features.append('y')
    features = data.drop(non_features, axis=1)
    target = np.log(1.0 + data['NetSales'].values)
    return (features, target, data['ID'].values)
import scipy.stats as ss
# make the bids
def make_rates(a):
    """Turn predicted scores into bids that grow linearly with the score rank.

    The lowest-ranked entry gets 0, the highest-ranked entry gets the largest
    bid, and all bids together add up to 2500000 * 8.  Ties are ranked by
    position (``method='ordinal'``).  The result is aligned with ``a``.
    """
    total_money = 2500000  # total amount of money
    n_players = 8  # number of players
    n_users = len(a)  # number of users
    ramp = np.linspace(0, 1, n_users)
    bids_by_rank = total_money * n_players * ramp / np.sum(ramp)
    # rankdata is 1-based; shift to 0-based positions into the sorted bids
    zero_based_rank = ss.rankdata(a, method='ordinal') - 1
    return (bids_by_rank[zero_based_rank])
import scipy.stats as ss
# make the bids - alternative strategy
def make_rates(a, v_sum=2500000, k=12):
    """Turn predicted scores into bids, spending the budget on the top half.

    Entries are ranked by score (ties ranked by position).  A ramp running
    from -1 (lowest rank) to +1 (highest rank) is clipped at 0, so only the
    upper half of the ranking receives a share of ``v_sum * k``; the share
    grows linearly with the rank.  Every bid is then raised to at least 1.

    Parameters
    ----------
    a : sequence of numbers
        Predicted scores, one per user.
    v_sum : number, optional
        Total amount of money (default 2500000).
    k : number, optional
        Number of players; the budget that is split is ``v_sum * k``
        (default 12).

    Returns
    -------
    numpy.ndarray
        Bids aligned with ``a``.  An empty input gives an empty array; when
        the clipped ramp sums to zero (a single user) every bid is the
        floor value 1 instead of NaN.
    """
    n = len(a)  # number of users
    if n == 0:
        # rankdata() on empty input cannot be used as an index; nothing to bid on
        return np.zeros(0)
    ramp = np.maximum(np.linspace(-1, 1, n), 0)
    ramp_total = np.sum(ramp)
    if ramp_total > 0:
        bids_by_rank = v_sum * k * ramp / ramp_total
    else:
        # n == 1: the ramp is [0]; skip the 0/0 scaling and fall back to the floor
        bids_by_rank = ramp
    bids_by_rank = np.maximum(bids_by_rank, 1)
    # rankdata is 1-based; shift to 0-based positions into the sorted bids
    bids = bids_by_rank[ss.rankdata(a, method='ordinal') - 1]
    # (rounding the bids to 2 decimals was tried by the author and left disabled)
    return (bids)
make_rates([1,2,10, 20, 30, 40])
array([ 1.00000000e+00, 1.00000000e+00, 1.00000000e+00, 3.33333333e+06, 1.00000000e+07, 1.66666667e+07])
# Force the second column of day2 to carry the name 'NetSales' so that
# make_trainmatrix() can find and drop it like in the other frames.
tmp = day2.columns.tolist()
tmp[1] = 'NetSales' # author's note: the values in that column are not correct
day2.columns = tmp
# Feature matrix, log-target and IDs for the training set and for each day.
X, y, data_ids = make_trainmatrix(data)
X1, y1, day1_ids = make_trainmatrix(day1)
X2, y2, day2_ids = make_trainmatrix(day2)
X[:5]
gender | ActivityIndex | Alcohol | AverageFriendsRegMonthDelta | CanSeeAllPosts | CanSeeAllPosts_CanPost | EducationType | EducationType_VK | FriendsAverageDeletedAccounts | FriendsAverageHasHighSchools | FriendsAverageHasJobs | FriendsPerDay | HasPhone | HasSkype | HasTwitter | LastActivity | LifeTime | MaxRegDate | MinRegDate | MobileUsageAll | MobileUsageAndroid | MobileUsageIPad | MobileUsageIphone | MobileUsageWinPhone | MonthsFromMaxRegDate | MonthsFromMinRegDate | NumberOfAccounts | NumberOfAccountsMOIMIR | NumberOfAccountsODKL | NumberOfAccountsVK | NumberOfAdvancedSchools | NumberOfChilds | NumberOfChilds_VK | NumberOfCompanies | NumberOfDeletedAccounts | NumberOfDeletedAccounts_VK | NumberOfEntrance | NumberOfFollowers | NumberOfFriendsMax | NumberOfFriendsMax_VK | NumberOfFriendsMin | NumberOfFriendsSum | NumberOfGroupsMax | NumberOfGroupsSum_OK | NumberOfHighSchools | NumberOfNotesMax | NumberOfPhotosMax | NumberOfPhotosMin_VK | NumberOfPrivateAccounts | NumberOfPrivateAccounts_OK | NumberOfRelatives | NumberOfRelatives_OK | NumberOfSchools | NumberOfSubscriptions | NumberOfVideos | Relation | SaScore1 | SaScore2 | SaScore5 | SaScore6 | SaScore7 | SaScoreFraud | SaScoreSocial | Smoking | UseScreenName | Worldviews | YearsSinceMinRegDate | YearsSinceMinRegDate_OK | YearsSinceMinRegDate_VK | NumberOfAccountsFB | Interest_1 | Interest_2 | Interest_3 | Interest_4 | Interest_5 | Interest_6 | Interest_7 | Interest_8 | Interest_9 | Interest_10 | Interest_11 | Interest_12 | Interest_13 | Interest_14 | Interest_15 | Interest_16 | Interest_17 | Interest_18 | Interest_19 | Interest_20 | Interest_21 | Interest_22 | Interest_23 | Interest_24 | Interest_25 | TD_acquaint_communic | TD_acquaintances | TD_active_rest | TD_ad | TD_animals | TD_anime_hentai | TD_architecture | TD_art_design | TD_beautiful_girls | TD_beauty | TD_books | TD_business | TD_cars | TD_cartoons | TD_caucasian | TD_celebrities | TD_children | TD_cognitive | TD_companies | TD_computers | 
TD_cookery | TD_design | TD_design_renovation | TD_diets | TD_do_yourself | TD_electronics_electrappliances | TD_entertainment | TD_family_home | TD_fashion | TD_finance | TD_fitness | TD_football | TD_foreign_lang | TD_gadgets | TD_games | TD_geopolitics_economy | TD_gif | TD_goods_services | TD_hobbies | TD_horoscope | TD_horror | TD_humour | TD_images | TD_insurance | TD_interior | TD_kazakhstan | TD_landscape_design | TD_literature_poetry | TD_mass_media | TD_mass_media_ad_PR | TD_men_communities | TD_mobile_internet | TD_motivation | TD_moto | TD_movies | TD_music | TD_names | TD_nature | TD_nature_and_travel | TD_nostalgia | TD_other_services | TD_parents_communities | TD_philosophy_esoterics | TD_photo | TD_poetry | TD_politics | TD_professions | TD_proposed_news | TD_real_estate | TD_regional_communities | TD_relations | TD_religion | TD_rest | TD_russia | TD_science | TD_science_education | TD_shops | TD_slimming | TD_society | TD_soft | TD_softporno_porno | TD_sport_and_health | TD_sport_diet | TD_sport_other | TD_start_ups | TD_tech_IT | TD_technologies | TD_thoughts_ideas | TD_tourism | TD_travel | TD_TV | TD_ukraine | TD_video | TD_way_of_life | TD_wedding_communities | TD_women_communities | TD_workout | TD_youth_communities | TD_sum | animal_owner | deltatime | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -1.0 | -1.00 | 0.0 | 92.0 | -1.0 | -1.0 | 12.0 | -1.0 | 0.06667 | 0.33333 | 0.06667 | 0.01768 | 0.0 | 0.0 | 0.0 | 113.0 | 2623.0 | 2483.0 | 749.0 | -1.00000 | -1.0 | -1.00000 | -1.00000 | -1.0 | 51.0 | 108.0 | 2.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | -1.0 | 0.0 | 0.0 | -1.0 | -1.0 | -1.0 | 14.0 | -1.0 | 1.0 | 15.0 | -1.0 | -1.0 | 1.0 | 0.0 | 1.0 | -1.0 | 0.0 | 0.0 | 4.0 | 4.0 | 1.0 | -1.0 | -1.0 | 0.0 | -3.82756 | -1.67380 | -5.15525 | -6.52538 | 113.0 | -4.14635 | -4.86583 | 0.0 | 0.0 | 0.0 | 9.01970 | 9.01970 | -1.00000 | -2.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -2.00000 | -2.0 | -2.000000 | -2.00000 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.0 | -2.00000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.00000 | -2.0 | -2.000000 | -2.0 | -2.0 | -2.00000 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.00000 | -2.000000 | -2.00000 | -2.0 | -2.000000 | -2.00000 | -2.0 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.00000 | -2.000000 | -2.000000 | -2.0 | -2.00000 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.00000 | -2.00000 | -2.000000 | -2.00000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.00000 | -2.0 | -2.00000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.0 | 1734.0 |
1 | -1.0 | -1.00 | 0.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | 0.00000 | 0.33333 | 0.22222 | -1.00000 | 0.0 | 0.0 | 0.0 | 79.0 | -1.0 | 903.0 | 903.0 | -1.00000 | -1.0 | -1.00000 | -1.00000 | -1.0 | 103.0 | 103.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1.0 | 0.0 | 0.0 | -1.0 | -1.0 | -1.0 | 8.0 | -1.0 | 8.0 | 8.0 | -1.0 | -1.0 | 0.0 | -1.0 | 4.0 | -1.0 | 1.0 | -1.0 | 0.0 | -1.0 | 0.0 | -1.0 | -1.0 | 0.0 | -4.06040 | -1.92671 | -4.93723 | -6.52939 | 110.0 | -3.98769 | -5.21042 | 0.0 | 0.0 | 0.0 | 8.59808 | -1.00000 | -1.00000 | -2.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -2.00000 | -2.0 | -2.000000 | -2.00000 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.0 | -2.00000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.00000 | -2.0 | -2.000000 | -2.0 | -2.0 | -2.00000 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.00000 | -2.000000 | -2.00000 | -2.0 | -2.000000 | -2.00000 | -2.0 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.00000 | -2.000000 | -2.000000 | -2.0 | -2.00000 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.00000 | -2.00000 | -2.000000 | -2.00000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.00000 | -2.0 | -2.00000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.0 | 0.0 |
2 | -1.0 | -1.00 | 0.0 | 96.0 | -1.0 | -1.0 | 12.0 | -1.0 | 0.11579 | 0.45263 | 0.06316 | 0.03699 | 0.0 | 0.0 | 0.0 | 145.0 | 2514.0 | 779.0 | 779.0 | -1.00000 | -1.0 | -1.00000 | -1.00000 | -1.0 | 107.0 | 107.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | -1.0 | 0.0 | 1.0 | -1.0 | -1.0 | -1.0 | 93.0 | -1.0 | 93.0 | 93.0 | 3.0 | 3.0 | 1.0 | 7.0 | 11.0 | -1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | -1.0 | -1.0 | 0.0 | -4.00498 | -1.88295 | -5.82966 | -7.60862 | 108.0 | -4.59382 | -4.86583 | 0.0 | 0.0 | 0.0 | 8.93898 | 8.93898 | -1.00000 | 1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -2.00000 | -2.0 | -2.000000 | -2.00000 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.0 | -2.00000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.00000 | -2.0 | -2.000000 | -2.0 | -2.0 | -2.00000 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.00000 | -2.000000 | -2.00000 | -2.0 | -2.000000 | -2.00000 | -2.0 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.00000 | -2.000000 | -2.000000 | -2.0 | -2.00000 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.00000 | -2.00000 | -2.000000 | -2.00000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.00000 | -2.0 | -2.00000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.0 | 0.0 |
3 | 1.0 | 4.58 | 0.0 | 89.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.02691 | 0.72197 | 0.10762 | 0.07254 | 0.0 | 0.0 | 0.0 | 3.0 | 2578.0 | 907.0 | 890.0 | 0.93583 | 0.0 | 0.65714 | 0.10857 | 0.0 | 103.0 | 103.0 | 2.0 | 1.0 | 0.0 | 1.0 | 2.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 123.0 | 105.0 | 187.0 | 187.0 | 16.0 | 203.0 | 28.0 | -1.0 | 1.0 | -1.0 | 96.0 | 96.0 | 0.0 | -1.0 | 0.0 | -1.0 | 2.0 | 31.0 | 246.0 | 0.0 | -3.97826 | -2.78780 | -5.45857 | -7.31933 | 91.0 | -4.87099 | -4.90845 | 0.0 | 1.0 | 0.0 | 8.63466 | -1.00000 | 8.58786 | 1.0 | 0.0 | 0.0 | 0.0 | 4.0 | 0.0 | 6.0 | 1.0 | 0.0 | 8.0 | 5.0 | 8.0 | 0.0 | 7.0 | 0.0 | 6.0 | 0.0 | 10.0 | 4.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.026316 | 0.00000 | 0.008772 | 0.0 | 0.0 | 0.035088 | 0.0 | 0.00000 | 0.035088 | 0.008772 | 0.008772 | 0.0 | 0.0 | 0.00000 | 0.0 | 0.035088 | 0.0 | 0.0 | 0.00000 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.122807 | 0.000000 | 0.00000 | 0.008772 | 0.00000 | 0.0 | 0.026316 | 0.00000 | 0.0 | 0.0 | 0.0 | 0.043860 | 0.035088 | 0.0 | 0.0 | 0.008772 | 0.026316 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.052632 | 0.00000 | 0.000000 | 0.008772 | 0.0 | 0.00000 | 0.0 | 0.008772 | 0.087719 | 0.0 | 0.0 | 0.017544 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.008772 | 0.017544 | 0.0 | 0.0 | 0.00000 | 0.00000 | 0.026316 | 0.00000 | 0.0 | 0.061404 | 0.008772 | 0.008772 | 0.070175 | 0.043860 | 0.00000 | 0.0 | 0.00000 | 0.0 | 0.043860 | 0.017544 | 0.017544 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.0 | 0.008772 | 0.026316 | 0.0 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.008772 | 0.026316 | 114.0 | 0.0 | 17.0 |
4 | -1.0 | 5.74 | 0.0 | 97.0 | 0.0 | 1.0 | -1.0 | -1.0 | 0.01754 | 0.62500 | 0.33114 | 0.15117 | 0.0 | 0.0 | 0.0 | 0.0 | 2785.0 | 697.0 | 697.0 | 0.51657 | 0.0 | 0.00000 | 0.98930 | 0.0 | 110.0 | 110.0 | 1.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 253.0 | 105.0 | 421.0 | 421.0 | 421.0 | 421.0 | -1.0 | -1.0 | 0.0 | -1.0 | 128.0 | 128.0 | 0.0 | -1.0 | 0.0 | -1.0 | 0.0 | 59.0 | 22.0 | 0.0 | -4.45843 | -3.21992 | -5.08907 | -6.78496 | 99.0 | -4.95631 | -5.04582 | 0.0 | 1.0 | 0.0 | 9.16308 | -1.00000 | 9.16308 | 1.0 | 0.0 | 1.0 | 1.0 | 0.0 | 2.0 | 0.0 | 4.0 | 7.0 | 8.0 | 0.0 | 2.0 | 8.0 | 0.0 | 5.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.00641 | 0.0 | 0.000000 | 0.00641 | 0.000000 | 0.0 | 0.0 | 0.121795 | 0.0 | 0.00641 | 0.000000 | 0.006410 | 0.000000 | 0.0 | 0.0 | 0.00641 | 0.0 | 0.000000 | 0.0 | 0.0 | 0.00641 | 0.038462 | 0.0 | 0.0 | 0.019231 | 0.0 | 0.025641 | 0.083333 | 0.00641 | 0.000000 | 0.00641 | 0.0 | 0.000000 | 0.00641 | 0.0 | 0.0 | 0.0 | 0.121795 | 0.006410 | 0.0 | 0.0 | 0.012821 | 0.038462 | 0.0 | 0.019231 | 0.0 | 0.0 | 0.000000 | 0.00641 | 0.019231 | 0.006410 | 0.0 | 0.00641 | 0.0 | 0.012821 | 0.000000 | 0.0 | 0.0 | 0.000000 | 0.0 | 0.032051 | 0.0 | 0.0 | 0.025641 | 0.000000 | 0.0 | 0.0 | 0.00641 | 0.00641 | 0.032051 | 0.00641 | 0.0 | 0.025641 | 0.012821 | 0.000000 | 0.000000 | 0.083333 | 0.00641 | 0.0 | 0.00641 | 0.0 | 0.019231 | 0.006410 | 0.000000 | 0.0 | 0.012821 | 0.0 | 0.0 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.012821 | 0.0 | 0.076923 | 0.012821 | 0.000000 | 0.019231 | 156.0 | 1.0 | 0.0 |
X1[:5]
gender | ActivityIndex | Alcohol | AverageFriendsRegMonthDelta | CanSeeAllPosts | CanSeeAllPosts_CanPost | EducationType | EducationType_VK | FriendsAverageDeletedAccounts | FriendsAverageHasHighSchools | FriendsAverageHasJobs | FriendsPerDay | HasPhone | HasSkype | HasTwitter | LastActivity | LifeTime | MaxRegDate | MinRegDate | MobileUsageAll | MobileUsageAndroid | MobileUsageIPad | MobileUsageIphone | MobileUsageWinPhone | MonthsFromMaxRegDate | MonthsFromMinRegDate | NumberOfAccounts | NumberOfAccountsMOIMIR | NumberOfAccountsODKL | NumberOfAccountsVK | NumberOfAdvancedSchools | NumberOfChilds | NumberOfChilds_VK | NumberOfCompanies | NumberOfDeletedAccounts | NumberOfDeletedAccounts_VK | NumberOfEntrance | NumberOfFollowers | NumberOfFriendsMax | NumberOfFriendsMax_VK | NumberOfFriendsMin | NumberOfFriendsSum | NumberOfGroupsMax | NumberOfGroupsSum_OK | NumberOfHighSchools | NumberOfNotesMax | NumberOfPhotosMax | NumberOfPhotosMin_VK | NumberOfPrivateAccounts | NumberOfPrivateAccounts_OK | NumberOfRelatives | NumberOfRelatives_OK | NumberOfSchools | NumberOfSubscriptions | NumberOfVideos | Relation | SaScore1 | SaScore2 | SaScore5 | SaScore6 | SaScore7 | SaScoreFraud | SaScoreSocial | Smoking | UseScreenName | Worldviews | YearsSinceMinRegDate | YearsSinceMinRegDate_OK | YearsSinceMinRegDate_VK | NumberOfAccountsFB | Interest_1 | Interest_2 | Interest_3 | Interest_4 | Interest_5 | Interest_6 | Interest_7 | Interest_8 | Interest_9 | Interest_10 | Interest_11 | Interest_12 | Interest_13 | Interest_14 | Interest_15 | Interest_16 | Interest_17 | Interest_18 | Interest_19 | Interest_20 | Interest_21 | Interest_22 | Interest_23 | Interest_24 | Interest_25 | TD_acquaint_communic | TD_acquaintances | TD_active_rest | TD_ad | TD_animals | TD_anime_hentai | TD_architecture | TD_art_design | TD_beautiful_girls | TD_beauty | TD_books | TD_business | TD_cars | TD_cartoons | TD_caucasian | TD_celebrities | TD_children | TD_cognitive | TD_companies | TD_computers | 
TD_cookery | TD_design | TD_design_renovation | TD_diets | TD_do_yourself | TD_electronics_electrappliances | TD_entertainment | TD_family_home | TD_fashion | TD_finance | TD_fitness | TD_football | TD_foreign_lang | TD_gadgets | TD_games | TD_geopolitics_economy | TD_gif | TD_goods_services | TD_hobbies | TD_horoscope | TD_horror | TD_humour | TD_images | TD_insurance | TD_interior | TD_kazakhstan | TD_landscape_design | TD_literature_poetry | TD_mass_media | TD_mass_media_ad_PR | TD_men_communities | TD_mobile_internet | TD_motivation | TD_moto | TD_movies | TD_music | TD_names | TD_nature | TD_nature_and_travel | TD_nostalgia | TD_other_services | TD_parents_communities | TD_philosophy_esoterics | TD_photo | TD_poetry | TD_politics | TD_professions | TD_proposed_news | TD_real_estate | TD_regional_communities | TD_relations | TD_religion | TD_rest | TD_russia | TD_science | TD_science_education | TD_shops | TD_slimming | TD_society | TD_soft | TD_softporno_porno | TD_sport_and_health | TD_sport_diet | TD_sport_other | TD_start_ups | TD_tech_IT | TD_technologies | TD_thoughts_ideas | TD_tourism | TD_travel | TD_TV | TD_ukraine | TD_video | TD_way_of_life | TD_wedding_communities | TD_women_communities | TD_workout | TD_youth_communities | TD_sum | animal_owner | deltatime | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | -1.0 | -2.00 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.00000 | -2.00000 | -2.00000 | -2.00000 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | 1283.0 | 1283.0 | -2.00000 | -2.00000 | -2.0 | -2.00000 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.00000 | -2.00000 | -2.00000 | -2.00000 | -2.0 | -2.00000 | -2.00000 | -2.0 | -2.0 | -2.0 | -2.00000 | -2.00000 | -2.0000 | 1.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.0 | -2.000000 | -2.0 | -2.0000 | -2.000000 | -2.0000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0000 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.0000 | -2.000000 | -2.000000 | -2.0000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.0000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.0000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0000 | -2.0000 | -2.0000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.0 | 0.0 |
1 | 0.0 | 4.50 | 0.0 | 60.0 | 1.0 | 1.0 | -1.0 | -1.0 | 0.04138 | 0.18506 | 0.07356 | 0.14374 | 1.0 | 0.0 | 0.0 | 1.0 | 2261.0 | 2992.0 | 1276.0 | 0.95402 | 0.91429 | 0.0 | 0.59036 | 0.0 | 34.0 | 91.0 | 7.0 | 1.0 | 3.0 | 3.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 151.0 | 2.0 | 270.0 | 111.0 | 0.0 | 826.0 | 12.0 | 12.0 | 0.0 | 79.0 | 20.0 | 1.0 | 1.0 | 1.0 | 16.0 | 28.0 | 2.0 | 6.0 | -1.0 | 6.0 | -1.90297 | -1.72063 | -3.57124 | -5.03950 | 155.0 | -2.39785 | -2.97663 | 0.0 | 1.0 | 0.0 | 7.57670 | 7.57379 | 7.5767 | 1.0 | 0.0 | 5.0 | 0.0 | 0.0 | 5.0 | 0.0 | 6.0 | 0.0 | 5.0 | 2.0 | 6.0 | 0.0 | 5.0 | 0.0 | 7.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.011765 | 0.0 | 0.0000 | 0.011765 | 0.0000 | 0.011765 | 0.0 | 0.047059 | 0.0 | 0.000000 | 0.000000 | 0.023529 | 0.000000 | 0.0000 | 0.0 | 0.000000 | 0.0 | 0.023529 | 0.0 | 0.0000 | 0.011765 | 0.011765 | 0.0000 | 0.000000 | 0.000000 | 0.0 | 0.047059 | 0.000000 | 0.011765 | 0.023529 | 0.000000 | 0.011765 | 0.011765 | 0.0 | 0.0 | 0.0 | 0.0 | 0.105882 | 0.000000 | 0.000000 | 0.0 | 0.035294 | 0.000000 | 0.0 | 0.000000 | 0.0 | 0.023529 | 0.000000 | 0.0000 | 0.011765 | 0.011765 | 0.011765 | 0.000000 | 0.0000 | 0.035294 | 0.000000 | 0.0 | 0.011765 | 0.011765 | 0.0000 | 0.011765 | 0.000000 | 0.000000 | 0.011765 | 0.000000 | 0.0 | 0.023529 | 0.011765 | 0.011765 | 0.082353 | 0.000000 | 0.0 | 0.000000 | 0.035294 | 0.000000 | 0.058824 | 0.082353 | 0.000000 | 0.023529 | 0.023529 | 0.000000 | 0.023529 | 0.0000 | 0.0000 | 0.0000 | 0.035294 | 0.023529 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.0 | 0.000000 | 0.000000 | 0.0 | 0.011765 | 0.011765 | 0.000000 | 85.0 | 0.0 | 1716.0 |
2 | -1.0 | -1.00 | 0.0 | 87.0 | -1.0 | -1.0 | 12.0 | -1.0 | 0.03125 | 0.17188 | 0.03125 | 0.01774 | 0.0 | 0.0 | 0.0 | 21.0 | 1003.0 | 2390.0 | 815.0 | -1.00000 | -1.00000 | -1.0 | -1.00000 | -1.0 | 54.0 | 106.0 | 3.0 | 1.0 | 2.0 | 0.0 | 0.0 | 0.0 | -1.0 | 0.0 | 0.0 | -1.0 | -1.0 | -1.0 | 44.0 | -1.0 | 1.0 | 61.0 | -1.0 | -1.0 | 2.0 | 1.0 | 32.0 | -1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | -1.0 | -1.0 | 0.0 | -3.57804 | -1.89699 | -4.78375 | -6.30990 | 124.0 | -3.51037 | -4.46552 | 0.0 | 0.0 | 0.0 | 8.83984 | 4.80161 | -1.0000 | -2.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -2.000000 | -2.0 | -2.0000 | -2.000000 | -2.0000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0000 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.0000 | -2.000000 | -2.000000 | -2.0000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.0000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.0000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0000 | -2.0000 | -2.0000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.0 | 1575.0 |
3 | -1.0 | 5.79 | 0.0 | 85.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.02874 | 0.56897 | 0.21839 | 0.04461 | 0.0 | 0.0 | 0.0 | 0.0 | 2869.0 | 815.0 | 606.0 | 0.85795 | 0.00000 | 0.0 | 0.99338 | 0.0 | 106.0 | 113.0 | 2.0 | 1.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 137.0 | 121.0 | 128.0 | 128.0 | 6.0 | 134.0 | 1.0 | -1.0 | 2.0 | -1.0 | 371.0 | 371.0 | 0.0 | -1.0 | 2.0 | -1.0 | 2.0 | 167.0 | 1012.0 | 0.0 | -4.58583 | -3.21992 | -5.37974 | -7.15375 | 76.0 | -4.51827 | -5.15266 | 0.0 | 1.0 | 0.0 | 9.41240 | -1.00000 | 9.4124 | -2.0 | 1.0 | 3.0 | 2.0 | 3.0 | 4.0 | 3.0 | 6.0 | 6.0 | 7.0 | 8.0 | 4.0 | 2.0 | 5.0 | 2.0 | 6.0 | 0.0 | 7.0 | 5.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.006502 | 0.0 | 0.0013 | 0.002601 | 0.0013 | 0.001300 | 0.0 | 0.049415 | 0.0 | 0.019506 | 0.016905 | 0.046814 | 0.003901 | 0.0013 | 0.0 | 0.010403 | 0.0 | 0.046814 | 0.0 | 0.0013 | 0.006502 | 0.013004 | 0.0013 | 0.002601 | 0.007802 | 0.0 | 0.074122 | 0.015605 | 0.009103 | 0.006502 | 0.003901 | 0.000000 | 0.014304 | 0.0 | 0.0 | 0.0 | 0.0 | 0.016905 | 0.011704 | 0.002601 | 0.0 | 0.015605 | 0.020806 | 0.0 | 0.007802 | 0.0 | 0.000000 | 0.023407 | 0.0013 | 0.014304 | 0.019506 | 0.000000 | 0.039012 | 0.0013 | 0.040312 | 0.014304 | 0.0 | 0.002601 | 0.018205 | 0.0013 | 0.003901 | 0.005202 | 0.016905 | 0.007802 | 0.006502 | 0.0 | 0.009103 | 0.006502 | 0.002601 | 0.045514 | 0.009103 | 0.0 | 0.020806 | 0.016905 | 0.002601 | 0.066320 | 0.006502 | 0.003901 | 0.009103 | 0.005202 | 0.002601 | 0.014304 | 0.0013 | 0.0013 | 0.0013 | 0.006502 | 0.002601 | 0.005202 | 0.003901 | 0.014304 | 0.010403 | 0.0 | 0.005202 | 0.014304 | 0.0 | 0.033810 | 0.003901 | 0.019506 | 769.0 | 1.0 | 209.0 |
4 | -1.0 | -1.00 | 0.0 | -1.0 | -1.0 | -1.0 | 13.0 | -1.0 | 0.00000 | 0.47059 | 0.11765 | -1.00000 | 0.0 | 0.0 | 0.0 | 74.0 | -1.0 | 1204.0 | 1204.0 | -1.00000 | -1.00000 | -1.0 | -1.00000 | -1.0 | 93.0 | 93.0 | 1.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | -1.0 | 0.0 | 0.0 | -1.0 | -1.0 | -1.0 | 15.0 | -1.0 | 15.0 | 15.0 | 20.0 | -1.0 | 2.0 | -1.0 | 1.0 | -1.0 | 0.0 | -1.0 | 0.0 | -1.0 | 1.0 | -1.0 | 2.0 | 0.0 | -4.70057 | -2.19835 | -5.85025 | -7.95237 | 91.0 | -5.07033 | -4.69854 | 0.0 | 0.0 | 0.0 | 7.77480 | -1.00000 | -1.0000 | -2.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -1.0 | -2.000000 | -2.0 | -2.0000 | -2.000000 | -2.0000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0000 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.0000 | -2.000000 | -2.000000 | -2.0000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.0 | -2.0 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.0000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.0000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0000 | -2.0000 | -2.0000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.0 | -2.000000 | -2.000000 | -2.000000 | -2.0 | -2.0 | 0.0 |
from sklearn.ensemble import RandomForestRegressor
# Random forest fitted on the full training set, then applied to both days.
# Only the non-default hyper-parameters are passed.  The original call also
# spelled out criterion='mse' and min_impurity_split=1e-07, which were the
# defaults of the scikit-learn release this notebook was run with; later
# releases removed both, so listing them makes the code fail there.
rf = RandomForestRegressor(n_estimators=1000,
                           min_samples_split=200, min_samples_leaf=100,
                           max_features=20,
                           n_jobs=-1, random_state=10)
rf.fit(X, y)
a1_rf = rf.predict(X1)
a2_rf = rf.predict(X2)
from xgboost import XGBRegressor
# Average the XGBoost predictions for both days over several random seeds.
n_seeds = 10
# Both accumulators have to start at 0: the day-1 initialiser used to be
# commented out, so `a1_gbm += ...` raised NameError in a fresh session.
a1_gbm = 0
a2_gbm = 0
for t in range(n_seeds):
    gbm = XGBRegressor(max_depth=4, learning_rate=0.1,
                       n_estimators=100, silent=True,
                       objective='reg:linear', gamma=0.6,
                       min_child_weight=5, max_delta_step=0,
                       subsample=0.8, colsample_bytree=0.8,
                       colsample_bylevel=1, reg_alpha=0,
                       reg_lambda=1, scale_pos_weight=1, base_score=0.5,
                       seed=t, missing=None)
    gbm.fit(X, y)
    a1_gbm += gbm.predict(X1)
    a2_gbm += gbm.predict(X2)
    print (t)
# Turn the sums into per-seed means; day 1 used to be left as a raw sum,
# which put it on a different scale from a1_rf in the scatter plot.
a1_gbm /= n_seeds
a2_gbm /= n_seeds
/home/alexander/anaconda3/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20. "This module will be removed in 0.20.", DeprecationWarning)
0 1 2 3 4 5 6 7 8 9
figsize(6, 5)
plt.scatter(a1_gbm, a1_rf)
<matplotlib.collections.PathCollection at 0x7f7f895da3c8>
import lightgbm as lgb
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
# LightGBM regressor: 50 boosting rounds, learning rate 0.1, fixed seed.
lgbm = lgb.LGBMRegressor(learning_rate=0.1,
                         n_estimators=50,
                         nthread=-1,
                         seed=0)
lgbm.fit(X, y)
# Predictions for day 1 and day 2, in that order.
a1_lgbm, a2_lgbm = [lgbm.predict(part) for part in (X1, X2)]
plt.scatter(a1_rf, a1_lgbm, 10, c ='#0000AA')
<matplotlib.collections.PathCollection at 0x7f503d09ac50>
plt.scatter(a2_rf, a2_lgbm, 10, c ='#0000AA')
<matplotlib.collections.PathCollection at 0x7f503e9dbe10>
# Blend: 20% random forest + 80% LightGBM.
# (An earlier variant blended with the XGBoost average instead:
#  a1 = 0.2 * a1_rf + 0.8 * a1_gbm
#  a2 = 0.2 * a2_rf + 0.8 * a2_gbm)
a1 = 0.2 * a1_rf + 0.8 * a1_lgbm
a2 = 0.2 * a2_rf + 0.8 * a2_lgbm
# Range of the blended (log-scale) predictions for each day.  The third item
# used to repeat min(a1); it now reports min(a2).  The recorded output that
# follows this statement predates the fix.
min(a1), max(a1), min(a2), max(a2)
(0.56006001304460185, 6.1910533431351009, 0.56006001304460185, 6.3666864883953096)
max(np.exp(a2) - 1.0)
581.12574924935996
# Day-1 answer: forecast converted back from the log scale, plus one bid per user.
day1_answer = {
    'ID': day1_ids,
    'NetSales_Forecast': np.exp(a1) - 1.0,
    'Bid': make_rates(a1),
}
df1 = pd.DataFrame(day1_answer)
# df1 = df1.astype(int)
df1.to_excel('ans_dyakonov_day1_post.xlsx',
             columns=['ID', 'NetSales_Forecast', 'Bid'],
             index=False)
df1[:5]
Bid | ID | NetSales_Forecast | |
---|---|---|---|
0 | 192.205029 | 390004.0 | 20.042152 |
1 | 9.543269 | 390010.0 | 3.507976 |
2 | 124.289713 | 390016.0 | 15.434077 |
3 | 256.170018 | 390022.0 | 36.039177 |
4 | 213.840698 | 390028.0 | 20.102014 |
# Day-2 answer: forecast converted back from the log scale, plus one bid per user.
day2_answer = {
    'ID': day2_ids,
    'NetSales_Forecast': np.exp(a2) - 1.0,
    'Bid': make_rates(a2),
}
df2 = pd.DataFrame(day2_answer)
# df2 = df2.astype(int)
df2.to_excel('ans_dyakonov_day2_post.xlsx',
             columns=['ID', 'NetSales_Forecast', 'Bid'],
             index=False)
df2[:5]
Bid | ID | NetSales_Forecast | |
---|---|---|---|
0 | 123.600448 | 520001.0 | 15.652646 |
1 | 82.177669 | 520007.0 | 11.477249 |
2 | 72.586258 | 520013.0 | 10.330755 |
3 | 127.380551 | 520019.0 | 16.292125 |
4 | 220.999519 | 520025.0 | 22.989423 |
# Load the earlier answer files for day 1 and day 2 to compare against df1 / df2.
tmp1, tmp2 = [pd.read_excel(name)
              for name in ('dyakonov_day1.xlsx', 'dyakonov_day2.xlsx')]
tmp2[:3]
ID | NetSales_Forecast | Bid | |
---|---|---|---|
0 | 520001 | 14 | 126 |
1 | 520007 | 12 | 101 |
2 | 520013 | 9 | 74 |
plt.scatter(tmp1.NetSales_Forecast, df1.NetSales_Forecast)
<matplotlib.collections.PathCollection at 0x7f50383b7898>
plt.scatter(tmp1.Bid, df1.Bid)
<matplotlib.collections.PathCollection at 0x7f503eaf9400>
plt.scatter(tmp2.NetSales_Forecast, df2.NetSales_Forecast)
<matplotlib.collections.PathCollection at 0x7f50337a7be0>
plt.scatter(tmp2.Bid, df2.Bid)
<matplotlib.collections.PathCollection at 0x7f7f90689b70>
def my_traintestsplit(X, y, p=0.25):
    """Split ``X`` and ``y`` by position into a leading and a trailing part.

    The last fraction ``p`` of the rows (after rounding the cut position)
    forms the second part; nothing is shuffled.

    Returns ``(X_head, y_head, X_tail, y_tail)``.
    """
    n_rows = X.shape[0]
    cut = int(np.round(n_rows * (1 - p)))
    head, tail = slice(None, cut), slice(cut, None)
    return (X[head], y[head], X[tail], y[tail])
# NOTE: from here on X1/y1 and X2/y2 are the leading / trailing parts of the
# training data (X, y); they no longer refer to the day1 / day2 matrices
# that were bound to the same names earlier in the notebook.
X1, y1, X2, y2 = my_traintestsplit(X, y)
# Sanity check of the resulting shapes.
print (X.shape, X1.shape, X2.shape, y.shape, y1.shape, y2.shape)
(390000, 196) (292500, 196) (97500, 196) (390000,) (292500,) (97500,)
from sklearn.ensemble import RandomForestRegressor
# Random forest with 400 trees; each split considers 20 features, leaves
# hold at least 100 samples, and the seed is fixed via random_state=10.
rf = RandomForestRegressor(n_estimators=400,
criterion='mse', max_depth=None,
min_samples_split=200, min_samples_leaf=100,
min_weight_fraction_leaf=0.0,
max_features=20, max_leaf_nodes=None,
min_impurity_split=1e-07, bootstrap=True,
oob_score=False, n_jobs=-1, random_state=10, # None
verbose=0, warm_start=False)
# Fit on the leading part of the split and predict the held-out part.
rf.fit(X1, y1)
a_rf = rf.predict(X2)
from xgboost import XGBRegressor
# Running sum of the XGBoost predictions on X2.
a_gbm = 0
# Train five models that differ only in `seed` (0..4) and add up their
# predictions on the held-out part; the loop counter is printed after each
# run.
for t in range(5):
gbm = XGBRegressor(max_depth=4, learning_rate=0.1,
n_estimators=100, silent=True,
objective='reg:linear', gamma=0.6,
min_child_weight=5, max_delta_step=0,
subsample=0.8, colsample_bytree=0.8,
colsample_bylevel=1, reg_alpha=0,
reg_lambda=1, scale_pos_weight=1, base_score=0.5,
seed=t, missing=None)
gbm.fit(X1, y1)
a_gbm += gbm.predict(X2)
print (t)
# Turn the sum over the five runs into their mean (5 matches range(5) above).
a_gbm /= 5
/home/alexander/anaconda3/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20. "This module will be removed in 0.20.", DeprecationWarning)
0 1 2 3 4
# Blend the two models: 20% random forest, 80% seed-averaged XGBoost.
a = 0.2 * a_rf + 0.8 * a_gbm
from scipy.integrate import trapz, simps
def hackathone_metric(df, do_draw=True):
    """Score how well the predictions rank the actual values.

    Three cumulative-sum curves of the actual values are built over
    positions 1..n:

    * lower line  -- actual values sorted in ascending order;
    * upper line  -- actual values sorted in descending order;
    * actual line -- actual values ordered by ascending prediction
      (ties keep their input order).

    With ``s1`` the area between the upper and lower lines and ``s2`` the
    area between the actual and lower lines (both by Simpson's rule), the
    result is ``1 - s2 / s1``: 1.0 when the predictions order the actual
    values ascending, 0.0 when they order them descending.

    Parameters
    ----------
    df : mapping
        Must provide ``df['pred']`` (algorithm answers) and ``df['test']``
        (actual values), two sequences of equal length, e.g.
        ``{'pred': [2, 3, 55, 63, 1, 2, 100, 100, 10],
        'test': [1, 2, 3, 4, 5, 0, 99, 121, 14]}``.
    do_draw : bool, optional
        When true, plot the three curves with ``plt``.

    Returns
    -------
    float
        The metric value; NaN when all actual values are identical (the
        upper and lower lines coincide, so the ratio is undefined).

    Raises
    ------
    ValueError
        If the inputs are empty or differ in length.
    """
    # `simps` was renamed to `simpson` in newer SciPy and later removed;
    # accept whichever name the installed version provides.
    try:
        from scipy.integrate import simpson
    except ImportError:
        from scipy.integrate import simps as simpson

    pred = np.asarray(df['pred']).ravel()
    test = np.asarray(df['test']).ravel()
    if pred.size != test.size:
        raise ValueError(
            "'pred' and 'test' must have the same length, got {} and {}".format(
                pred.size, test.size))
    if test.size == 0:
        raise ValueError("'pred' and 'test' must not be empty")

    n = np.arange(1, test.size + 1)

    # Lower line: cumulative sum of the actual values sorted ascending.
    ascending = np.sort(test)
    c1 = np.cumsum(ascending)
    # Upper line: cumulative sum of the actual values sorted descending.
    c2 = np.cumsum(ascending[::-1])
    # Actual line: actual values taken in order of ascending prediction.
    # A stable sort keeps tied predictions in their input order.
    order = np.argsort(pred, kind='stable')
    c3 = np.cumsum(test[order])

    if do_draw:
        plt.plot(n,c1, label='Lower Line')
        plt.plot(n,c2, label='Upper Line')
        plt.plot(n,c3, label='Actual Line')
        plt.legend()

    # Identical actual values make the upper and lower lines coincide, so
    # the normalising area is zero and the metric is undefined.
    if ascending[0] == ascending[-1]:
        return float('nan')

    # `x` is passed by keyword because it is keyword-only in recent SciPy.
    lower_area = simpson(c1, x=n)
    # Area between the upper and the lower line.
    s1 = simpson(c2, x=n) - lower_area
    # Area between the actual and the lower line.
    s2 = simpson(c3, x=n) - lower_area
    return (1-s2/s1)
# Metric check - a completely random forecast is fed in.
# The resulting line is expected to pass through the middle.
# NOTE(review): the code below passes the blended model prediction `a`,
# not a random forecast -- the two lines above look stale; confirm.
figsize(6, 5)
df = {'pred': a,'test': y2}
print ("Metric: ", hackathone_metric(df))
--------------------------------------------------------------------------- NameError Traceback (most recent call last) <ipython-input-19-068ac80f2d11> in <module>() 35 figsize(6, 5) 36 ---> 37 df = {'pred': a,'test': y2} 38 print ("Metric: ", hackathone_metric(df)) NameError: name 'a' is not defined
# Evaluate the blend after mapping it through exp(a) - 1 and rounding to
# whole numbers; the metric is printed and, with the default do_draw=True,
# the curves are plotted.
df = {'pred': (np.round(np.exp(a) - 1.0)),'test': y2}
print ("Metric: ", hackathone_metric(df))
Metric: 0.621995550395
# Sweep the blend weight over 20 evenly spaced values in [0, 1].
linsp = np.linspace(0, 1, 20)
# Metric value for each weight, in the order of linsp.
e = []
for alpha in linsp:
# alpha weights the XGBoost average, (1 - alpha) the random forest.
c = (alpha) * a_gbm + (1 - alpha) * a_rf
df = {'pred': c,'test': y2}
# Plotting is switched off inside the sweep; only the score is kept.
score = hackathone_metric(df, do_draw=False)
e.append(score)
print (alpha, score)
# Metric as a function of the blend weight.
plt.plot(linsp, e)
0.0 0.620636518173 0.0526315789474 0.620832841581 0.105263157895 0.621046803985 0.157894736842 0.621295643409 0.210526315789 0.621516542351 0.263157894737 0.621659999279 0.315789473684 0.621783133362 0.368421052632 0.62191612081 0.421052631579 0.622068011626 0.473684210526 0.622145190819 0.526315789474 0.622153453196 0.578947368421 0.622204900991 0.631578947368 0.622256660367 0.684210526316 0.62218627885 0.736842105263 0.622107463885 0.789473684211 0.622116445131 0.842105263158 0.622126191117 0.894736842105 0.622081835009 0.947368421053 0.622031330678 1.0 0.621928255973
[<matplotlib.lines.Line2D at 0x7f7f9dfadc18>]