In [1]:
#Necesario para que los plots de matplotlib aparezcan en el notebook
%matplotlib inline
In [2]:
import matplotlib.pyplot as plt
import pandas as pd

carpeta_datos="G:/Mi unidad/DOCTORADO/Docencia/Curso_2018-2019/IN_2018-2019/data/"

#True si cada variable categórica se convierte en varias binarias (tantas como categorías),
#False si solo se convierte la categórica a numérica (ordinal)
binarizar = False

'''
devuelve un DataFrame, los valores perdidos notados como '?' se convierten a NaN,
si no, se consideraría '?' como una categoría más
'''

if not binarizar:
    bank_orig = pd.read_csv(carpeta_datos+'bank-additional-full.csv', delimiter=';')
else:
    bank_orig = pd.read_csv(carpeta_datos+'bank-additional-full.csv',na_values="?", delimiter=';')
    
print("------ Lista de características y tipos (object=categórica)")
print(bank_orig.dtypes,"\n")

print("------ Distribución de datos en la característica 'job'")
print(bank_orig['job'].value_counts(),"\n")

print("------ Y en la clase")
print(bank_orig['y'].value_counts(),"\n")
------ Lista de características y tipos (object=categórica)
age                 int64
job                object
marital            object
education          object
default            object
housing            object
loan               object
contact            object
month              object
day_of_week        object
duration            int64
campaign            int64
pdays               int64
previous            int64
poutcome           object
emp.var.rate      float64
cons.price.idx    float64
cons.conf.idx     float64
euribor3m         float64
nr.employed       float64
y                  object
dtype: object 

------ Distribución de datos en la característica 'job'
admin.           10422
blue-collar       9254
technician        6743
services          3969
management        2924
retired           1720
entrepreneur      1456
self-employed     1421
housemaid         1060
unemployed        1014
student            875
unknown            330
Name: job, dtype: int64 

------ Y en la clase
no     36548
yes     4640
Name: y, dtype: int64 

In [3]:
# Previsualizar el principio y fin del dataframe
bank_orig.all
Out[3]:
<bound method DataFrame.all of        age            job   marital            education  default  housing  \
0       56      housemaid   married             basic.4y       no       no   
1       57       services   married          high.school  unknown       no   
2       37       services   married          high.school       no      yes   
3       40         admin.   married             basic.6y       no       no   
4       56       services   married          high.school       no       no   
5       45       services   married             basic.9y  unknown       no   
6       59         admin.   married  professional.course       no       no   
7       41    blue-collar   married              unknown  unknown       no   
8       24     technician    single  professional.course       no      yes   
9       25       services    single          high.school       no      yes   
10      41    blue-collar   married              unknown  unknown       no   
11      25       services    single          high.school       no      yes   
12      29    blue-collar    single          high.school       no       no   
13      57      housemaid  divorced             basic.4y       no      yes   
14      35    blue-collar   married             basic.6y       no      yes   
15      54        retired   married             basic.9y  unknown      yes   
16      35    blue-collar   married             basic.6y       no      yes   
17      46    blue-collar   married             basic.6y  unknown      yes   
18      50    blue-collar   married             basic.9y       no      yes   
19      39     management    single             basic.9y  unknown       no   
20      30     unemployed   married          high.school       no       no   
21      55    blue-collar   married             basic.4y  unknown      yes   
22      55        retired    single          high.school       no      yes   
23      41     technician    single          high.school       no      yes   
24      37         admin.   married          high.school       no      yes   
25      35     technician   married    university.degree       no       no   
26      59     technician   married              unknown       no      yes   
27      39  self-employed   married             basic.9y  unknown       no   
28      54     technician    single    university.degree  unknown       no   
29      55        unknown   married    university.degree  unknown  unknown   
...    ...            ...       ...                  ...      ...      ...   
41158   35     technician  divorced             basic.4y       no       no   
41159   35     technician  divorced             basic.4y       no      yes   
41160   33         admin.   married    university.degree       no       no   
41161   33         admin.   married    university.degree       no      yes   
41162   60    blue-collar   married             basic.4y       no      yes   
41163   35     technician  divorced             basic.4y       no      yes   
41164   54         admin.   married  professional.course       no       no   
41165   38      housemaid  divorced    university.degree       no       no   
41166   32         admin.   married    university.degree       no       no   
41167   32         admin.   married    university.degree       no      yes   
41168   38   entrepreneur   married    university.degree       no       no   
41169   62       services   married          high.school       no      yes   
41170   40     management  divorced    university.degree       no      yes   
41171   33        student   married  professional.course       no      yes   
41172   31         admin.    single    university.degree       no      yes   
41173   62        retired   married    university.degree       no      yes   
41174   62        retired   married    university.degree       no      yes   
41175   34        student    single              unknown       no      yes   
41176   38      housemaid  divorced          high.school       no      yes   
41177   57        retired   married  professional.course       no      yes   
41178   62        retired   married    university.degree       no       no   
41179   64        retired  divorced  professional.course       no      yes   
41180   36         admin.   married    university.degree       no       no   
41181   37         admin.   married    university.degree       no      yes   
41182   29     unemployed    single             basic.4y       no      yes   
41183   73        retired   married  professional.course       no      yes   
41184   46    blue-collar   married  professional.course       no       no   
41185   56        retired   married    university.degree       no      yes   
41186   44     technician   married  professional.course       no       no   
41187   74        retired   married  professional.course       no      yes   

          loan    contact month day_of_week ...   campaign  pdays  previous  \
0           no  telephone   may         mon ...          1    999         0   
1           no  telephone   may         mon ...          1    999         0   
2           no  telephone   may         mon ...          1    999         0   
3           no  telephone   may         mon ...          1    999         0   
4          yes  telephone   may         mon ...          1    999         0   
5           no  telephone   may         mon ...          1    999         0   
6           no  telephone   may         mon ...          1    999         0   
7           no  telephone   may         mon ...          1    999         0   
8           no  telephone   may         mon ...          1    999         0   
9           no  telephone   may         mon ...          1    999         0   
10          no  telephone   may         mon ...          1    999         0   
11          no  telephone   may         mon ...          1    999         0   
12         yes  telephone   may         mon ...          1    999         0   
13          no  telephone   may         mon ...          1    999         0   
14          no  telephone   may         mon ...          1    999         0   
15         yes  telephone   may         mon ...          1    999         0   
16          no  telephone   may         mon ...          1    999         0   
17         yes  telephone   may         mon ...          1    999         0   
18         yes  telephone   may         mon ...          1    999         0   
19          no  telephone   may         mon ...          1    999         0   
20          no  telephone   may         mon ...          1    999         0   
21          no  telephone   may         mon ...          1    999         0   
22          no  telephone   may         mon ...          1    999         0   
23          no  telephone   may         mon ...          1    999         0   
24          no  telephone   may         mon ...          1    999         0   
25         yes  telephone   may         mon ...          1    999         0   
26          no  telephone   may         mon ...          1    999         0   
27          no  telephone   may         mon ...          1    999         0   
28          no  telephone   may         mon ...          2    999         0   
29     unknown  telephone   may         mon ...          1    999         0   
...        ...        ...   ...         ... ...        ...    ...       ...   
41158       no   cellular   nov         tue ...          1    999         0   
41159       no   cellular   nov         tue ...          1      9         4   
41160       no   cellular   nov         tue ...          1    999         0   
41161       no   cellular   nov         tue ...          1    999         1   
41162       no   cellular   nov         tue ...          2      4         1   
41163       no   cellular   nov         tue ...          3      4         2   
41164       no   cellular   nov         tue ...          2     10         1   
41165       no   cellular   nov         wed ...          2    999         0   
41166       no  telephone   nov         wed ...          1    999         1   
41167       no   cellular   nov         wed ...          3    999         0   
41168       no   cellular   nov         wed ...          2    999         0   
41169       no   cellular   nov         wed ...          5    999         0   
41170       no   cellular   nov         wed ...          2    999         4   
41171       no  telephone   nov         thu ...          1    999         0   
41172       no   cellular   nov         thu ...          1    999         0   
41173       no   cellular   nov         thu ...          1    999         2   
41174       no   cellular   nov         thu ...          1      1         6   
41175       no   cellular   nov         thu ...          1    999         2   
41176      yes   cellular   nov         thu ...          1    999         0   
41177       no   cellular   nov         thu ...          6    999         0   
41178       no   cellular   nov         thu ...          2      6         3   
41179       no   cellular   nov         fri ...          3    999         0   
41180       no   cellular   nov         fri ...          2    999         0   
41181       no   cellular   nov         fri ...          1    999         0   
41182       no   cellular   nov         fri ...          1      9         1   
41183       no   cellular   nov         fri ...          1    999         0   
41184       no   cellular   nov         fri ...          1    999         0   
41185       no   cellular   nov         fri ...          2    999         0   
41186       no   cellular   nov         fri ...          1    999         0   
41187       no   cellular   nov         fri ...          3    999         1   

          poutcome emp.var.rate  cons.price.idx  cons.conf.idx  euribor3m  \
0      nonexistent          1.1          93.994          -36.4      4.857   
1      nonexistent          1.1          93.994          -36.4      4.857   
2      nonexistent          1.1          93.994          -36.4      4.857   
3      nonexistent          1.1          93.994          -36.4      4.857   
4      nonexistent          1.1          93.994          -36.4      4.857   
5      nonexistent          1.1          93.994          -36.4      4.857   
6      nonexistent          1.1          93.994          -36.4      4.857   
7      nonexistent          1.1          93.994          -36.4      4.857   
8      nonexistent          1.1          93.994          -36.4      4.857   
9      nonexistent          1.1          93.994          -36.4      4.857   
10     nonexistent          1.1          93.994          -36.4      4.857   
11     nonexistent          1.1          93.994          -36.4      4.857   
12     nonexistent          1.1          93.994          -36.4      4.857   
13     nonexistent          1.1          93.994          -36.4      4.857   
14     nonexistent          1.1          93.994          -36.4      4.857   
15     nonexistent          1.1          93.994          -36.4      4.857   
16     nonexistent          1.1          93.994          -36.4      4.857   
17     nonexistent          1.1          93.994          -36.4      4.857   
18     nonexistent          1.1          93.994          -36.4      4.857   
19     nonexistent          1.1          93.994          -36.4      4.857   
20     nonexistent          1.1          93.994          -36.4      4.857   
21     nonexistent          1.1          93.994          -36.4      4.857   
22     nonexistent          1.1          93.994          -36.4      4.857   
23     nonexistent          1.1          93.994          -36.4      4.857   
24     nonexistent          1.1          93.994          -36.4      4.857   
25     nonexistent          1.1          93.994          -36.4      4.857   
26     nonexistent          1.1          93.994          -36.4      4.857   
27     nonexistent          1.1          93.994          -36.4      4.857   
28     nonexistent          1.1          93.994          -36.4      4.857   
29     nonexistent          1.1          93.994          -36.4      4.857   
...            ...          ...             ...            ...        ...   
41158  nonexistent         -1.1          94.767          -50.8      1.035   
41159      success         -1.1          94.767          -50.8      1.035   
41160  nonexistent         -1.1          94.767          -50.8      1.035   
41161      failure         -1.1          94.767          -50.8      1.035   
41162      success         -1.1          94.767          -50.8      1.035   
41163      success         -1.1          94.767          -50.8      1.035   
41164      success         -1.1          94.767          -50.8      1.035   
41165  nonexistent         -1.1          94.767          -50.8      1.030   
41166      failure         -1.1          94.767          -50.8      1.030   
41167  nonexistent         -1.1          94.767          -50.8      1.030   
41168  nonexistent         -1.1          94.767          -50.8      1.030   
41169  nonexistent         -1.1          94.767          -50.8      1.030   
41170      failure         -1.1          94.767          -50.8      1.030   
41171  nonexistent         -1.1          94.767          -50.8      1.031   
41172  nonexistent         -1.1          94.767          -50.8      1.031   
41173      failure         -1.1          94.767          -50.8      1.031   
41174      success         -1.1          94.767          -50.8      1.031   
41175      failure         -1.1          94.767          -50.8      1.031   
41176  nonexistent         -1.1          94.767          -50.8      1.031   
41177  nonexistent         -1.1          94.767          -50.8      1.031   
41178      success         -1.1          94.767          -50.8      1.031   
41179  nonexistent         -1.1          94.767          -50.8      1.028   
41180  nonexistent         -1.1          94.767          -50.8      1.028   
41181  nonexistent         -1.1          94.767          -50.8      1.028   
41182      success         -1.1          94.767          -50.8      1.028   
41183  nonexistent         -1.1          94.767          -50.8      1.028   
41184  nonexistent         -1.1          94.767          -50.8      1.028   
41185  nonexistent         -1.1          94.767          -50.8      1.028   
41186  nonexistent         -1.1          94.767          -50.8      1.028   
41187      failure         -1.1          94.767          -50.8      1.028   

       nr.employed    y  
0           5191.0   no  
1           5191.0   no  
2           5191.0   no  
3           5191.0   no  
4           5191.0   no  
5           5191.0   no  
6           5191.0   no  
7           5191.0   no  
8           5191.0   no  
9           5191.0   no  
10          5191.0   no  
11          5191.0   no  
12          5191.0   no  
13          5191.0   no  
14          5191.0   no  
15          5191.0   no  
16          5191.0   no  
17          5191.0   no  
18          5191.0   no  
19          5191.0   no  
20          5191.0   no  
21          5191.0   no  
22          5191.0   no  
23          5191.0   no  
24          5191.0   no  
25          5191.0   no  
26          5191.0   no  
27          5191.0   no  
28          5191.0   no  
29          5191.0   no  
...            ...  ...  
41158       4963.6  yes  
41159       4963.6  yes  
41160       4963.6  yes  
41161       4963.6   no  
41162       4963.6   no  
41163       4963.6  yes  
41164       4963.6  yes  
41165       4963.6  yes  
41166       4963.6  yes  
41167       4963.6   no  
41168       4963.6   no  
41169       4963.6   no  
41170       4963.6   no  
41171       4963.6  yes  
41172       4963.6  yes  
41173       4963.6  yes  
41174       4963.6  yes  
41175       4963.6   no  
41176       4963.6   no  
41177       4963.6   no  
41178       4963.6  yes  
41179       4963.6   no  
41180       4963.6   no  
41181       4963.6  yes  
41182       4963.6   no  
41183       4963.6  yes  
41184       4963.6   no  
41185       4963.6   no  
41186       4963.6  yes  
41187       4963.6   no  

[41188 rows x 21 columns]>
In [4]:
bank_orig
Out[4]:
age job marital education default housing loan contact month day_of_week ... campaign pdays previous poutcome emp.var.rate cons.price.idx cons.conf.idx euribor3m nr.employed y
0 56 housemaid married basic.4y no no no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
1 57 services married high.school unknown no no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
2 37 services married high.school no yes no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
3 40 admin. married basic.6y no no no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
4 56 services married high.school no no yes telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
5 45 services married basic.9y unknown no no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
6 59 admin. married professional.course no no no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
7 41 blue-collar married unknown unknown no no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
8 24 technician single professional.course no yes no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
9 25 services single high.school no yes no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
10 41 blue-collar married unknown unknown no no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
11 25 services single high.school no yes no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
12 29 blue-collar single high.school no no yes telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
13 57 housemaid divorced basic.4y no yes no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
14 35 blue-collar married basic.6y no yes no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
15 54 retired married basic.9y unknown yes yes telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
16 35 blue-collar married basic.6y no yes no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
17 46 blue-collar married basic.6y unknown yes yes telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
18 50 blue-collar married basic.9y no yes yes telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
19 39 management single basic.9y unknown no no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
20 30 unemployed married high.school no no no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
21 55 blue-collar married basic.4y unknown yes no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
22 55 retired single high.school no yes no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
23 41 technician single high.school no yes no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
24 37 admin. married high.school no yes no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
25 35 technician married university.degree no no yes telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
26 59 technician married unknown no yes no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
27 39 self-employed married basic.9y unknown no no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
28 54 technician single university.degree unknown no no telephone may mon ... 2 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
29 55 unknown married university.degree unknown unknown unknown telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
41158 35 technician divorced basic.4y no no no cellular nov tue ... 1 999 0 nonexistent -1.1 94.767 -50.8 1.035 4963.6 yes
41159 35 technician divorced basic.4y no yes no cellular nov tue ... 1 9 4 success -1.1 94.767 -50.8 1.035 4963.6 yes
41160 33 admin. married university.degree no no no cellular nov tue ... 1 999 0 nonexistent -1.1 94.767 -50.8 1.035 4963.6 yes
41161 33 admin. married university.degree no yes no cellular nov tue ... 1 999 1 failure -1.1 94.767 -50.8 1.035 4963.6 no
41162 60 blue-collar married basic.4y no yes no cellular nov tue ... 2 4 1 success -1.1 94.767 -50.8 1.035 4963.6 no
41163 35 technician divorced basic.4y no yes no cellular nov tue ... 3 4 2 success -1.1 94.767 -50.8 1.035 4963.6 yes
41164 54 admin. married professional.course no no no cellular nov tue ... 2 10 1 success -1.1 94.767 -50.8 1.035 4963.6 yes
41165 38 housemaid divorced university.degree no no no cellular nov wed ... 2 999 0 nonexistent -1.1 94.767 -50.8 1.030 4963.6 yes
41166 32 admin. married university.degree no no no telephone nov wed ... 1 999 1 failure -1.1 94.767 -50.8 1.030 4963.6 yes
41167 32 admin. married university.degree no yes no cellular nov wed ... 3 999 0 nonexistent -1.1 94.767 -50.8 1.030 4963.6 no
41168 38 entrepreneur married university.degree no no no cellular nov wed ... 2 999 0 nonexistent -1.1 94.767 -50.8 1.030 4963.6 no
41169 62 services married high.school no yes no cellular nov wed ... 5 999 0 nonexistent -1.1 94.767 -50.8 1.030 4963.6 no
41170 40 management divorced university.degree no yes no cellular nov wed ... 2 999 4 failure -1.1 94.767 -50.8 1.030 4963.6 no
41171 33 student married professional.course no yes no telephone nov thu ... 1 999 0 nonexistent -1.1 94.767 -50.8 1.031 4963.6 yes
41172 31 admin. single university.degree no yes no cellular nov thu ... 1 999 0 nonexistent -1.1 94.767 -50.8 1.031 4963.6 yes
41173 62 retired married university.degree no yes no cellular nov thu ... 1 999 2 failure -1.1 94.767 -50.8 1.031 4963.6 yes
41174 62 retired married university.degree no yes no cellular nov thu ... 1 1 6 success -1.1 94.767 -50.8 1.031 4963.6 yes
41175 34 student single unknown no yes no cellular nov thu ... 1 999 2 failure -1.1 94.767 -50.8 1.031 4963.6 no
41176 38 housemaid divorced high.school no yes yes cellular nov thu ... 1 999 0 nonexistent -1.1 94.767 -50.8 1.031 4963.6 no
41177 57 retired married professional.course no yes no cellular nov thu ... 6 999 0 nonexistent -1.1 94.767 -50.8 1.031 4963.6 no
41178 62 retired married university.degree no no no cellular nov thu ... 2 6 3 success -1.1 94.767 -50.8 1.031 4963.6 yes
41179 64 retired divorced professional.course no yes no cellular nov fri ... 3 999 0 nonexistent -1.1 94.767 -50.8 1.028 4963.6 no
41180 36 admin. married university.degree no no no cellular nov fri ... 2 999 0 nonexistent -1.1 94.767 -50.8 1.028 4963.6 no
41181 37 admin. married university.degree no yes no cellular nov fri ... 1 999 0 nonexistent -1.1 94.767 -50.8 1.028 4963.6 yes
41182 29 unemployed single basic.4y no yes no cellular nov fri ... 1 9 1 success -1.1 94.767 -50.8 1.028 4963.6 no
41183 73 retired married professional.course no yes no cellular nov fri ... 1 999 0 nonexistent -1.1 94.767 -50.8 1.028 4963.6 yes
41184 46 blue-collar married professional.course no no no cellular nov fri ... 1 999 0 nonexistent -1.1 94.767 -50.8 1.028 4963.6 no
41185 56 retired married university.degree no yes no cellular nov fri ... 2 999 0 nonexistent -1.1 94.767 -50.8 1.028 4963.6 no
41186 44 technician married professional.course no no no cellular nov fri ... 1 999 0 nonexistent -1.1 94.767 -50.8 1.028 4963.6 yes
41187 74 retired married professional.course no yes no cellular nov fri ... 3 999 1 failure -1.1 94.767 -50.8 1.028 4963.6 no

41188 rows × 21 columns

In [5]:
bank_orig.columns
Out[5]:
Index(['age', 'job', 'marital', 'education', 'default', 'housing', 'loan',
       'contact', 'month', 'day_of_week', 'duration', 'campaign', 'pdays',
       'previous', 'poutcome', 'emp.var.rate', 'cons.price.idx',
       'cons.conf.idx', 'euribor3m', 'nr.employed', 'y'],
      dtype='object')
In [14]:
#Visualización del paquete pandas: https://pandas.pydata.org/pandas-docs/stable/visualization.html
#Algunos ejemplos

#bank_orig.plot.hist(x='age', y='euribor3m',bins=20)

bank_orig['age'].plot.hist(bins=20)
Out[14]:
<matplotlib.axes._subplots.AxesSubplot at 0x1e4e2b1ba58>
In [7]:
bank_orig.boxplot(showfliers=False)
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x249a3957cf8>
In [8]:
bank_orig.boxplot(column=['age'], showfliers=False)
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x249a3d204e0>
In [19]:
# Agrupar por los valores de otro atributo
bank_orig.boxplot(column=['age'], by='marital', showfliers=False)
Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x1e4e2bbacc0>
In [20]:
# gráfico de barras horizontales con la proporción de cada clase
plt.figure(1) # Crea una figura con un solo gráfico
plt.clf() # Limpia la figura
import seaborn as sns # Librería de visualización de Python basada en matplotlib
ax = sns.countplot(y="y", data=bank_orig, color="c");
ncount = bank_orig.shape[0]
for p in ax.patches:
    val_x=p.get_bbox().get_points()[:,0]
    val_y=p.get_bbox().get_points()[1,1]
    ax.annotate('{:.0f} ({:.1f}%)'.format(val_x[1], 100.*val_x[1]/ncount), (val_x.mean(), (val_y-0.4)), ha='center', va='center')
In [21]:
# gráfico de barras verticales con la proporción de cada clase
plt.figure(1) # Crea una figura con un solo gráfico
plt.clf() # Limpia la figura
import seaborn as sns # Librería de visualización de Python basada en matplotlib
ax = sns.countplot(x="y", data=bank_orig, color="c"); # Pinta el gráfico de barras básico
ncount = bank_orig.shape[0] # Total instancias
for p in ax.patches: # Para cada una de las barras
    # p.get_bbox().get_points() => puntos que definen la barra (esquina inferior izda y superior dcha)
    val_x=p.get_bbox().get_points()[:,0] 
    val_y=p.get_bbox().get_points()[1,1] 
    ax.annotate('{:.0f} ({:.1f}%)'.format(val_y, 100.*val_y/ncount), (val_x.mean(), (val_y/2.)), ha='center', va='center')
In [30]:
print("------ Preparando el scatter matrix...")
plt.figure(2)
plt.clf()
# para scatter matrix, se convierten las variables categóricas a numéricas
bank_int = bank_orig
# pipe aplica una función (lambda)
char_cols = bank_int.dtypes.pipe(lambda x: x[x == 'object']).index #lista de columnas con var. categóticas (las de tipo 'object')

for c in char_cols:
    bank_int[c] = pd.factorize(bank_int[c])[0]
    
# factorize: codifica el objeto como un enumerado o una variable categórica. Es útil para obtener una representación numérica de un array cuando lo único que importa es identificar valores distintos

lista_vars = list(bank_int)
lista_vars.remove('y') #excluimos la columna 'y' del plot
#se genera el scatter matrix
sns.set()

sns_plot = sns.pairplot(bank_int, vars=lista_vars, hue="y", diag_kind="kde") #en hue indicamos que la columna 'y' define los colores
# diag_kind tipo de gráficos en la diagonal

sns_plot.savefig("bank_scatter_plot.png")
print("")
------ Preparando el scatter matrix...
C:\Users\elena\Anaconda3\lib\site-packages\scipy\stats\stats.py:1713: FutureWarning: Using a non-tuple sequence for multidimensional indexing is deprecated; use `arr[tuple(seq)]` instead of `arr[seq]`. In the future this will be interpreted as an array index, `arr[np.array(seq)]`, which will result either in an error or a different result.
  return np.add.reduce(sorted[indexer] * weights, axis=axis) / sumval
Out[30]:
'\nsns_plot.savefig("bank_scatter_plot.png")\nprint("")\n'
<Figure size 432x288 with 0 Axes>