# IPython magics: enable the autoreload extension in mode 2 so that imported
# modules are re-imported before each cell runs (useful while editing the
# library alongside this notebook).
%load_ext autoreload
%autoreload 2
import sys
# Put the parent directory on the module search path so the `optimus` import
# below can be resolved from there (presumably a local checkout - confirm).
sys.path.append("..")
from optimus import Optimus
# Create the Optimus entry point; later cells reach the reader and the Spark
# session through it (`op.read`, `op.spark`).
op = Optimus()
C:\Users\argenisleon\Anaconda3\lib\site-packages\socks.py:58: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working from collections import Callable You are using PySparkling of version 2.4.10, but your PySpark is of version 2.3.1. Please make sure Spark and PySparkling versions are compatible.
# Load the semicolon-separated CSV, using its first row as the column names.
# In the captured output below every column is reported as a nullable string.
df = op.read.csv("data/random.csv",header=True, sep=";")
# Render the DataFrame as a table (the captured output shows 10 rows).
df.table()
LOCNCODE
1 (string)
nullable
|
LOCNDSCR
2 (string)
nullable
|
ADDRESS1
3 (string)
nullable
|
ADDRESS2
4 (string)
nullable
|
ADDRESS3
5 (string)
nullable
|
CITY
6 (string)
nullable
|
STATE
7 (string)
nullable
|
ZIPCODE
8 (string)
nullable
|
COUNTRY
9 (string)
nullable
|
Location_Segment
10 (string)
nullable
|
PAQ
11 (string)
nullable
|
TIPUNI
12 (string)
nullable
|
Tipo_unidad
13 (string)
nullable
|
ITEMNMBR
14 (string)
nullable
|
ITMSHNAM
15 (string)
nullable
|
MZ
16 (string)
nullable
|
LT
17 (string)
nullable
|
EDIF
18 (string)
nullable
|
NIVEL
19 (string)
nullable
|
NOUNI
20 (string)
nullable
|
CONDO
21 (string)
nullable
|
REGIMEN
22 (string)
nullable
|
ETAPA
23 (string)
nullable
|
PROTO
24 (string)
nullable
|
ITEMDESC
25 (string)
nullable
|
NIVELES
26 (string)
nullable
|
COCHERA
27 (string)
nullable
|
RECAM
28 (string)
nullable
|
ALCOB
29 (string)
nullable
|
BANOS
30 (string)
nullable
|
Num_Balcon
31 (string)
nullable
|
SALA
32 (string)
nullable
|
COMEDOR
33 (string)
nullable
|
COCINA
34 (string)
nullable
|
Cuarto_Lavado
35 (string)
nullable
|
Cuarto_Servicio
36 (string)
nullable
|
OTROX
37 (string)
nullable
|
OTROX1
38 (string)
nullable
|
SupCons
39 (string)
nullable
|
PATIOSERV
40 (string)
nullable
|
TERRAZA
41 (string)
nullable
|
BALCON
42 (string)
nullable
|
AZOTEA
43 (string)
nullable
|
Otros
44 (string)
nullable
|
AREATOT
45 (string)
nullable
|
FRENTE
46 (string)
nullable
|
Sup_Terreno
47 (string)
nullable
|
EXCEDENTE
48 (string)
nullable
|
OTRO1
49 (string)
nullable
|
OTRO2
50 (string)
nullable
|
TAMANO
51 (string)
nullable
|
UBICAVER
52 (string)
nullable
|
UBICAHORI
53 (string)
nullable
|
QTYONHND_
54 (string)
nullable
|
QTYSOLD
55 (string)
nullable
|
INACTIVE
56 (string)
nullable
|
UOMPRICE
57 (string)
nullable
|
MONTOAPA
58 (string)
nullable
|
PAGINI
59 (string)
nullable
|
ENGANCHE
60 (string)
nullable
|
FECHESCRIPRO
61 (string)
nullable
|
FECHAENTREGA
62 (string)
nullable
|
FECHASALIDAVENTAS
63 (string)
nullable
|
LIBERADO_NOLIBERADO
64 (string)
nullable
|
ACTIVO_INACTIVO
65 (string)
nullable
|
Estatus1Vivienda
66 (string)
nullable
|
Estatus2Vivienda
67 (string)
nullable
|
CUSTNMBR
68 (string)
nullable
|
Nombre_Completo
69 (string)
nullable
|
cNombre
70 (string)
nullable
|
cApellidoPaterno
71 (string)
nullable
|
cApellidoMaterno
72 (string)
nullable
|
cRfc
73 (string)
nullable
|
cCurp
74 (string)
nullable
|
fkIdGradoInteres
75 (string)
nullable
|
cSexo
76 (string)
nullable
|
cEmail
77 (string)
nullable
|
cTelefonoCasa
78 (string)
nullable
|
cTelefonoCelular
79 (string)
nullable
|
cTelefonoTrabajo
80 (string)
nullable
|
cNumeroSeguroSocial
81 (string)
nullable
|
dFechaNacimiento
82 (string)
nullable
|
cEstadoCivil
83 (string)
nullable
|
cRegimenConyugal
84 (string)
nullable
|
cNacionalidad
85 (string)
nullable
|
cLugarNacimiento
86 (string)
nullable
|
cRecomendadoPor
87 (string)
nullable
|
fkIdMedio
88 (string)
nullable
|
cMedioContacto
89 (string)
nullable
|
cCalle
90 (string)
nullable
|
cNumeroExterior
91 (string)
nullable
|
cNumeroInterior
92 (string)
nullable
|
cColonia
93 (string)
nullable
|
cMunicipio
94 (string)
nullable
|
cEstado
95 (string)
nullable
|
cPais
96 (string)
nullable
|
cCodigoPostal
97 (string)
nullable
|
nTiempoResidencia
98 (string)
nullable
|
cComentario
99 (string)
nullable
|
cNumeroIdentificacion
100 (string)
nullable
|
cTipoIdentificación
101 (string)
nullable
|
REFERENCIA
102 (string)
nullable
|
FACTURA
103 (string)
nullable
|
NOTACR
104 (string)
nullable
|
Precio_cierre
105 (string)
nullable
|
Precio_cierre_Tot
106 (string)
nullable
|
Aumento_al_Contrato
107 (string)
nullable
|
Condonacón
108 (string)
nullable
|
Precio_Escritura_Total
109 (string)
nullable
|
Precio_Dev
110 (string)
nullable
|
Precio_Dev_Total
111 (string)
nullable
|
Notarios_Proyectados
112 (string)
nullable
|
Gatos_A_terceros
113 (string)
nullable
|
Depositos
114 (string)
nullable
|
Saldo
115 (string)
nullable
|
dFechaCreacion
116 (string)
nullable
|
dFechaModificacion
117 (string)
nullable
|
FECHA_Cotizado
118 (string)
nullable
|
FECHA_SolApartado
119 (string)
nullable
|
FECHA_AutApartado
120 (string)
nullable
|
Vigencia_Apartado
121 (string)
nullable
|
FechaVencimientoApartado
122 (string)
nullable
|
FECHA_SolDictamen
123 (string)
nullable
|
FECHA_ProcDictamen
124 (string)
nullable
|
FECHA_DictaminadoLlamada
125 (string)
nullable
|
FECHA_DictaminadoFirma
126 (string)
nullable
|
FECHA_Dictaminado
127 (string)
nullable
|
FECHA_Rechazado
128 (string)
nullable
|
FECHA_EscrituraAvaluo
129 (string)
nullable
|
FECHA_EscrituraFolio
130 (string)
nullable
|
FolioEscsritura
131 (string)
nullable
|
FECHA_EscrituraReal
132 (string)
nullable
|
FECHA_Cancelado
133 (string)
nullable
|
FECHA_Liberado
134 (string)
nullable
|
FECHA_Entregado
135 (string)
nullable
|
MotivoCancelacion
136 (string)
nullable
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV008
|
ALVCDEY0080
|
None
|
None
|
None
|
None
|
008
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV021
|
ALVCDEY0690
|
None
|
None
|
None
|
None
|
069
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV022
|
ALVCDEY0710
|
None
|
None
|
None
|
None
|
071
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV027
|
ALVCDEY0810
|
None
|
None
|
None
|
None
|
081
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV032
|
ALVCEEY0090
|
None
|
None
|
None
|
None
|
009
|
None
|
0
|
0
|
EST⋅CEEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV035
|
ALVCEEY0150
|
None
|
None
|
None
|
None
|
015
|
None
|
0
|
0
|
EST⋅CEEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV009
|
ALVCDEY0100
|
None
|
None
|
None
|
None
|
010
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV012
|
ALVCDEY0160
|
None
|
None
|
None
|
None
|
016
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV019
|
ALVCDEY0650
|
None
|
None
|
None
|
None
|
065
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV044
|
ALVCUEY0340
|
None
|
None
|
None
|
None
|
034
|
None
|
0
|
0
|
EST⋅CUEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
from optimus.ml import keycollision as keyCol
# Optional Spark settings, intentionally left disabled (shuffle partition count
# and broadcast-join threshold); uncomment to experiment with them.
# op.spark.conf.set("spark.sql.shuffle.partitions", "4")
# op.spark.conf.set("spark.sql.autoBroadcastJoinThreshold", 1 * 1024 * 1024 * 1024)
# Show the data with an extra STATE_FINGERPRINT column derived from STATE
# (in the output below, "Distrito Federal" yields "distritofederal").
keyCol.fingerprint(df, 'STATE').table()
LOCNCODE
1 (string)
nullable
|
LOCNDSCR
2 (string)
nullable
|
ADDRESS1
3 (string)
nullable
|
ADDRESS2
4 (string)
nullable
|
ADDRESS3
5 (string)
nullable
|
CITY
6 (string)
nullable
|
STATE
7 (string)
nullable
|
ZIPCODE
8 (string)
nullable
|
COUNTRY
9 (string)
nullable
|
Location_Segment
10 (string)
nullable
|
PAQ
11 (string)
nullable
|
TIPUNI
12 (string)
nullable
|
Tipo_unidad
13 (string)
nullable
|
ITEMNMBR
14 (string)
nullable
|
ITMSHNAM
15 (string)
nullable
|
MZ
16 (string)
nullable
|
LT
17 (string)
nullable
|
EDIF
18 (string)
nullable
|
NIVEL
19 (string)
nullable
|
NOUNI
20 (string)
nullable
|
CONDO
21 (string)
nullable
|
REGIMEN
22 (string)
nullable
|
ETAPA
23 (string)
nullable
|
PROTO
24 (string)
nullable
|
ITEMDESC
25 (string)
nullable
|
NIVELES
26 (string)
nullable
|
COCHERA
27 (string)
nullable
|
RECAM
28 (string)
nullable
|
ALCOB
29 (string)
nullable
|
BANOS
30 (string)
nullable
|
Num_Balcon
31 (string)
nullable
|
SALA
32 (string)
nullable
|
COMEDOR
33 (string)
nullable
|
COCINA
34 (string)
nullable
|
Cuarto_Lavado
35 (string)
nullable
|
Cuarto_Servicio
36 (string)
nullable
|
OTROX
37 (string)
nullable
|
OTROX1
38 (string)
nullable
|
SupCons
39 (string)
nullable
|
PATIOSERV
40 (string)
nullable
|
TERRAZA
41 (string)
nullable
|
BALCON
42 (string)
nullable
|
AZOTEA
43 (string)
nullable
|
Otros
44 (string)
nullable
|
AREATOT
45 (string)
nullable
|
FRENTE
46 (string)
nullable
|
Sup_Terreno
47 (string)
nullable
|
EXCEDENTE
48 (string)
nullable
|
OTRO1
49 (string)
nullable
|
OTRO2
50 (string)
nullable
|
TAMANO
51 (string)
nullable
|
UBICAVER
52 (string)
nullable
|
UBICAHORI
53 (string)
nullable
|
QTYONHND_
54 (string)
nullable
|
QTYSOLD
55 (string)
nullable
|
INACTIVE
56 (string)
nullable
|
UOMPRICE
57 (string)
nullable
|
MONTOAPA
58 (string)
nullable
|
PAGINI
59 (string)
nullable
|
ENGANCHE
60 (string)
nullable
|
FECHESCRIPRO
61 (string)
nullable
|
FECHAENTREGA
62 (string)
nullable
|
FECHASALIDAVENTAS
63 (string)
nullable
|
LIBERADO_NOLIBERADO
64 (string)
nullable
|
ACTIVO_INACTIVO
65 (string)
nullable
|
Estatus1Vivienda
66 (string)
nullable
|
Estatus2Vivienda
67 (string)
nullable
|
CUSTNMBR
68 (string)
nullable
|
Nombre_Completo
69 (string)
nullable
|
cNombre
70 (string)
nullable
|
cApellidoPaterno
71 (string)
nullable
|
cApellidoMaterno
72 (string)
nullable
|
cRfc
73 (string)
nullable
|
cCurp
74 (string)
nullable
|
fkIdGradoInteres
75 (string)
nullable
|
cSexo
76 (string)
nullable
|
cEmail
77 (string)
nullable
|
cTelefonoCasa
78 (string)
nullable
|
cTelefonoCelular
79 (string)
nullable
|
cTelefonoTrabajo
80 (string)
nullable
|
cNumeroSeguroSocial
81 (string)
nullable
|
dFechaNacimiento
82 (string)
nullable
|
cEstadoCivil
83 (string)
nullable
|
cRegimenConyugal
84 (string)
nullable
|
cNacionalidad
85 (string)
nullable
|
cLugarNacimiento
86 (string)
nullable
|
cRecomendadoPor
87 (string)
nullable
|
fkIdMedio
88 (string)
nullable
|
cMedioContacto
89 (string)
nullable
|
cCalle
90 (string)
nullable
|
cNumeroExterior
91 (string)
nullable
|
cNumeroInterior
92 (string)
nullable
|
cColonia
93 (string)
nullable
|
cMunicipio
94 (string)
nullable
|
cEstado
95 (string)
nullable
|
cPais
96 (string)
nullable
|
cCodigoPostal
97 (string)
nullable
|
nTiempoResidencia
98 (string)
nullable
|
cComentario
99 (string)
nullable
|
cNumeroIdentificacion
100 (string)
nullable
|
cTipoIdentificación
101 (string)
nullable
|
REFERENCIA
102 (string)
nullable
|
FACTURA
103 (string)
nullable
|
NOTACR
104 (string)
nullable
|
Precio_cierre
105 (string)
nullable
|
Precio_cierre_Tot
106 (string)
nullable
|
Aumento_al_Contrato
107 (string)
nullable
|
Condonacón
108 (string)
nullable
|
Precio_Escritura_Total
109 (string)
nullable
|
Precio_Dev
110 (string)
nullable
|
Precio_Dev_Total
111 (string)
nullable
|
Notarios_Proyectados
112 (string)
nullable
|
Gatos_A_terceros
113 (string)
nullable
|
Depositos
114 (string)
nullable
|
Saldo
115 (string)
nullable
|
dFechaCreacion
116 (string)
nullable
|
dFechaModificacion
117 (string)
nullable
|
FECHA_Cotizado
118 (string)
nullable
|
FECHA_SolApartado
119 (string)
nullable
|
FECHA_AutApartado
120 (string)
nullable
|
Vigencia_Apartado
121 (string)
nullable
|
FechaVencimientoApartado
122 (string)
nullable
|
FECHA_SolDictamen
123 (string)
nullable
|
FECHA_ProcDictamen
124 (string)
nullable
|
FECHA_DictaminadoLlamada
125 (string)
nullable
|
FECHA_DictaminadoFirma
126 (string)
nullable
|
FECHA_Dictaminado
127 (string)
nullable
|
FECHA_Rechazado
128 (string)
nullable
|
FECHA_EscrituraAvaluo
129 (string)
nullable
|
FECHA_EscrituraFolio
130 (string)
nullable
|
FolioEscsritura
131 (string)
nullable
|
FECHA_EscrituraReal
132 (string)
nullable
|
FECHA_Cancelado
133 (string)
nullable
|
FECHA_Liberado
134 (string)
nullable
|
FECHA_Entregado
135 (string)
nullable
|
MotivoCancelacion
136 (string)
nullable
|
STATE_FINGERPRINT
137 (string)
nullable
|
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV008
|
ALVCDEY0080
|
None
|
None
|
None
|
None
|
008
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
distritofederal
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV021
|
ALVCDEY0690
|
None
|
None
|
None
|
None
|
069
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
distritofederal
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV022
|
ALVCDEY0710
|
None
|
None
|
None
|
None
|
071
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
distritofederal
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV027
|
ALVCDEY0810
|
None
|
None
|
None
|
None
|
081
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
distritofederal
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV032
|
ALVCEEY0090
|
None
|
None
|
None
|
None
|
009
|
None
|
0
|
0
|
EST⋅CEEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
distritofederal
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV035
|
ALVCEEY0150
|
None
|
None
|
None
|
None
|
015
|
None
|
0
|
0
|
EST⋅CEEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
distritofederal
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV009
|
ALVCDEY0100
|
None
|
None
|
None
|
None
|
010
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
distritofederal
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV012
|
ALVCDEY0160
|
None
|
None
|
None
|
None
|
016
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
distritofederal
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV019
|
ALVCDEY0650
|
None
|
None
|
None
|
None
|
065
|
None
|
0
|
0
|
EST⋅CDEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
distritofederal
|
ALV
|
Altos⋅Lindavista
|
Guanajuato⋅#⋅85
|
None
|
San⋅Bartolo⋅Atepehuacan
|
Gustavo⋅A.⋅Madero
|
Distrito⋅Federal
|
07730
|
Mexico
|
0531
|
None
|
2
|
ESTACIONAMIENTO
|
ALVV044
|
ALVCUEY0340
|
None
|
None
|
None
|
None
|
034
|
None
|
0
|
0
|
EST⋅CUEY
|
Cajon⋅virtual
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
2.2
|
None
|
None
|
None
|
None
|
None
|
None
|
2.4
|
None
|
0
|
None
|
None
|
Chico
|
Cajon⋅virtual
|
Cajon⋅virtual
|
0
|
0
|
1
|
0
|
None
|
None
|
None
|
None
|
None
|
None
|
NO⋅LIBERADO
|
INACTIVO
|
DISPONIBLE
|
000-DISPONIBLE
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
.00000
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
None
|
distritofederal
|
# Cluster the STATE values by fingerprint. Per the output below, each row is a
# cluster with its size, its member spellings, a count and a recommended value.
keyCol.fingerprint_cluster(df, 'STATE').table()
STATE_CLUSTER_SIZE
1 (int)
not nullable
|
STATE_CLUSTER
2 (array<string>)
nullable
|
STATE_COUNT
3 (bigint)
nullable
|
STATE_RECOMMENDED
4 (string)
nullable
|
---|---|---|---|
1
|
['Estado⋅de⋅México']
|
810
|
Estado⋅de⋅México
|
2
|
['México⋅D.F.',⋅'Mexico⋅D.F.']
|
2495
|
Mexico⋅D.F.
|
1
|
['D.F.']
|
66
|
D.F.
|
1
|
['Distriro⋅Federal']
|
259
|
Distriro⋅Federal
|
3
|
['Distrito⋅Federal',⋅'DISTRITO⋅FEDERAL',⋅'distrito⋅federal']
|
11930
|
Distrito⋅Federal
|
# Compute the n-gram fingerprint of STATE and display only the three listed
# columns. The third argument (2) is presumably the n-gram size - confirm.
# NOTE(review): in the captured output STATE_NGRAM_FINGERPRINT renders empty
# for every row - verify this is expected.
keyCol.n_gram_fingerprint(df, 'STATE', 2).table(columns=["STATE","STATE_NGRAM", "STATE_NGRAM_FINGERPRINT"])
STATE_NGRAM STATE_NGRAM_FINGERPRINT
STATE
1 (string)
nullable
|
STATE_NGRAM
2 (array<string>)
not nullable
|
STATE_NGRAM_FINGERPRINT
3 (string)
nullable
|
---|---|---|
Distrito⋅Federal
|
['distritofederal']
|
|
Distrito⋅Federal
|
['distritofederal']
|
|
Distrito⋅Federal
|
['distritofederal']
|
|
Distrito⋅Federal
|
['distritofederal']
|
|
Distrito⋅Federal
|
['distritofederal']
|
|
Distrito⋅Federal
|
['distritofederal']
|
|
Distrito⋅Federal
|
['distritofederal']
|
|
Distrito⋅Federal
|
['distritofederal']
|
|
Distrito⋅Federal
|
['distritofederal']
|
|
Distrito⋅Federal
|
['distritofederal']
|
|
# Cluster STATE by n-gram fingerprint and show the clusters as a table.
# In the captured output all eight spellings fall into a single cluster.
keyCol.n_gram_fingerprint_cluster(df, "STATE" , 2).table()
STATE_NGRAM STATE_NGRAM_FINGERPRINT
STATE_CLUSTER_SIZE
1 (int)
not nullable
|
STATE_CLUSTER
2 (array<string>)
nullable
|
STATE_COUNT
3 (double)
nullable
|
STATE_RECOMMENDED
4 (string)
nullable
|
---|---|---|---|
8
|
['Distrito⋅Federal',⋅'México⋅D.F.',⋅'DISTRITO⋅FEDERAL',⋅'Mexico⋅D.F.',⋅'Distr...
|
15560.0
|
Mexico⋅D.F.
|
# Same clustering as the previous cell, returned via to_json(); the captured
# output is a list of dicts, so the member list is not truncated as in the table.
keyCol.n_gram_fingerprint_cluster(df, "STATE" , 2).to_json()
STATE_NGRAM STATE_NGRAM_FINGERPRINT
[{'STATE_CLUSTER_SIZE': 8, 'STATE_CLUSTER': ['Distrito Federal', 'México D.F.', 'DISTRITO FEDERAL', 'Mexico D.F.', 'Distriro Federal', 'D.F.', 'Estado de México', 'distrito federal'], 'STATE_COUNT': 15560.0, 'STATE_RECOMMENDED': 'Mexico D.F.'}]
# Distance-based clustering helpers; the alias `dc` is reused by the cells below.
from optimus.ml import distancecluster as dc
# Render pairwise Levenshtein distances for STATE. In the output below each row
# holds two values (STATE_LEVENSHTEIN_1, STATE_LEVENSHTEIN_2) and their
# STATE_LEVENSHTEIN_DISTANCE; the compared values appear lower-cased with
# spaces, punctuation and accents removed (e.g. 'estadodemexico').
dc.levenshtein_matrix(df,"STATE").table()
STATE_LEVENSHTEIN_1
1 (string)
nullable
|
STATE_LEVENSHTEIN_2
2 (string)
nullable
|
STATE_LEVENSHTEIN_DISTANCE
3 (int)
nullable
|
---|---|---|
estadodemexico
|
estadodemexico
|
0
|
estadodemexico
|
mexicodf
|
10
|
estadodemexico
|
df
|
13
|
estadodemexico
|
distrirofederal
|
11
|
estadodemexico
|
distritofederal
|
11
|
mexicodf
|
estadodemexico
|
10
|
mexicodf
|
mexicodf
|
0
|
mexicodf
|
df
|
6
|
mexicodf
|
distrirofederal
|
12
|
mexicodf
|
distritofederal
|
12
|
# Render the filtered Levenshtein result for STATE as STATE_FROM,
# STATE_LEVENSHTEIN_DISTANCE and STATE_TO.
# NOTE(review): judging by the output below, each value appears to be paired
# with its closest *other* value (e.g. 'df' -> 'mexicodf' at distance 6) —
# confirm against the distancecluster implementation.
dc.levenshtein_filter(df,"STATE").table()
STATE_FROM
1 (string)
nullable
|
STATE_LEVENSHTEIN_DISTANCE
2 (int)
nullable
|
STATE_TO
3 (string)
nullable
|
---|---|---|
estadodemexico
|
10
|
mexicodf
|
df
|
6
|
mexicodf
|
distrirofederal
|
1
|
distritofederal
|
distritofederal
|
1
|
distrirofederal
|
mexicodf
|
6
|
df
|
# Cluster the STATE values with the Levenshtein-based method and return the
# clusters via to_json(); the output below is a list with one dict per cluster
# (keys STATE_CLUSTER, STATE_CLUSTER_SIZE, STATE_RECOMMENDED, STATE_COUNT).
dc.levenshtein_cluster(df,"STATE").to_json()
[{'STATE_CLUSTER': ['Estado de México'], 'STATE_CLUSTER_SIZE': 1, 'STATE_RECOMMENDED': 'Estado de México', 'STATE_COUNT': 810}, {'STATE_CLUSTER': ['D.F.'], 'STATE_CLUSTER_SIZE': 1, 'STATE_RECOMMENDED': 'D.F.', 'STATE_COUNT': 66}, {'STATE_CLUSTER': ['Distriro Federal'], 'STATE_CLUSTER_SIZE': 1, 'STATE_RECOMMENDED': 'Distriro Federal', 'STATE_COUNT': 259}, {'STATE_CLUSTER': ['Distrito Federal', 'DISTRITO FEDERAL', 'distrito federal'], 'STATE_CLUSTER_SIZE': 3, 'STATE_RECOMMENDED': 'Distrito Federal', 'STATE_COUNT': 11930}, {'STATE_CLUSTER': ['Mexico D.F.', 'México D.F.'], 'STATE_CLUSTER_SIZE': 2, 'STATE_RECOMMENDED': 'Mexico D.F.', 'STATE_COUNT': 2495}]
# Demo rows for the feature-engineering examples: (country, city, population).
data = [
    ("Japan", "Tokyo", 37800000),
    ("USA", "New York", 19795791),
    ("France", "Paris", 12341418),
    ("Spain", "Madrid", 6489162),
]

# Build the DataFrame with explicit column names; this rebinds `df`, replacing
# the DataFrame used by the clustering cells above.
df = op.spark.createDataFrame(data, ["country", "city", "population"])

# Render the new DataFrame.
df.table()
country
1 (string)
nullable
|
city
2 (string)
nullable
|
population
3 (bigint)
nullable
|
---|---|---|
Japan
|
Tokyo
|
37800000
|
USA
|
New⋅York
|
19795791
|
France
|
Paris
|
12341418
|
Spain
|
Madrid
|
6489162
|
# Feature-engineering helpers; the alias `fe` is reused by the cells below.
from optimus.ml import feature as fe
# Encode the string columns "city" and "country" as numeric indices. The output
# below shows them appended as double columns "city_INDEX" and "country_INDEX".
df_sti = fe.string_to_index(df, input_cols=["city", "country"])
# Render the indexed DataFrame.
df_sti.table()
country
1 (string)
nullable
|
city
2 (string)
nullable
|
population
3 (bigint)
nullable
|
city_INDEX
4 (double)
not nullable
|
country_INDEX
5 (double)
not nullable
|
---|---|---|---|---|
Japan
|
Tokyo
|
37800000
|
2.0
|
3.0
|
USA
|
New⋅York
|
19795791
|
3.0
|
2.0
|
France
|
Paris
|
12341418
|
0.0
|
1.0
|
Spain
|
Madrid
|
6489162
|
1.0
|
0.0
|
# Map the numeric "country_INDEX" column back to its string labels. The output
# below shows the result in a new "country_INDEX_string" column.
df_its = fe.index_to_string(df_sti, input_cols=["country_INDEX"])
# Show the DataFrame with "country_INDEX" converted back to strings.
df_its.table()
country
1 (string)
nullable
|
city
2 (string)
nullable
|
population
3 (bigint)
nullable
|
city_INDEX
4 (double)
not nullable
|
country_INDEX
5 (double)
not nullable
|
country_INDEX_string
6 (string)
nullable
|
---|---|---|---|---|---|
Japan
|
Tokyo
|
37800000
|
2.0
|
3.0
|
Japan
|
USA
|
New⋅York
|
19795791
|
3.0
|
2.0
|
USA
|
France
|
Paris
|
12341418
|
0.0
|
1.0
|
France
|
Spain
|
Madrid
|
6489162
|
1.0
|
0.0
|
Spain
|
# Build six (id, category) rows; ids are the positions 0..5 in the label list,
# i.e. (0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c").
data = list(enumerate(["a", "b", "c", "a", "a", "c"]))
df = op.spark.createDataFrame(data, ["id", "category"])

# One-hot encode the "id" column. In the output below the encoded vectors land
# in a new "id__ENCODED" column, and the last id (5) is shown as the empty
# sparse vector (5,[],[]).
df_ohe = fe.one_hot_encoder(df, input_cols=["id"])

# Render the encoded DataFrame.
df_ohe.table()
id
1 (bigint)
nullable
|
category
2 (string)
nullable
|
id__ENCODED
3 (vector)
nullable
|
---|---|---|
0
|
a
|
(5,[0],[1.0])
|
1
|
b
|
(5,[1],[1.0])
|
2
|
c
|
(5,[2],[1.0])
|
3
|
a
|
(5,[3],[1.0])
|
4
|
a
|
(5,[4],[1.0])
|
5
|
c
|
(5,[],[])
|
# Vectors is needed to build the dense "user_features" value below.
from pyspark.ml.linalg import Vectors
# Single-row DataFrame mixing scalar columns with a 3-element dense vector.
data = [(0, 18, 1.0, Vectors.dense([0.0, 10.0, 0.5]), 1.0)]
df = op.spark.createDataFrame(data,["id", "hour", "mobile", "user_features", "clicked"])
# Assemble "hour", "mobile" and "user_features" into one vector column; in the
# output below the result is the 5-element vector [18.0,1.0,0.0,10.0,0.5].
df_va = fe.vector_assembler(df, input_cols=["hour", "mobile", "user_features"])
# Show only the assembled "features" column next to "clicked".
print("Assembled columns 'hour', 'mobile', 'user_features' to vector column 'features'")
df_va.select("features", "clicked").table()
Assembled columns 'hour', 'mobile', 'user_features' to vector column 'features'
features
1 (vector)
nullable
|
clicked
2 (double)
nullable
|
---|---|
[18.0,1.0,0.0,10.0,0.5]
|
1.0
|
# Vectors is needed to build the dense feature vectors below.
from pyspark.ml.linalg import Vectors

# Three (id, features) rows; each id is the position of its vector in the list.
data = [
    (row_id, Vectors.dense(values))
    for row_id, values in enumerate(
        [
            [1.0, 0.5, -1.0],
            [2.0, 1.0, 1.0],
            [4.0, 10.0, 2.0],
        ]
    )
]
df = op.spark.createDataFrame(data, ["id", "features"])

# Normalize each feature vector with p=2.0. The output below adds a
# "features_NORMALIZED" column in which every vector is divided by its
# Euclidean length (e.g. [1.0,0.5,-1.0] has length 1.5).
df_norm = fe.normalizer(df, input_cols=["features"], p=2.0)

# Render the normalized DataFrame.
df_norm.table()
id
1 (bigint)
nullable
|
features
2 (vector)
nullable
|
---|---|
0
|
[1.0,0.5,-1.0]
|
1
|
[2.0,1.0,1.0]
|
2
|
[4.0,10.0,2.0]
|
id
1 (bigint)
nullable
|
features
2 (vector)
nullable
|
features_NORMALIZED
3 (vector)
nullable
|
---|---|---|
0
|
[1.0,0.5,-1.0]
|
[0.6666666666666666,0.3333333333333333,-0.6666666666666666]
|
1
|
[2.0,1.0,1.0]
|
[0.8164965809277261,0.4082482904638631,0.4082482904638631]
|
2
|
[4.0,10.0,2.0]
|
[0.3651483716701107,0.9128709291752769,0.18257418583505536]
|