%load_ext autoreload
%autoreload 2
import sys
sys.path.append("..")
from optimus import Optimus
# Create optimus
op = Optimus("dask", verbose = True)
# !pip install mysqlclient
# Put your db credentials here
db = op.connect(
driver="mysql",
host="165.227.196.70",
database= "optimus",
user= "test",
password = "test")
mysql://test:test@165.227.196.70:3306/optimus
db.tables()
['test_data']
db.table_to_df("test_data").ext.display()
id
1 (int64)
not nullable
|
first_name
2 (object)
not nullable
|
last_name
3 (object)
not nullable
|
email
4 (object)
not nullable
|
gender
5 (object)
not nullable
|
ip_address
6 (object)
not nullable
|
---|---|---|---|---|---|
1
|
Ikey
|
Crudginton
|
icrudginton0@freewebs.com
|
Male
|
72.210.21.255
|
2
|
Erwin
|
Edden
|
eedden1@nytimes.com
|
Male
|
16.205.155.142
|
3
|
Rudyard
|
Dullaghan
|
rdullaghan2@techcrunch.com
|
Male
|
84.170.67.167
|
4
|
Eugen
|
Staining
|
estaining3@merriam-webster.com
|
Male
|
211.36.45.228
|
5
|
Carleton
|
Hammond
|
chammond4@example.com
|
Male
|
177.7.250.134
|
6
|
Ermengarde
|
Knightly
|
eknightly5@google.co.jp
|
Female
|
231.176.117.190
|
7
|
Myles
|
Rattray
|
mrattray6@about.com
|
Male
|
4.193.247.67
|
8
|
Banky
|
Shires
|
bshires7@so-net.ne.jp
|
Male
|
16.18.210.158
|
9
|
Chastity
|
Birtwell
|
cbirtwell8@seesaa.net
|
Female
|
167.15.222.219
|
10
|
Harv
|
Fotherby
|
hfotherby9@godaddy.com
|
Male
|
143.117.248.106
|
11
|
Janaye
|
Farlam
|
jfarlama@goo.gl
|
Female
|
207.25.248.64
|
db.execute(query="SELECT * FROM test_data", partition_column= "id", table_name = "test_data").ext.display()
SELECT * FROM test_data WHERE test_data.id >= 1.0 AND test_data.id < 10.9 SELECT * FROM test_data WHERE test_data.id >= 10.9 AND test_data.id < 20.8 SELECT * FROM test_data WHERE test_data.id >= 20.8 AND test_data.id < 30.700000000000003 SELECT * FROM test_data WHERE test_data.id >= 30.700000000000003 AND test_data.id < 40.6 SELECT * FROM test_data WHERE test_data.id >= 40.6 AND test_data.id < 50.5 SELECT * FROM test_data WHERE test_data.id >= 50.5 AND test_data.id < 60.400000000000006 SELECT * FROM test_data WHERE test_data.id >= 60.400000000000006 AND test_data.id < 70.3 SELECT * FROM test_data WHERE test_data.id >= 70.3 AND test_data.id < 80.2 SELECT * FROM test_data WHERE test_data.id >= 80.2 AND test_data.id < 90.10000000000001 SELECT * FROM test_data WHERE test_data.id > 90.10000000000001 AND test_data.id <= 100.0
first_name
1 (object)
not nullable
|
last_name
2 (object)
not nullable
|
email
3 (object)
not nullable
|
gender
4 (object)
not nullable
|
ip_address
5 (object)
not nullable
|
---|---|---|---|---|
Ikey
|
Crudginton
|
icrudginton0@freewebs.com
|
Male
|
72.210.21.255
|
Erwin
|
Edden
|
eedden1@nytimes.com
|
Male
|
16.205.155.142
|
Rudyard
|
Dullaghan
|
rdullaghan2@techcrunch.com
|
Male
|
84.170.67.167
|
Eugen
|
Staining
|
estaining3@merriam-webster.com
|
Male
|
211.36.45.228
|
Carleton
|
Hammond
|
chammond4@example.com
|
Male
|
177.7.250.134
|
Ermengarde
|
Knightly
|
eknightly5@google.co.jp
|
Female
|
231.176.117.190
|
Myles
|
Rattray
|
mrattray6@about.com
|
Male
|
4.193.247.67
|
Banky
|
Shires
|
bshires7@so-net.ne.jp
|
Male
|
16.18.210.158
|
Chastity
|
Birtwell
|
cbirtwell8@seesaa.net
|
Female
|
167.15.222.219
|
Harv
|
Fotherby
|
hfotherby9@godaddy.com
|
Male
|
143.117.248.106
|
db.execute(query="SELECT * FROM test_data", table_name = "test_data", num_partitions = 9).ext.display()
id
1 (int64)
not nullable
|
first_name
2 (object)
not nullable
|
last_name
3 (object)
not nullable
|
email
4 (object)
not nullable
|
gender
5 (object)
not nullable
|
ip_address
6 (object)
not nullable
|
---|---|---|---|---|---|
1
|
Ikey
|
Crudginton
|
icrudginton0@freewebs.com
|
Male
|
72.210.21.255
|
2
|
Erwin
|
Edden
|
eedden1@nytimes.com
|
Male
|
16.205.155.142
|
3
|
Rudyard
|
Dullaghan
|
rdullaghan2@techcrunch.com
|
Male
|
84.170.67.167
|
4
|
Eugen
|
Staining
|
estaining3@merriam-webster.com
|
Male
|
211.36.45.228
|
5
|
Carleton
|
Hammond
|
chammond4@example.com
|
Male
|
177.7.250.134
|
6
|
Ermengarde
|
Knightly
|
eknightly5@google.co.jp
|
Female
|
231.176.117.190
|
7
|
Myles
|
Rattray
|
mrattray6@about.com
|
Male
|
4.193.247.67
|
8
|
Banky
|
Shires
|
bshires7@so-net.ne.jp
|
Male
|
16.18.210.158
|
9
|
Chastity
|
Birtwell
|
cbirtwell8@seesaa.net
|
Female
|
167.15.222.219
|
10
|
Harv
|
Fotherby
|
hfotherby9@godaddy.com
|
Male
|
143.117.248.106
|
11
|
Janaye
|
Farlam
|
jfarlama@goo.gl
|
Female
|
207.25.248.64
|
db.execute(query="SELECT * FROM test_data")
['id', 'first_name', 'last_name', 'email', 'gender', 'ip_address']
id | first_name | last_name | gender | ip_address | ||
---|---|---|---|---|---|---|
npartitions=10 | ||||||
0.0 | int64 | object | object | object | object | object |
1.0 | ... | ... | ... | ... | ... | ... |
... | ... | ... | ... | ... | ... | ... |
9.0 | ... | ... | ... | ... | ... | ... |
10.0 | ... | ... | ... | ... | ... | ... |
a = """SELECT *, NTILE (4) OVER (ORDER BY id) id FROM test_data;"""
%%time
db.execute(query="SELECT * FROM test_data").ext.display()
['id', 'first_name', 'last_name', 'email', 'gender', 'ip_address']
id
1 (int64)
not nullable
|
first_name
2 (object)
not nullable
|
last_name
3 (object)
not nullable
|
email
4 (object)
not nullable
|
gender
5 (object)
not nullable
|
ip_address
6 (object)
not nullable
|
---|---|---|---|---|---|
100
|
Otes
|
Regus
|
oregus2r@state.tx.us
|
Male
|
108.200.225.64
|
99
|
Theda
|
Clitherow
|
tclitherow2q@wired.com
|
Female
|
224.222.199.13
|
98
|
Dennis
|
Wolffers
|
dwolffers2p@si.edu
|
Male
|
192.73.35.122
|
97
|
Benoit
|
Bail
|
bbail2o@eventbrite.com
|
Male
|
230.203.89.67
|
96
|
Stillman
|
Birtle
|
sbirtle2n@google.ru
|
Male
|
29.119.151.180
|
95
|
Chad
|
Swalteridge
|
cswalteridge2m@webnode.com
|
Female
|
223.70.250.55
|
94
|
Rustin
|
MacKay
|
rmackay2l@icio.us
|
Male
|
170.203.62.186
|
93
|
Austina
|
Jaume
|
ajaume2k@cdc.gov
|
Female
|
77.3.231.136
|
92
|
Brana
|
Lawlee
|
blawlee2j@moonfruit.com
|
Female
|
223.177.21.208
|
91
|
Joannes
|
Nardoni
|
jnardoni2i@state.tx.us
|
Female
|
236.230.152.229
|
90
|
Carly
|
de⋅Amaya
|
cdeamaya2h@google.com.au
|
Male
|
165.206.105.231
|
89
|
Celle
|
Pelfer
|
cpelfer2g@desdev.cn
|
Female
|
71.24.177.38
|
88
|
Tildi
|
Lakin
|
tlakin2f@godaddy.com
|
Female
|
29.124.49.70
|
87
|
Orlando
|
Cann
|
ocann2e@webmd.com
|
Male
|
145.212.223.32
|
86
|
Dorian
|
De⋅Francisci
|
ddefrancisci2d@hibu.com
|
Female
|
155.207.166.60
|
85
|
Shay
|
Loffel
|
sloffel2c@cam.ac.uk
|
Female
|
199.115.204.136
|
84
|
Sharline
|
Devaney
|
sdevaney2b@paypal.com
|
Female
|
102.243.230.248
|
83
|
Muffin
|
McFaell
|
mmcfaell2a@salon.com
|
Male
|
12.100.76.50
|
82
|
Moishe
|
MacNockater
|
mmacnockater29@jiathis.com
|
Male
|
221.43.209.221
|
81
|
Rubina
|
Fairburn
|
rfairburn28@time.com
|
Female
|
79.25.16.53
|
80
|
Emeline
|
Sitford
|
esitford27@howstuffworks.com
|
Female
|
230.60.120.207
|
79
|
Agace
|
Eubank
|
aeubank26@amazon.co.uk
|
Female
|
160.186.113.75
|
78
|
Giff
|
Philippson
|
gphilippson25@1688.com
|
Male
|
224.124.122.55
|
77
|
Ardis
|
McTiernan
|
amctiernan24@psu.edu
|
Female
|
4.120.76.153
|
76
|
Stoddard
|
Lindenblatt
|
slindenblatt23@biblegateway.com
|
Female
|
223.238.3.100
|
75
|
Nydia
|
Hutchin
|
nhutchin22@paypal.com
|
Female
|
192.64.228.8
|
74
|
Alysa
|
Howard
|
ahoward21@stanford.edu
|
Female
|
118.114.176.179
|
73
|
Janka
|
D'Arrigo
|
jdarrigo20@home.pl
|
Female
|
69.25.245.202
|
72
|
Brant
|
Boij
|
bboij1z@oracle.com
|
Male
|
18.197.89.53
|
71
|
Moses
|
Standing
|
mstanding1y@skyrock.com
|
Male
|
162.119.26.224
|
70
|
Pietro
|
McCully
|
pmccully1x@discuz.net
|
Male
|
217.43.182.106
|
69
|
Jamison
|
Paulsen
|
jpaulsen1w@indiatimes.com
|
Male
|
80.179.85.115
|
68
|
Berti
|
Botler
|
bbotler1v@kickstarter.com
|
Male
|
121.194.38.205
|
67
|
Ronald
|
Normanell
|
rnormanell1u@ted.com
|
Male
|
21.161.236.96
|
66
|
Raynell
|
Maton
|
rmaton1t@wisc.edu
|
Female
|
115.200.121.215
|
65
|
Tann
|
Frarey
|
tfrarey1s@multiply.com
|
Male
|
187.201.92.171
|
64
|
Leanor
|
Petrie
|
lpetrie1r@mtv.com
|
Female
|
57.34.249.126
|
63
|
Felecia
|
Wonter
|
fwonter1q@geocities.com
|
Female
|
59.165.181.157
|
62
|
Cthrine
|
Brecknock
|
cbrecknock1p@ebay.co.uk
|
Female
|
198.33.59.97
|
61
|
Abner
|
De⋅Freitas
|
adefreitas1o@barnesandnoble.com
|
Male
|
17.46.49.64
|
60
|
Justin
|
Careless
|
jcareless1n@cloudflare.com
|
Male
|
238.69.199.67
|
59
|
Kristina
|
Henaughan
|
khenaughan1m@who.int
|
Female
|
4.15.64.1
|
58
|
Brennan
|
Hatfull
|
bhatfull1l@eepurl.com
|
Male
|
171.96.0.43
|
57
|
Woodrow
|
Tunbridge
|
wtunbridge1k@creativecommons.org
|
Male
|
172.40.160.130
|
56
|
Fleurette
|
Cambridge
|
fcambridge1j@illinois.edu
|
Female
|
85.214.219.212
|
55
|
Cindy
|
Lade
|
clade1i@bandcamp.com
|
Female
|
27.8.225.166
|
54
|
Isobel
|
Tompkiss
|
itompkiss1h@webmd.com
|
Female
|
89.241.183.111
|
53
|
Dannie
|
Midlane
|
dmidlane1g@blogger.com
|
Female
|
81.161.97.210
|
52
|
Emanuele
|
Curman
|
ecurman1f@usda.gov
|
Male
|
150.158.155.59
|
51
|
Bondy
|
Gallelli
|
bgallelli1e@hc360.com
|
Male
|
145.51.28.175
|
50
|
Tulley
|
Mertsching
|
tmertsching1d@hud.gov
|
Male
|
114.9.199.50
|
49
|
Lucius
|
Azam
|
lazam1c@moonfruit.com
|
Male
|
218.51.93.26
|
48
|
Donnie
|
Anthoin
|
danthoin1b@accuweather.com
|
Female
|
121.201.174.77
|
47
|
Ingmar
|
Halmkin
|
ihalmkin1a@aboutads.info
|
Male
|
223.91.160.113
|
46
|
Aurore
|
Dobby
|
adobby19@t-online.de
|
Female
|
98.169.201.146
|
45
|
Skyler
|
Wivell
|
swivell18@eepurl.com
|
Male
|
107.231.71.108
|
44
|
Rickert
|
Roch
|
rroch17@yellowpages.com
|
Male
|
182.72.67.92
|
43
|
Darcie
|
McGeaney
|
dmcgeaney16@canalblog.com
|
Female
|
10.235.104.189
|
42
|
Tiebold
|
Pottie
|
tpottie15@merriam-webster.com
|
Male
|
83.215.236.118
|
41
|
Carolan
|
Delmonti
|
cdelmonti14@phpbb.com
|
Female
|
228.17.150.76
|
40
|
Jo-anne
|
Galbreth
|
jgalbreth13@statcounter.com
|
Female
|
189.4.167.117
|
39
|
Chauncey
|
Burtwell
|
cburtwell12@unicef.org
|
Male
|
198.249.57.250
|
38
|
Asa
|
Thirwell
|
athirwell11@amazon.co.uk
|
Male
|
209.172.156.93
|
37
|
Layla
|
Aspole
|
laspole10@people.com.cn
|
Female
|
9.109.10.17
|
36
|
Curtice
|
Minget
|
cmingetz@businessweek.com
|
Male
|
189.242.14.127
|
35
|
Prudi
|
Rivitt
|
privitty@noaa.gov
|
Female
|
32.16.138.187
|
34
|
Margie
|
Gordon
|
mgordonx@lycos.com
|
Female
|
243.94.253.221
|
33
|
Pall
|
Sawrey
|
psawreyw@facebook.com
|
Male
|
209.246.100.200
|
32
|
Jorge
|
Racher
|
jracherv@google.com
|
Male
|
233.56.214.255
|
31
|
Davide
|
Scourfield
|
dscourfieldu@rediff.com
|
Male
|
113.239.62.4
|
30
|
Sarge
|
Haresign
|
sharesignt@miitbeian.gov.cn
|
Male
|
212.180.157.241
|
29
|
Somerset
|
Hawksby
|
shawksbys@accuweather.com
|
Male
|
209.158.180.144
|
28
|
Tad
|
Domange
|
tdomanger@wikimedia.org
|
Male
|
49.219.69.114
|
27
|
Benedicto
|
Holtham
|
bholthamq@constantcontact.com
|
Male
|
235.57.49.45
|
26
|
Leah
|
Jopling
|
ljoplingp@topsy.com
|
Female
|
39.158.211.20
|
25
|
Benita
|
Zarfati
|
bzarfatio@oakley.com
|
Female
|
130.115.169.187
|
24
|
Julita
|
Conradie
|
jconradien@tamu.edu
|
Female
|
179.65.95.37
|
23
|
Syman
|
Dumbellow
|
sdumbellowm@bbc.co.uk
|
Male
|
206.25.6.205
|
22
|
Chalmers
|
Palister
|
cpalisterl@ibm.com
|
Male
|
0.5.174.160
|
21
|
Saw
|
Habard
|
shabardk@reuters.com
|
Male
|
107.111.35.73
|
10
|
Harv
|
Fotherby
|
hfotherby9@godaddy.com
|
Male
|
143.117.248.106
|
9
|
Chastity
|
Birtwell
|
cbirtwell8@seesaa.net
|
Female
|
167.15.222.219
|
8
|
Banky
|
Shires
|
bshires7@so-net.ne.jp
|
Male
|
16.18.210.158
|
7
|
Myles
|
Rattray
|
mrattray6@about.com
|
Male
|
4.193.247.67
|
6
|
Ermengarde
|
Knightly
|
eknightly5@google.co.jp
|
Female
|
231.176.117.190
|
5
|
Carleton
|
Hammond
|
chammond4@example.com
|
Male
|
177.7.250.134
|
4
|
Eugen
|
Staining
|
estaining3@merriam-webster.com
|
Male
|
211.36.45.228
|
3
|
Rudyard
|
Dullaghan
|
rdullaghan2@techcrunch.com
|
Male
|
84.170.67.167
|
2
|
Erwin
|
Edden
|
eedden1@nytimes.com
|
Male
|
16.205.155.142
|
1
|
Ikey
|
Crudginton
|
icrudginton0@freewebs.com
|
Male
|
72.210.21.255
|
Wall time: 23.7 s
a.ext.table()
id
1 (int64)
not nullable
|
first_name
2 (object)
not nullable
|
last_name
3 (object)
not nullable
|
email
4 (object)
not nullable
|
gender
5 (object)
not nullable
|
ip_address
6 (object)
not nullable
|
---|---|---|---|---|---|
1
|
Ikey
|
Crudginton
|
icrudginton0@freewebs.com
|
Male
|
72.210.21.255
|
2
|
Erwin
|
Edden
|
eedden1@nytimes.com
|
Male
|
16.205.155.142
|
3
|
Rudyard
|
Dullaghan
|
rdullaghan2@techcrunch.com
|
Male
|
84.170.67.167
|
4
|
Eugen
|
Staining
|
estaining3@merriam-webster.com
|
Male
|
211.36.45.228
|
5
|
Carleton
|
Hammond
|
chammond4@example.com
|
Male
|
177.7.250.134
|
6
|
Ermengarde
|
Knightly
|
eknightly5@google.co.jp
|
Female
|
231.176.117.190
|
7
|
Myles
|
Rattray
|
mrattray6@about.com
|
Male
|
4.193.247.67
|
8
|
Banky
|
Shires
|
bshires7@so-net.ne.jp
|
Male
|
16.18.210.158
|
9
|
Chastity
|
Birtwell
|
cbirtwell8@seesaa.net
|
Female
|
167.15.222.219
|
10
|
Harv
|
Fotherby
|
hfotherby9@godaddy.com
|
Male
|
143.117.248.106
|
11
|
Janaye
|
Farlam
|
jfarlama@goo.gl
|
Female
|
207.25.248.64
|
12
|
Odille
|
Olivas
|
oolivasb@gravatar.com
|
Female
|
33.0.247.41
|
13
|
Tessie
|
Maffiotti
|
tmaffiottic@sciencedirect.com
|
Female
|
143.214.111.244
|
14
|
Sybila
|
Littleover
|
slittleoverd@businesswire.com
|
Female
|
165.13.137.100
|
15
|
Dag
|
Attryde
|
dattrydee@mediafire.com
|
Male
|
65.226.179.64
|
16
|
Alisa
|
Gilbey
|
agilbeyf@squidoo.com
|
Female
|
119.14.232.90
|
17
|
Alvin
|
Deyenhardt
|
adeyenhardtg@myspace.com
|
Male
|
101.33.81.5
|
18
|
Garey
|
Hanley
|
ghanleyh@gravatar.com
|
Male
|
196.203.82.107
|
19
|
Alaine
|
Vint
|
avinti@cnbc.com
|
Female
|
201.18.136.196
|
20
|
Berta
|
Sakins
|
bsakinsj@topsy.com
|
Female
|
117.231.82.148
|
df = db.table_to_df("test_data", limit=None)
db.tables_names_to_json()
['test_data']
# Put your db credentials here
db = op.connect(
driver="postgresql",
host="165.227.196.70",
database= "optimus",
user= "testuser",
password = "test")
--------------------------------------------------------------------------- AttributeError Traceback (most recent call last) <ipython-input-70-35a374ce0ffe> in <module> 5 database= "optimus", 6 user= "testuser", ----> 7 password = "test") ~\Documents\Optimus\optimus\engines\dask.py in connect(driver, host, database, user, password, port, schema, oracle_tns, oracle_service_name, oracle_sid, presto_catalog, cassandra_keyspace, cassandra_table) 34 35 return JDBC(host, database, user, password, port, driver, schema, oracle_tns, oracle_service_name, oracle_sid, ---> 36 presto_catalog, cassandra_keyspace, cassandra_table) 37 # def create(self, data): 38 # import dask.dataframe as dd ~\Documents\Optimus\optimus\dask\io\jdbc.py in __init__(self, host, database, user, password, port, driver, schema, oracle_tns, oracle_service_name, oracle_sid, presto_catalog, cassandra_keyspace, cassandra_table) 58 oracle_sid=oracle_sid, 59 oracle_service_name=oracle_service_name, ---> 60 presto_catalog=presto_catalog 61 ) 62 self.database = database ~\Documents\Optimus\optimus\spark\io\driver_context.py in uri(self, *args, **kwargs) 22 23 def uri(self, *args, **kwargs) -> str: ---> 24 return self._driver.uri(*args, **kwargs) 25 26 def url(self, *args, **kwargs) -> str: AttributeError: 'PostgreSQLDriver' object has no attribute 'uri'
db.tables()
INFO:optimus:( SELECT relname as table_name,cast (reltuples as integer) AS count FROM pg_class C LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace) WHERE nspname IN ('public') AND relkind='r' ORDER BY reltuples DESC) AS t INFO:optimus:jdbc:postgresql://165.227.196.70:5432/optimus?currentSchema=public
table_name
1 (string)
nullable
|
count
2 (int)
nullable
|
---|---|
test_data
|
1100
|
db.table_to_df("test_data").table()
INFO:optimus:SELECT * FROM test_data INFO:optimus:(SELECT * FROM test_data) AS t INFO:optimus:jdbc:postgresql://165.227.196.70:5432/optimus?currentSchema=public
id
1 (int)
nullable
|
first_name
2 (string)
nullable
|
last_name
3 (string)
nullable
|
email
4 (string)
nullable
|
gender
5 (string)
nullable
|
ip_address
6 (string)
nullable
|
---|---|---|---|---|---|
1
|
Ikey
|
Crudginton
|
icrudginton0@freewebs.com
|
Male
|
72.210.21.255
|
2
|
Erwin
|
Edden
|
eedden1@nytimes.com
|
Male
|
16.205.155.142
|
3
|
Rudyard
|
Dullaghan
|
rdullaghan2@techcrunch.com
|
Male
|
84.170.67.167
|
4
|
Eugen
|
Staining
|
estaining3@merriam-webster.com
|
Male
|
211.36.45.228
|
5
|
Carleton
|
Hammond
|
chammond4@example.com
|
Male
|
177.7.250.134
|
6
|
Ermengarde
|
Knightly
|
eknightly5@google.co.jp
|
Female
|
231.176.117.190
|
7
|
Myles
|
Rattray
|
mrattray6@about.com
|
Male
|
4.193.247.67
|
8
|
Banky
|
Shires
|
bshires7@so-net.ne.jp
|
Male
|
16.18.210.158
|
9
|
Chastity
|
Birtwell
|
cbirtwell8@seesaa.net
|
Female
|
167.15.222.219
|
10
|
Harv
|
Fotherby
|
hfotherby9@godaddy.com
|
Male
|
143.117.248.106
|
db.tables_names_to_json()
INFO:optimus:( SELECT relname as table_name FROM pg_class C LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace) WHERE nspname IN ('public') AND relkind='r' ORDER BY reltuples DESC) AS t INFO:optimus:jdbc:postgresql://165.227.196.70:5432/optimus?currentSchema=public
['test_data']
# Put your db credentials here
db = op.connect(
driver="sqlserver",
host="165.227.196.70",
database= "optimus",
user= "test",
password = "test*0261")
INFO:optimus:jdbc:sqlserver://165.227.196.70:1433;databaseName=optimus
db.tables()
INFO:optimus:(SELECT * FROM INFORMATION_SCHEMA.TABLES) AS t INFO:optimus:jdbc:sqlserver://165.227.196.70:1433;databaseName=optimus
TABLE_CATALOG
1 (string)
nullable
|
TABLE_SCHEMA
2 (string)
nullable
|
TABLE_NAME
3 (string)
nullable
|
TABLE_TYPE
4 (string)
nullable
|
---|---|---|---|
optimus
|
dbo
|
test_data
|
BASE⋅TABLE
|
db.table_to_df("test_data").table()
INFO:optimus:SELECT * FROM test_data INFO:optimus:(SELECT * FROM test_data) AS t INFO:optimus:jdbc:sqlserver://165.227.196.70:1433;databaseName=optimus
id
1 (int)
nullable
|
first_name
2 (string)
nullable
|
last_name
3 (string)
nullable
|
email
4 (string)
nullable
|
gender
5 (string)
nullable
|
ip_address
6 (string)
nullable
|
---|---|---|---|---|---|
1
|
Keenan
|
McAirt
|
kmcairt0@spotify.com
|
Male
|
68.97.227.147
|
2
|
Fredelia
|
Lemarie
|
flemarie1@furl.net
|
Female
|
16.145.123.46
|
1
|
Keenan
|
McAirt
|
kmcairt0@spotify.com
|
Male
|
68.97.227.147
|
2
|
Fredelia
|
Lemarie
|
flemarie1@furl.net
|
Female
|
16.145.123.46
|
1
|
Keenan
|
McAirt
|
kmcairt0@spotify.com
|
Male
|
68.97.227.147
|
2
|
Fredelia
|
Lemarie
|
flemarie1@furl.net
|
Female
|
16.145.123.46
|
1
|
Keenan
|
McAirt
|
kmcairt0@spotify.com
|
Male
|
68.97.227.147
|
2
|
Fredelia
|
Lemarie
|
flemarie1@furl.net
|
Female
|
16.145.123.46
|
2
|
Fredelia
|
Lemarie
|
flemarie1@furl.net
|
Female
|
16.145.123.46
|
1
|
Evyn
|
Abbey
|
eabbey0@mlb.com
|
Male
|
202.99.246.227
|
db.tables_names_to_json()
INFO:optimus:(SELECT TABLE_NAME FROM INFORMATION_SCHEMA.TABLES) AS t INFO:optimus:jdbc:sqlserver://165.227.196.70:1433;databaseName=optimus
['test_data']
# Put your db credentials here
db = op.connect(
driver="redshift",
host="165.227.196.70",
database= "optimus",
user= "testuser",
password = "test")
INFO:optimus:jdbc:redshift://redshift-cluster-1.chuvgsqx7epn.us-east-1.redshift.amazonaws.com:5439/dev?currentSchema=public
db.tables()
INFO:optimus:( SELECT relname as table_name,cast (reltuples as integer) AS count FROM pg_class C LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace) WHERE nspname IN ('public') AND relkind='r' ORDER BY reltuples DESC) AS t INFO:optimus:jdbc:redshift://redshift-cluster-1.chuvgsqx7epn.us-east-1.redshift.amazonaws.com:5439/dev?currentSchema=public
--------------------------------------------------------------------------- Py4JJavaError Traceback (most recent call last) <ipython-input-3-cdef22199e9a> in <module> ----> 1 db.tables() ~\Documents\Optimus\optimus\io\jdbc.py in tables(self, schema, database, limit) 179 FROM user_tables ORDER BY table_name""" 180 --> 181 df = self.execute(query, limit) 182 return df.table(limit) 183 ~\Documents\Optimus\optimus\io\jdbc.py in execute(self, query, limit) 309 conf.options(table=self.cassandra_table, keyspace=self.cassandra_keyspace) 310 --> 311 return self._limit(conf.load(), limit) 312 313 def df_to_table(self, df, table, mode="overwrite"): ~\Anaconda3\lib\site-packages\pyspark\sql\readwriter.py in load(self, path, format, schema, **options) 170 return self._df(self._jreader.load(self._spark._sc._jvm.PythonUtils.toSeq(path))) 171 else: --> 172 return self._df(self._jreader.load()) 173 174 @since(1.4) ~\Anaconda3\lib\site-packages\py4j\java_gateway.py in __call__(self, *args) 1255 answer = self.gateway_client.send_command(command) 1256 return_value = get_return_value( -> 1257 answer, self.gateway_client, self.target_id, self.name) 1258 1259 for temp_arg in temp_args: ~\Anaconda3\lib\site-packages\pyspark\sql\utils.py in deco(*a, **kw) 61 def deco(*a, **kw): 62 try: ---> 63 return f(*a, **kw) 64 except py4j.protocol.Py4JJavaError as e: 65 s = e.java_exception.toString() ~\Anaconda3\lib\site-packages\py4j\protocol.py in get_return_value(answer, gateway_client, target_id, name) 326 raise Py4JJavaError( 327 "An error occurred while calling {0}{1}{2}.\n". --> 328 format(target_id, ".", name), value) 329 else: 330 raise Py4JError( Py4JJavaError: An error occurred while calling o41.load. : java.sql.SQLException: [Amazon](500150) Error setting/closing connection: Connection timed out: connect. at com.amazon.redshift.client.PGClient.connect(Unknown Source) at com.amazon.redshift.client.PGClient.<init>(Unknown Source) at com.amazon.redshift.core.PGJDBCConnection.connect(Unknown Source) at com.amazon.jdbc.common.BaseConnectionFactory.doConnect(Unknown Source) at com.amazon.jdbc.common.AbstractDriver.connect(Unknown Source) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:63) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:54) at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:56) at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation.<init>(JDBCRelation.scala:115) at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:52) at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:340) at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:239) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:227) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:164) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) at py4j.Gateway.invoke(Gateway.java:282) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:238) Caused by: com.amazon.support.exceptions.GeneralException: [Amazon](500150) Error setting/closing connection: Connection timed out: connect. ... 24 more Caused by: java.net.ConnectException: Connection timed out: connect at sun.nio.ch.Net.connect0(Native Method) at sun.nio.ch.Net.connect(Net.java:454) at sun.nio.ch.Net.connect(Net.java:446) at sun.nio.ch.SocketChannelImpl.connect(SocketChannelImpl.java:648) at sun.nio.ch.SocketAdaptor.connect(SocketAdaptor.java:96) at com.amazon.redshift.client.PGClient.connect(Unknown Source) at com.amazon.redshift.client.PGClient.<init>(Unknown Source) at com.amazon.redshift.core.PGJDBCConnection.connect(Unknown Source) at com.amazon.jdbc.common.BaseConnectionFactory.doConnect(Unknown Source) at com.amazon.jdbc.common.AbstractDriver.connect(Unknown Source) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:63) at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:54) at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD$.resolveTable(JDBCRDD.scala:56) at org.apache.spark.sql.execution.datasources.jdbc.JDBCRelation.<init>(JDBCRelation.scala:115) at org.apache.spark.sql.execution.datasources.jdbc.JdbcRelationProvider.createRelation(JdbcRelationProvider.scala:52) at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:340) at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:239) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:227) at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:164) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357) at py4j.Gateway.invoke(Gateway.java:282) at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132) at py4j.commands.CallCommand.execute(CallCommand.java:79) at py4j.GatewayConnection.run(GatewayConnection.java:238) at java.lang.Thread.run(Thread.java:748)
db.table_to_df("test_data").table()
# Put your db credentials here
db = op.connect(
driver="oracle",
host="165.227.196.70",
database= "optimus",
user= "testuser",
password = "test")
# Put your db credentials here
db = op.connect(
driver="sqlite",
host="chinook.db",
database= "employes",
user= "testuser",
password = "test")
INFO:optimus:jdbc:sqlite:chinook.db
db.tables()
INFO:optimus:(SELECT name FROM sqlite_master WHERE type='table') AS t INFO:optimus:jdbc:sqlite:chinook.db
name
1 (string)
nullable
|
---|
albums
|
sqlite_sequence
|
artists
|
customers
|
employees
|
genres
|
invoices
|
invoice_items
|
media_types
|
playlists
|
db.table_to_df("albums",limit="all").table()
INFO:optimus:(SELECT COUNT(*) as COUNT FROM albums) AS t INFO:optimus:jdbc:sqlite:chinook.db
347 rows
INFO:optimus:SELECT * FROM albums INFO:optimus:(SELECT * FROM albums) AS t INFO:optimus:jdbc:sqlite:chinook.db
AlbumId
1 (int)
nullable
|
Title
2 (string)
nullable
|
ArtistId
3 (int)
nullable
|
---|---|---|
1
|
For⋅Those⋅About⋅To⋅Rock⋅We⋅Salute⋅You
|
1
|
2
|
Balls⋅to⋅the⋅Wall
|
2
|
3
|
Restless⋅and⋅Wild
|
2
|
4
|
Let⋅There⋅Be⋅Rock
|
1
|
5
|
Big⋅Ones
|
3
|
6
|
Jagged⋅Little⋅Pill
|
4
|
7
|
Facelift
|
5
|
8
|
Warner⋅25⋅Anos
|
6
|
9
|
Plays⋅Metallica⋅By⋅Four⋅Cellos
|
7
|
10
|
Audioslave
|
8
|
db.tables_names_to_json()
INFO:optimus:(SELECT name FROM sqlite_master WHERE type='table') AS t INFO:optimus:jdbc:sqlite:chinook.db
['albums', 'sqlite_sequence', 'artists', 'customers', 'employees', 'genres', 'invoices', 'invoice_items', 'media_types', 'playlists', 'playlist_track', 'tracks', 'sqlite_stat1']
df = op.load.csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv", sep=",", header='true', infer_schema='true', charset="UTF-8", null_value="None")
INFO:optimus:Downloading foo.csv from https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv INFO:optimus:Downloaded 967 bytes INFO:optimus:Creating DataFrame for foo.csv. Please wait...
df.table()
id
1 (int)
nullable
|
firstName
2 (string)
nullable
|
lastName
3 (string)
nullable
|
billingId
4 (int)
nullable
|
product
5 (string)
nullable
|
price
6 (int)
nullable
|
birth
7 (string)
nullable
|
dummyCol
8 (string)
nullable
|
---|---|---|---|---|---|---|---|
1
|
Luis
|
Alvarez$$%!
|
123
|
Cake
|
10
|
1980/07/07
|
never
|
2
|
André
|
Ampère
|
423
|
piza
|
8
|
1950/07/08
|
gonna
|
3
|
NiELS
|
Böhr//((%%
|
551
|
pizza
|
8
|
1990/07/09
|
give
|
4
|
PAUL
|
dirac$
|
521
|
pizza
|
8
|
1954/07/10
|
you
|
5
|
Albert
|
Einstein
|
634
|
pizza
|
8
|
1990/07/11
|
up
|
6
|
Galileo
|
⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI
|
672
|
arepa
|
5
|
1930/08/12
|
never
|
7
|
CaRL
|
Ga%%%uss
|
323
|
taco
|
3
|
1970/07/13
|
gonna
|
8
|
David
|
H$$$ilbert
|
624
|
taaaccoo
|
3
|
1950/07/14
|
let
|
9
|
Johannes
|
KEPLER
|
735
|
taco
|
3
|
1920/04/22
|
you
|
10
|
JaMES
|
M$$ax%%well
|
875
|
taco
|
3
|
1923/03/12
|
down
|
# Put your db credentials here
db = op.connect(
driver="redis",
host="165.227.196.70",
port = 6379,
database= 1,
password = "")
db.df_to_table(df, "hola1", redis_primary_key="id")
INFO:optimus:`id`,`firstName`,`lastName`,`billingId`,`product`,`price`,`birth`,`dummyCol` column(s) was not processed because is/are not array,vector INFO:optimus:Outputting 0 columns after filtering. Is this expected? INFO:optimus:Using 'column_exp' to process column 'id' with function _cast_to INFO:optimus:Using 'column_exp' to process column 'firstName' with function _cast_to INFO:optimus:Using 'column_exp' to process column 'lastName' with function _cast_to INFO:optimus:Using 'column_exp' to process column 'billingId' with function _cast_to INFO:optimus:Using 'column_exp' to process column 'product' with function _cast_to INFO:optimus:Using 'column_exp' to process column 'price' with function _cast_to INFO:optimus:Using 'column_exp' to process column 'birth' with function _cast_to INFO:optimus:Using 'column_exp' to process column 'dummyCol' with function _cast_to
hola1
# https://stackoverflow.com/questions/56707978/how-to-write-from-a-pyspark-dstream-to-redis
db.table_to_df(0)
--------------------------------------------------------------------------- TypeError Traceback (most recent call last) <ipython-input-33-b3e61395c772> in <module> 1 # https://stackoverflow.com/questions/56707978/how-to-write-from-a-pyspark-dstream-to-redis 2 ----> 3 db.table_to_df(0) ~\Documents\Optimus\optimus\io\jdbc.py in table_to_df(self, table_name, columns, limit) 122 123 db_table = table_name --> 124 query = self.driver_context.count_query(db_table=db_table) 125 if limit == "all": 126 count = self.execute(query, "all").first()[0] ~\Documents\Optimus\optimus\io\driver_context.py in count_query(self, *args, **kwargs) 31 32 def count_query(self, *args, **kwargs) -> str: ---> 33 return self._driver.count_query(*args, **kwargs) ~\Documents\Optimus\optimus\io\sqlserver.py in count_query(self, *args, **kwargs) 24 25 def count_query(self, *args, **kwargs) -> str: ---> 26 return "SELECT COUNT(*) as COUNT FROM " + kwargs["db_table"] TypeError: can only concatenate str (not "int") to str