import sys
# Add the parent directory to the module search path; presumably this is where
# the local optimus package lives (it is imported further down) — confirm.
sys.path.append("..")
from pyspark.sql import SparkSession
# Reuse the active Spark session if there is one, otherwise start one with the
# application name 'abc'.
spark = SparkSession.builder.appName('abc').getOrCreate()
# Load the sample CSV, taking column names from its first row. No schema is
# supplied or inferred, so every column is read as a string.
df=spark.read.csv('data/foo.csv',header=True)
from optimus import Optimus
# Create the Optimus entry point from the Spark session built above.
op = Optimus(spark)
<optimus.spark.Spark object at 0x0000024CCA0D7208>
# Display the DataFrame as a table. `table()` is not a stock PySpark DataFrame
# method; presumably it is attached when optimus is imported — confirm.
df.table()
id 1 (string) nullable | firstName 2 (string) nullable | lastName 3 (string) nullable | billingId 4 (string) nullable | product 5 (string) nullable | price 6 (string) nullable | birth 7 (string) nullable | dummyCol 8 (string) nullable |
---|---|---|---|---|---|---|---|
1 | Luis | Alvarez$$%! | 123 | Cake | 10 | 1980/07/07 | never |
2 | André | Ampère | 423 | piza | 8 | 1950/07/08 | gonna |
3 | NiELS | Böhr//((%% | 551 | pizza | 8 | 1990/07/09 | give |
4 | PAUL | dirac$ | 521 | pizza | 8 | 1954/07/10 | you |
5 | Albert | Einstein | 634 | pizza | 8 | 1990/07/11 | up |
6 | Galileo | ⸱⸱⸱⸱⸱⸱⸱⸱⸱⸱⸱⸱⸱GALiLEI | 672 | arepa | 5 | 1930/08/12 | never |
7 | CaRL | Ga%%%uss | 323 | taco | 3 | 1970/07/13 | gonna |
8 | David | H$$$ilbert | 624 | taaaccoo | 3 | 1950/07/14 | let |
9 | Johannes | KEPLER | 735 | taco | 3 | 1920/04/22 | you |
10 | JaMES | M$$ax%%well | 875 | taco | 3 | 1923/03/12 | down |
11 | Isaac | Newton | 992 | pasta | 9 | 1999/02/15 | never⸱ |
12 | Emmy%% | Nöether$ | 234 | pasta | 9 | 1993/12/08 | gonna |
13 | Max!!! | Planck!!! | 111 | hamburguer | 4 | 1994/01/04 | run⸱ |
14 | Fred | Hoy&&&le | 553 | pizzza | 8 | 1997/06/27 | around |
15 | (((⸱⸱⸱Heinrich⸱))))) | Hertz | 116 | pizza | 8 | 1956/11/30 | and |
16 | William | Gilbert### | 886 | BEER | 2 | 1958/03/26 | desert |
17 | Marie | CURIE | 912 | Rice | 1 | 2000/03/22 | you |
18 | Arthur | COM%%%pton | 812 | 110790 | 5 | 1899/01/01 | # |
19 | JAMES | Chadwick | 467 | null | 10 | 1921/05/03 | # |