# Load IPython's autoreload extension and reload imported modules, so that
# edits to Optimus (or any other library) are picked up when this cell runs.
# NOTE(review): a bare `%autoreload` reloads once, immediately; `%autoreload 2`
# would reload before every execution -- confirm which behavior is intended.
%load_ext autoreload
%autoreload
# When running from the example folder, add the parent directory to the
# import path so that the `optimus` package can be found.
import sys
sys.path.append("..")
# Create the Optimus instance (`op`) used below to build the DataFrame.
from optimus import Optimus
op = Optimus()
You are using PySparkling of version 2.4.10, but your PySpark is of version 2.3.1. Please make sure Spark and PySparkling versions are compatible.
from pyspark.sql.types import StringType, IntegerType, ArrayType

# Build a small sample DataFrame.
# First argument: one (column name, data type, nullable) tuple per column.
# Second argument: the rows, with values in the same order as the columns.
df = op.create.df(
    [
        ("words", "str", True),
        ("num", "int", True),
        ("animals", "str", True),
        ("thing", StringType(), True),
        ("two strings", StringType(), True),
        ("filter", StringType(), True),
        ("num 2", "string", True),
        ("col_array", ArrayType(StringType()), True),
        ("col_int", ArrayType(IntegerType()), True),
    ],
    [
        # The irregular leading, inner and trailing whitespace in the "words"
        # values matches the table rendered by df.ext.display(), which shows
        # each space as "⋅". Do not normalize it.
        ("  I like     fish  ", 1, "dog", "housé", "cat-car", "a", "1", ["baby", "sorry"], [1, 2, 3]),
        ("    zombies", 2, "cat", "tv", "dog-tv", "b", "2", ["baby 1", "sorry 1"], [3, 4]),
        ("simpsons   cat lady", 2, "frog", "table", "eagle-tv-plus", "1", "3", ["baby 2", "sorry 2"], [5, 6, 7]),
        (None, 3, "eagle", "glass", "lion-pc", "c", "4", ["baby 3", "sorry 3"], [7, 8]),
    ],
)

# Render the DataFrame as a table (column names, types, nullability, rows).
df.ext.display()
| words<br>1 (string)<br>nullable | num<br>2 (int)<br>nullable | animals<br>3 (string)<br>nullable | thing<br>4 (string)<br>nullable | two strings<br>5 (string)<br>nullable | filter<br>6 (string)<br>nullable | num 2<br>7 (string)<br>nullable | col_array<br>8 (array<string>)<br>nullable | col_int<br>9 (array<int>)<br>nullable |
|---|---|---|---|---|---|---|---|---|
| ⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅ | 1 | dog | housé | cat-car | a | 1 | ['baby',⋅'sorry'] | [1,⋅2,⋅3] |
| ⋅⋅⋅⋅zombies | 2 | cat | tv | dog-tv | b | 2 | ['baby⋅1',⋅'sorry⋅1'] | [3,⋅4] |
| simpsons⋅⋅⋅cat⋅lady | 2 | frog | table | eagle-tv-plus | 1 | 3 | ['baby⋅2',⋅'sorry⋅2'] | [5,⋅6,⋅7] |
| None | 3 | eagle | glass | lion-pc | c | 4 | ['baby⋅3',⋅'sorry⋅3'] | [7,⋅8] |