Hi, this notebook will show you some column operation available in Optimus.
%load_ext autoreload
%autoreload 2
import sys
sys.path.append("..")
from optimus import Optimus
op = Optimus("pandas")
df = op.create.dataframe({
("words", "str", True): [' I like fish ', ' zombies', 'simpsons cat lady', None],
("num", "int", True): [1, 2, 2, 3],
("animals", "str", True): ['dog', 'cat', 'frog', 'eagle'],
("thing", "str", True): ['housé', 'tv', 'table', 'glass'],
("two strings", "str", True): ['cat-car', 'dog-tv', 'eagle-tv-plus', 'lion-pc'],
("filter", "str", True): ['a', 'b', '1', 'c'],
("num 2", "string", True): ['1', '2', '3', '4'],
("col_array", None, True): [['baby', 'sorry'], ['baby 1', 'sorry 1'], ['baby 2', 'sorry 2'], ['baby 3', 'sorry 3']],
("col_int", None, True): [[1, 2, 3], [3, 4], [5, 6, 7], [7, 8]]
})
df.display()
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
df = df.cols.set("new_col_1", 1)
df.display(highlight="new_col_1")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
df_col = op.create.dataframe({("NEW COLUMN", "str", True): ["q", "w", "e", "r"]})
df = df.cols.append(df_col)
df.display(highlight="NEW COLUMN")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.append({"col 2": 2.22, "col 3": 3}).display(highlight=["col 2", "col 3"])
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
col 2
12 (float64)
|
col 3
13 (int64)
|
---|---|---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
2.22
|
3
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
2.22
|
3
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
2.22
|
3
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
2.22
|
3
|
df.cols.append({
"just test": "test",
"num 2": df['num'] * 2,
"more numbers": [1, 2, 3, 4]
}).cols.select(["num", "just test", "num 2", "more numbers"]).display()
num
1 (int32)
|
just test
2 (object)
|
num 2
3 (float64)
|
more numbers
4 (int64)
|
---|---|---|---|
1
|
test
|
2.0
|
1
|
2
|
test
|
4.0
|
2
|
2
|
test
|
4.0
|
3
|
3
|
test
|
6.0
|
4
|
display(df.cols.names())
columns = ["words", 1, "animals", 3]
df.cols.select(columns).display()
['words', 'num', 'animals', 'thing', 'two strings', 'filter', 'num 2', 'col_array', 'col_int', 'new_col_1', 'NEW COLUMN']
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
None
|
3
|
eagle
|
glass
|
df.cols.select("n.*", regex=True).display()
num
1 (int32)
|
num 2
2 (string)
|
new_col_1
3 (int64)
|
---|---|---|
1
|
1
|
1
|
2
|
2
|
1
|
2
|
3
|
1
|
3
|
4
|
1
|
df.cols.select(data_type="int").display()
num
1 (int32)
|
new_col_1
2 (int64)
|
---|---|
1
|
1
|
2
|
1
|
2
|
1
|
3
|
1
|
df.cols.rename('num', 'number').display(highlight="number")
words
1 (object)
|
number
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.rename([('num', 'number'), ("animals", "gods")], str.upper).display(highlight=["NUMBER", "GODS"])
words
1 (object)
|
NUMBER
2 (int32)
|
GODS
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.rename(str.lower).display(highlight="new column")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
new column
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.rename(str.upper).display()
WORDS
1 (object)
|
NUM
2 (int32)
|
ANIMALS
3 (object)
|
THING
4 (object)
|
TWO STRINGS
5 (object)
|
FILTER
6 (object)
|
NUM 2
7 (string)
|
COL_ARRAY
8 (object)
|
COL_INT
9 (object)
|
NEW_COL_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
This is a opinionated way to handle column casting. One of the first thing that every data cleaning process need to acomplish is define a data dictionary. Because of that we prefer to create a tuple like this:
df.cols().cast( [("words","str"), ("num","int"), ("animals","float"), ("thing","str")] )
df.cols.cast(columns=[("num", "str"), ("num 2", "int")]).cols.select(["num", "num 2"]).cols.dtypes()
{'num': 'object', 'num 2': 'int64'}
df.cols.cast("num", "str").cols.dtypes()
{'words': 'object', 'num': 'object', 'animals': 'object', 'thing': 'object', 'two strings': 'object', 'filter': 'object', 'num 2': 'string', 'col_array': 'object', 'col_int': 'object', 'new_col_1': 'int64', 'NEW COLUMN': 'object'}
df.cols.cast("*", "str").cols.dtypes()
{'words': 'object', 'num': 'object', 'animals': 'object', 'thing': 'object', 'two strings': 'object', 'filter': 'object', 'num 2': 'object', 'col_array': 'object', 'col_int': 'object', 'new_col_1': 'object', 'NEW COLUMN': 'object'}
df.cols.move("thing", "after", "words").display(highlight=["thing", "words"])
words
1 (object)
|
thing
2 (object)
|
num
3 (int32)
|
animals
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
housé
|
1
|
dog
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
tv
|
2
|
cat
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
table
|
2
|
frog
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
glass
|
3
|
eagle
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.sort().display()
animals
1 (object)
|
col_array
2 (object)
|
col_int
3 (object)
|
filter
4 (object)
|
NEW COLUMN
5 (object)
|
new_col_1
6 (int64)
|
num
7 (int32)
|
num 2
8 (string)
|
thing
9 (object)
|
two strings
10 (object)
|
words
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
dog
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
a
|
q
|
1
|
1
|
1
|
housé
|
cat-car
|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
cat
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
b
|
w
|
1
|
2
|
2
|
tv
|
dog-tv
|
⋅⋅⋅⋅zombies
|
frog
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
1
|
2
|
3
|
table
|
eagle-tv-plus
|
simpsons⋅⋅⋅cat⋅lady
|
eagle
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
c
|
r
|
1
|
3
|
4
|
glass
|
lion-pc
|
None
|
df.cols.sort(order="desc").display()
words
1 (object)
|
two strings
2 (object)
|
thing
3 (object)
|
num 2
4 (string)
|
num
5 (int32)
|
new_col_1
6 (int64)
|
NEW COLUMN
7 (object)
|
filter
8 (object)
|
col_int
9 (object)
|
col_array
10 (object)
|
animals
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
cat-car
|
housé
|
1
|
1
|
1
|
q
|
a
|
[1,⋅2,⋅3]
|
['baby',⋅'sorry']
|
dog
|
⋅⋅⋅⋅zombies
|
dog-tv
|
tv
|
2
|
2
|
1
|
w
|
b
|
[3,⋅4]
|
['baby⋅1',⋅'sorry⋅1']
|
cat
|
simpsons⋅⋅⋅cat⋅lady
|
eagle-tv-plus
|
table
|
3
|
2
|
1
|
e
|
1
|
[5,⋅6,⋅7]
|
['baby⋅2',⋅'sorry⋅2']
|
frog
|
None
|
lion-pc
|
glass
|
4
|
3
|
1
|
r
|
c
|
[7,⋅8]
|
['baby⋅3',⋅'sorry⋅3']
|
eagle
|
df.cols.drop("num").display()
words
1 (object)
|
animals
2 (object)
|
thing
3 (object)
|
two strings
4 (object)
|
filter
5 (object)
|
num 2
6 (string)
|
col_array
7 (object)
|
col_int
8 (object)
|
new_col_1
9 (int64)
|
NEW COLUMN
10 (object)
|
---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.drop(["num", "words"]).display()
animals
1 (object)
|
thing
2 (object)
|
two strings
3 (object)
|
filter
4 (object)
|
num 2
5 (string)
|
col_array
6 (object)
|
col_int
7 (object)
|
new_col_1
8 (int64)
|
NEW COLUMN
9 (object)
|
---|---|---|---|---|---|---|---|---|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
cols and rows accessors are used to organize and encapsulate optimus
methods, it can be helpfull when you look at the code because every line is self explained.
The past transformations were done step by step, but this can be achieved by chaining all operations into one line of code, like the cell below. This way is much more efficient and scalable because it uses all optimization issues from the lazy evaluation approach.
df.display()
df \
.cols.rename([('num', 'number')]) \
.cols.drop(["number", "words"]) \
.cols.append({"col_10": 10}) \
.cols.sort(order="desc") \
.cols.upper("two strings") \
.display()
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
two strings
1 (object)
|
thing
2 (object)
|
num 2
3 (string)
|
new_col_1
4 (int64)
|
NEW COLUMN
5 (object)
|
filter
6 (object)
|
col_int
7 (object)
|
col_array
8 (object)
|
col_10
9 (int64)
|
animals
10 (object)
|
---|---|---|---|---|---|---|---|---|---|
CAT-CAR
|
housé
|
1
|
1
|
q
|
a
|
[1,⋅2,⋅3]
|
['baby',⋅'sorry']
|
10
|
dog
|
DOG-TV
|
tv
|
2
|
1
|
w
|
b
|
[3,⋅4]
|
['baby⋅1',⋅'sorry⋅1']
|
10
|
cat
|
EAGLE-TV-PLUS
|
table
|
3
|
1
|
e
|
1
|
[5,⋅6,⋅7]
|
['baby⋅2',⋅'sorry⋅2']
|
10
|
frog
|
LION-PC
|
glass
|
4
|
1
|
r
|
c
|
[7,⋅8]
|
['baby⋅3',⋅'sorry⋅3']
|
10
|
eagle
|
With unnest you can convert one column into multiple ones. it can hadle strings and arrays.
df.cols.select("two strings").cols.unnest("two strings", "-").display()
two strings
1 (object)
|
two strings_0
2 (object)
|
two strings_1
3 (object)
|
---|---|---|
cat-car
|
cat
|
car
|
dog-tv
|
dog
|
tv
|
eagle-tv-plus
|
eagle
|
tv-plus
|
lion-pc
|
lion
|
pc
|
df.cols.select("two strings").cols.unnest("two strings", "-", index=1).display()
two strings
1 (object)
|
two strings_1
2 (object)
|
---|---|
cat-car
|
car
|
dog-tv
|
tv
|
eagle-tv-plus
|
tv-plus
|
lion-pc
|
pc
|
df.cols.select("col_array").cols.unnest("col_array").display()
col_array
1 (object)
|
col_array_0
2 (object)
|
col_array_1
3 (object)
|
---|---|---|
['baby',⋅'sorry']
|
['baby',
|
'sorry']
|
['baby⋅1',⋅'sorry⋅1']
|
['baby
|
1',⋅'sorry⋅1']
|
['baby⋅2',⋅'sorry⋅2']
|
['baby
|
2',⋅'sorry⋅2']
|
['baby⋅3',⋅'sorry⋅3']
|
['baby
|
3',⋅'sorry⋅3']
|
df.cols.select("col_int").cols.unnest("col_int").display()
col_int
1 (object)
|
col_int_0
2 (object)
|
col_int_1
3 (object)
|
---|---|---|
[1,⋅2,⋅3]
|
[1,
|
2,⋅3]
|
[3,⋅4]
|
[3,
|
4]
|
[5,⋅6,⋅7]
|
[5,
|
6,⋅7]
|
[7,⋅8]
|
[7,
|
8]
|
df.cols.select("two strings").cols.unnest(["two strings"], splits=3, separator="-").display()
two strings
1 (object)
|
two strings_0
2 (object)
|
two strings_1
3 (object)
|
two strings_2
4 (object)
|
---|---|---|---|
cat-car
|
cat
|
car
|
None
|
dog-tv
|
dog
|
tv
|
None
|
eagle-tv-plus
|
eagle
|
tv
|
plus
|
lion-pc
|
lion
|
pc
|
None
|
df_fill = op.create.dataframe(A=[1.0, 2.0, op.nan, 4.0, 5.0], B=[op.nan, op.nan, 3.0, 4.0, 5.0], C=[1, op.nan, None, 2, 2])
df_fill.display()
A
1 (float64)
|
B
2 (float64)
|
C
3 (float64)
|
---|---|---|
1.0
|
nan
|
1.0
|
2.0
|
nan
|
nan
|
nan
|
3.0
|
nan
|
4.0
|
4.0
|
2.0
|
5.0
|
5.0
|
2.0
|
df_fill.cols.impute(["A", "B"], "continuous", "median")
A
1 (float64)
|
B
2 (float64)
|
C
3 (float64)
|
---|---|---|
1.0
|
4.0
|
1.0
|
2.0
|
4.0
|
nan
|
3.0
|
3.0
|
nan
|
4.0
|
4.0
|
2.0
|
5.0
|
5.0
|
2.0
|
df_fill.cols.impute(["C"], strategy="most_frequent")
A
1 (float64)
|
B
2 (float64)
|
C
3 (float64)
|
---|---|---|
1.0
|
nan
|
1.0
|
2.0
|
nan
|
2.0
|
nan
|
3.0
|
2.0
|
4.0
|
4.0
|
2.0
|
5.0
|
5.0
|
2.0
|
df
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
filter
¶Sometimes there are columns with numeric and string values together.
In order to solve this problem, set.numeric() function can be used to operate over just one of those types.
In the next example we replace ever number with a string "new string"
def func(val, arg):
return arg
df.set.numeric("filter", func, args="new string").display(highlight="filter")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
new⋅string
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
Or you could pass the value directly
df.set.numeric("filter", "new string").display(highlight="filter")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
new⋅string
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
def func(val):
return val + 20
df.cols.set(["num", "num 2"], func).display(highlight=["num", "num 2"])
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (int32)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
21
|
dog
|
housé
|
cat-car
|
a
|
21
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
22
|
cat
|
tv
|
dog-tv
|
b
|
22
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
22
|
frog
|
table
|
eagle-tv-plus
|
1
|
22
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
23
|
eagle
|
glass
|
lion-pc
|
c
|
23
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
filter
is an integer¶df.rows.int("filter").display(highlight="filter")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
num
is greater than 1¶df.rows.greater_than("num", 1).display(highlight="num")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.set
¶def func(val, arg1, arg2):
return val + arg1 + arg2
df.cols.set("num_sum", func, args=[5, 6], default="num").display(highlight="num_sum")
words
1 (object)
|
num
2 (int32)
|
num_sum
3 (int32)
|
animals
4 (object)
|
thing
5 (object)
|
two strings
6 (object)
|
filter
7 (object)
|
num 2
8 (string)
|
col_array
9 (object)
|
col_int
10 (object)
|
new_col_1
11 (int64)
|
NEW COLUMN
12 (object)
|
---|---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
12
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
13
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
13
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
14
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
num
and num 2
are both greater than 2¶where = (df["num"] > 2) | (df["num 2"]> 2)
df.cols.set(["num", "num 2"], 10, where=where).display(highlight=["num", "num 2"])
words
1 (object)
|
num
2 (int64)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (int64)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
10
|
frog
|
table
|
eagle-tv-plus
|
1
|
10
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
10
|
eagle
|
glass
|
lion-pc
|
c
|
10
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df_null = op.create.dataframe(session=[1,1,1,1,1,1,1], timestamp=[1,2,3,4,5,6,6], id=[None, 5.0, op.nan, None, 10.0, op.nan, op.nan])
df_null
session
1 (int64)
|
timestamp
2 (int64)
|
id
3 (float64)
|
---|---|---|
1
|
1
|
nan
|
1
|
2
|
5.0
|
1
|
3
|
nan
|
1
|
4
|
nan
|
1
|
5
|
10.0
|
1
|
6
|
nan
|
1
|
6
|
nan
|
df_null.cols.count_nulls("id")
5
df_null.cols.count_nulls("*")
{'session': 0, 'timestamp': 0, 'id': 5}
df.cols.count_uniques("*")
{'words': 4, 'num': 3, 'animals': 4, 'thing': 4, 'two strings': 4, 'filter': 4, 'num 2': 4, 'col_array': 4, 'col_int': 4, 'new_col_1': 1, 'NEW COLUMN': 4}
df.display()
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df_distinct = op.create.dataframe({
("words", "str"): [" I like fish ", " zombies", "simpsons cat lady", None, None],
("num", "int"): [1,2,2,3,0],
})
df_distinct
words
1 (object)
|
num
2 (int32)
|
---|---|
⋅⋅I⋅like⋅⋅⋅⋅fish⋅⋅
|
1
|
⋅⋅⋅⋅zombies
|
2
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
None
|
3
|
None
|
0
|
df_distinct.rows.unique("num").print()
words num (object) (int32) -------------- --------- I like fish 1 zombies 2 3 0
df_zeros = df_distinct
df_zeros.cols.count_zeros("*")
{'words': 0, 'num': 1}
df.cols.dtypes('*')
{'words': 'object', 'num': 'int32', 'animals': 'object', 'thing': 'object', 'two strings': 'object', 'filter': 'object', 'num 2': 'string', 'col_array': 'object', 'col_int': 'object', 'new_col_1': 'int64', 'NEW COLUMN': 'object'}
df.display(highlight="animals")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
"dog"
and "cat"
in animals
by the string "animals"
¶df.cols.replace("animals", ["dog", "cat"], "animals").display(highlight="animals")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
animals
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
animals
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
"dog-tv"
, "cat"
, "eagle"
and "fish"
in columns two strings
and animals
by the string "animals"
¶df.cols.replace(["two strings", "animals"], ["dog-tv", "cat", "eagle", "fish"], "animals").display(highlight=["animals", "two strings"])
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
animals-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
animals
|
tv
|
animals
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
animals-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
animals
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
"dog"
by "dog_1"
and "cat"
by "cat_1"
in animals
¶df.cols.replace("animals", [("dog", "dog_1"), ("cat", "cat_1")]).display(highlight="animals")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog_1
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat_1
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
"dog"
by "pet"
in animals
¶df.cols.replace("animals", "dog", "pet").display(highlight="animals")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
pet
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
"a"
, "b"
and "c"
by "%"
in all columns¶df.cols.replace("*", ["a", "b", "c"], "%")
words
1 (object)
|
num
2 (object)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (object)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (object)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
%%t-%%r
|
%
|
1
|
['%%%y',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zom%ies
|
2
|
%%t
|
tv
|
dog-tv
|
%
|
2
|
['%%%y⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅%%t⋅l%dy
|
2
|
frog
|
t%%le
|
e%gle-tv-plus
|
1
|
3
|
['%%%y⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
e%gle
|
gl%ss
|
lion-p%
|
%
|
4
|
['%%%y⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
3
and 2
by 10
in a numeric column¶df.cols.replace('num', [3, 2], 10, search_by="values").display(highlight="num")
words
1 (object)
|
num
2 (int64)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
10
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
10
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
10
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
3
by 6
and 2
by 12
in a numeric column¶df.cols.replace('num', [(3, 6), (2, 12)], search_by="values").display(highlight="num")
words
1 (object)
|
num
2 (int64)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
12
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
12
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
6
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.replace("animals", "dog", "animal", search_by="words").display(highlight="animals")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
animal
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.replace("animals", "dog", "animal", search_by="words", output_cols="friends").display(highlight=["animals", "friends"])
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
friends
4 (object)
|
thing
5 (object)
|
two strings
6 (object)
|
filter
7 (object)
|
num 2
8 (string)
|
col_array
9 (object)
|
col_int
10 (object)
|
new_col_1
11 (int64)
|
NEW COLUMN
12 (object)
|
---|---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
animal
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.replace("*", "1", "11", search_by="chars")
words
1 (object)
|
num
2 (object)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (object)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (object)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
11
|
dog
|
housé
|
cat-car
|
a
|
11
|
['baby',⋅'sorry']
|
[11,⋅2,⋅3]
|
11
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅11',⋅'sorry⋅11']
|
[3,⋅4]
|
11
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
11
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
11
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
11
|
r
|
df.cols.replace("animals", [("dog", "dog_1"), ("cat", "cat_1")], search_by="words").display(highlight="animals")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog_1
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat_1
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.replace("animals", ["dog", "cat"], "animals", "words").display(highlight="animals")
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
animals
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
animals
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.replace_regex('*', '.*[Cc]at.*', 'contains cat')
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
new_col_1
10 (int64)
|
NEW COLUMN
11 (object)
|
---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
contains⋅cat
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
contains⋅cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
contains⋅cat
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.nest(["animals", "two strings"], output_col="col_nested", separator="---").display(highlight=["animals", "two strings", "col_nested"])
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
col_nested
6 (object)
|
filter
7 (object)
|
num 2
8 (string)
|
col_array
9 (object)
|
col_int
10 (object)
|
new_col_1
11 (int64)
|
NEW COLUMN
12 (object)
|
---|---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
dog---cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
cat---dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
frog---eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
eagle---lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.copy("num", "num_1").cols.nest(["num", "num_1"], output_col="col_nested", shape="vector").display(highlight=["num", "num_1", "col_nested"])
words
1 (object)
|
num
2 (int32)
|
num_1
3 (int32)
|
col_nested
4 (object)
|
animals
5 (object)
|
thing
6 (object)
|
two strings
7 (object)
|
filter
8 (object)
|
num 2
9 (string)
|
col_array
10 (object)
|
col_int
11 (object)
|
new_col_1
12 (int64)
|
NEW COLUMN
13 (object)
|
---|---|---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
1
|
[1,⋅1]
|
dog
|
housé
|
cat-car
|
a
|
1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
2
|
[2,⋅2]
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
2
|
[2,⋅2]
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
3
|
[3,⋅3]
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df.cols.nest(["animals", "two strings", "num 2"], output_col="col_nested", shape="array").display(highlight=["animals", "two strings", "num 2", "col_nested"])
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_nested
8 (object)
|
col_array
9 (object)
|
col_int
10 (object)
|
new_col_1
11 (int64)
|
NEW COLUMN
12 (object)
|
---|---|---|---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1
|
dog
|
housé
|
cat-car
|
a
|
1
|
['dog',⋅'cat-car',⋅'1']
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
1
|
q
|
⋅⋅⋅⋅zombies
|
2
|
cat
|
tv
|
dog-tv
|
b
|
2
|
['cat',⋅'dog-tv',⋅'2']
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
1
|
w
|
simpsons⋅⋅⋅cat⋅lady
|
2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['frog',⋅'eagle-tv-plus',⋅'3']
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
1
|
e
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['eagle',⋅'lion-pc',⋅'4']
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
1
|
r
|
df = op.load.csv("https://raw.githubusercontent.com/ironmussa/Optimus/master/examples/data/foo.csv")
df
id
1 (int64)
|
firstName
2 (object)
|
lastName
3 (object)
|
billingId
4 (int64)
|
product
5 (object)
|
price
6 (int64)
|
birth
7 (object)
|
dummyCol
8 (object)
|
---|---|---|---|---|---|---|---|
1
|
Luis
|
Alvarez$$%!
|
123
|
Cake
|
10
|
1980/07/07
|
never
|
2
|
André
|
Ampère
|
423
|
piza
|
8
|
1950/07/08
|
gonna
|
3
|
NiELS
|
Böhr//((%%
|
551
|
pizza
|
8
|
1990/07/09
|
give
|
4
|
PAUL
|
dirac$
|
521
|
pizza
|
8
|
1954/07/10
|
you
|
5
|
Albert
|
Einstein
|
634
|
pizza
|
8
|
1990/07/11
|
up
|
6
|
Galileo
|
⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI
|
672
|
arepa
|
5
|
1930/08/12
|
never
|
7
|
CaRL
|
Ga%%%uss
|
323
|
taco
|
3
|
1970/07/13
|
gonna
|
8
|
David
|
H$$$ilbert
|
624
|
taaaccoo
|
3
|
1950/07/14
|
let
|
9
|
Johannes
|
KEPLER
|
735
|
taco
|
3
|
1920/04/22
|
you
|
10
|
JaMES
|
M$$ax%%well
|
875
|
taco
|
3
|
1923/03/12
|
down
|
11
|
Isaac
|
Newton
|
992
|
pasta
|
9
|
1999/02/15
|
never⋅
|
df.cols.hist("price", 10)
{'hist': {'price': [{'lower': 1.0, 'upper': 2.0, 'count': 1}, {'lower': 2.0, 'upper': 3.0, 'count': 1}, {'lower': 3.0, 'upper': 4.0, 'count': 4}, {'lower': 4.0, 'upper': 5.0, 'count': 1}, {'lower': 5.0, 'upper': 6.0, 'count': 2}, {'lower': 6.0, 'upper': 7.0, 'count': 0}, {'lower': 7.0, 'upper': 8.0, 'count': 0}, {'lower': 8.0, 'upper': 9.0, 'count': 6}, {'lower': 9.0, 'upper': 10.0, 'count': 4}]}}
df.cols.frequency("billingId")
{'frequency': {'billingId': {'values': [{'value': 672, 'count': 1}, {'value': 323, 'count': 1}, {'value': 912, 'count': 1}, {'value': 423, 'count': 1}, {'value': 521, 'count': 1}, {'value': 234, 'count': 1}, {'value': 875, 'count': 1}, {'value': 812, 'count': 1}, {'value': 111, 'count': 1}, {'value': 624, 'count': 1}, {'value': 467, 'count': 1}, {'value': 634, 'count': 1}, {'value': 992, 'count': 1}, {'value': 553, 'count': 1}, {'value': 116, 'count': 1}, {'value': 551, 'count': 1}, {'value': 123, 'count': 1}, {'value': 886, 'count': 1}, {'value': 735, 'count': 1}]}}}
display(df.cols.min("billingId"))
display(df.cols.percentile(['billingId', 'price'], [0.05, 0.25, 0.5, 0.75, 0.95]))
display(df.cols.max("billingId"))
display(df.cols.median(["billingId", "price"]))
display(df.cols.range(["billingId", "price"]))
display(df.cols.std(["billingId", "price"]))
111
{'billingId': {0.05: 115.5, 0.25: 373.0, 0.5: 553.0, 0.75: 773.5, 0.95: 919.9999999999998}, 'price': {0.05: 1.9, 0.25: 3.0, 0.5: 8.0, 0.75: 8.0, 0.95: 10.0}}
992
{'billingId': 553.0, 'price': 8.0}
{'billingId': {'min': 111.0, 'max': 992.0}, 'price': {'min': 1.0, 'max': 10.0}}
{'billingId': 280.19735108590083, 'price': 2.9528457876452054}
df.cols.min("*")
{'id': 1, 'firstName': '((( Heinrich )))))', 'lastName': ' GALiLEI', 'billingId': 111, 'product': '110790', 'price': 1, 'birth': '1899/01/01', 'dummyCol': '#'}
display(df.cols.kurtosis("billingId"))
display(df.cols.mean("billingId"))
display(df.cols.skew("billingId"))
display(df.cols.sum("billingId"))
display(df.cols.var("billingId"))
display(df.cols.mad("billingId"))
-0.9809401924348062
556.0
-0.23247289799632914
10564.0
78510.55555555556
230.0
df.cols.mad("price")
2.0
df.cols.mad("price", more=True)
{'price': {'mad': 2.0, 'median': 8.0}}
print(df.cols.percentile(['price'], [0.05, 0.25, 0.5, 0.75, 0.95]))
{'price': {0.05: 1.9, 0.25: 3.0, 0.5: 8.0, 0.75: 8.0, 0.95: 10.0}}
print(df.cols.mode(["price", "billingId"]))
{'price': 8.0, 'billingId': {0: 111.0, 1: 116.0, 2: 123.0, 3: 234.0, 4: 323.0, 5: 423.0, 6: 467.0, 7: 521.0, 8: 551.0, 9: 553.0, 10: 624.0, 11: 634.0, 12: 672.0, 13: 735.0, 14: 812.0, 15: 875.0, 16: 886.0, 17: 912.0, 18: 992.0}}
df
id
1 (int64)
|
firstName
2 (object)
|
lastName
3 (object)
|
billingId
4 (int64)
|
product
5 (object)
|
price
6 (int64)
|
birth
7 (object)
|
dummyCol
8 (object)
|
---|---|---|---|---|---|---|---|
1
|
Luis
|
Alvarez$$%!
|
123
|
Cake
|
10
|
1980/07/07
|
never
|
2
|
André
|
Ampère
|
423
|
piza
|
8
|
1950/07/08
|
gonna
|
3
|
NiELS
|
Böhr//((%%
|
551
|
pizza
|
8
|
1990/07/09
|
give
|
4
|
PAUL
|
dirac$
|
521
|
pizza
|
8
|
1954/07/10
|
you
|
5
|
Albert
|
Einstein
|
634
|
pizza
|
8
|
1990/07/11
|
up
|
6
|
Galileo
|
⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI
|
672
|
arepa
|
5
|
1930/08/12
|
never
|
7
|
CaRL
|
Ga%%%uss
|
323
|
taco
|
3
|
1970/07/13
|
gonna
|
8
|
David
|
H$$$ilbert
|
624
|
taaaccoo
|
3
|
1950/07/14
|
let
|
9
|
Johannes
|
KEPLER
|
735
|
taco
|
3
|
1920/04/22
|
you
|
10
|
JaMES
|
M$$ax%%well
|
875
|
taco
|
3
|
1923/03/12
|
down
|
11
|
Isaac
|
Newton
|
992
|
pasta
|
9
|
1999/02/15
|
never⋅
|
df \
.cols.trim("lastName") \
.cols.lower("lastName") \
.cols.upper(["product", "firstName"]) \
.cols.reverse("firstName") \
.display()
id
1 (int64)
|
firstName
2 (object)
|
lastName
3 (object)
|
billingId
4 (int64)
|
product
5 (object)
|
price
6 (int64)
|
birth
7 (object)
|
dummyCol
8 (object)
|
---|---|---|---|---|---|---|---|
1
|
SIUL
|
alvarez$$%!
|
123
|
CAKE
|
10
|
1980/07/07
|
never
|
2
|
ÉRDNA
|
ampère
|
423
|
PIZA
|
8
|
1950/07/08
|
gonna
|
3
|
SLEIN
|
böhr//((%%
|
551
|
PIZZA
|
8
|
1990/07/09
|
give
|
4
|
LUAP
|
dirac$
|
521
|
PIZZA
|
8
|
1954/07/10
|
you
|
5
|
TREBLA
|
einstein
|
634
|
PIZZA
|
8
|
1990/07/11
|
up
|
6
|
OELILAG
|
galilei
|
672
|
AREPA
|
5
|
1930/08/12
|
never
|
7
|
LRAC
|
ga%%%uss
|
323
|
TACO
|
3
|
1970/07/13
|
gonna
|
8
|
DIVAD
|
h$$$ilbert
|
624
|
TAAACCOO
|
3
|
1950/07/14
|
let
|
9
|
SENNAHOJ
|
kepler
|
735
|
TACO
|
3
|
1920/04/22
|
you
|
10
|
SEMAJ
|
m$$ax%%well
|
875
|
TACO
|
3
|
1923/03/12
|
down
|
11
|
CAASI
|
newton
|
992
|
PASTA
|
9
|
1999/02/15
|
never⋅
|
df.cols.iqr("price")
5.0
df.cols.iqr("price", more=True)
{'iqr': 5.0, 'q1': 3.0, 'q2': 8.0, 'q3': 8.0}
df.cols.years_between("birth", date_format="%Y/%m/%d", round=True).display(highlight="birth")
id
1 (int64)
|
firstName
2 (object)
|
lastName
3 (object)
|
billingId
4 (int64)
|
product
5 (object)
|
price
6 (int64)
|
birth
7 (float64)
|
dummyCol
8 (object)
|
---|---|---|---|---|---|---|---|
1
|
Luis
|
Alvarez$$%!
|
123
|
Cake
|
10
|
41.0
|
never
|
2
|
André
|
Ampère
|
423
|
piza
|
8
|
71.0
|
gonna
|
3
|
NiELS
|
Böhr//((%%
|
551
|
pizza
|
8
|
31.0
|
give
|
4
|
PAUL
|
dirac$
|
521
|
pizza
|
8
|
67.0
|
you
|
5
|
Albert
|
Einstein
|
634
|
pizza
|
8
|
31.0
|
up
|
6
|
Galileo
|
⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI
|
672
|
arepa
|
5
|
91.0
|
never
|
7
|
CaRL
|
Ga%%%uss
|
323
|
taco
|
3
|
51.0
|
gonna
|
8
|
David
|
H$$$ilbert
|
624
|
taaaccoo
|
3
|
71.0
|
let
|
9
|
Johannes
|
KEPLER
|
735
|
taco
|
3
|
101.0
|
you
|
10
|
JaMES
|
M$$ax%%well
|
875
|
taco
|
3
|
98.0
|
down
|
11
|
Isaac
|
Newton
|
992
|
pasta
|
9
|
22.0
|
never⋅
|
df.cols.remove("*", ["&", "%"]).display()
id
1 (object)
|
firstName
2 (object)
|
lastName
3 (object)
|
billingId
4 (object)
|
product
5 (object)
|
price
6 (object)
|
birth
7 (object)
|
dummyCol
8 (object)
|
---|---|---|---|---|---|---|---|
1
|
Luis
|
Alvarez$$!
|
123
|
Cake
|
10
|
1980/07/07
|
never
|
2
|
André
|
Ampère
|
423
|
piza
|
8
|
1950/07/08
|
gonna
|
3
|
NiELS
|
Böhr//((
|
551
|
pizza
|
8
|
1990/07/09
|
give
|
4
|
PAUL
|
dirac$
|
521
|
pizza
|
8
|
1954/07/10
|
you
|
5
|
Albert
|
Einstein
|
634
|
pizza
|
8
|
1990/07/11
|
up
|
6
|
Galileo
|
⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI
|
672
|
arepa
|
5
|
1930/08/12
|
never
|
7
|
CaRL
|
Gauss
|
323
|
taco
|
3
|
1970/07/13
|
gonna
|
8
|
David
|
H$$$ilbert
|
624
|
taaaccoo
|
3
|
1950/07/14
|
let
|
9
|
Johannes
|
KEPLER
|
735
|
taco
|
3
|
1920/04/22
|
you
|
10
|
JaMES
|
M$$axwell
|
875
|
taco
|
3
|
1923/03/12
|
down
|
11
|
Isaac
|
Newton
|
992
|
pasta
|
9
|
1999/02/15
|
never⋅
|
df.cols.normalize_chars("lastName").display(highlight="lastName")
id
1 (int64)
|
firstName
2 (object)
|
lastName
3 (object)
|
billingId
4 (int64)
|
product
5 (object)
|
price
6 (int64)
|
birth
7 (object)
|
dummyCol
8 (object)
|
---|---|---|---|---|---|---|---|
1
|
Luis
|
Alvarez$$%!
|
123
|
Cake
|
10
|
1980/07/07
|
never
|
2
|
André
|
Ampere
|
423
|
piza
|
8
|
1950/07/08
|
gonna
|
3
|
NiELS
|
Bohr//((%%
|
551
|
pizza
|
8
|
1990/07/09
|
give
|
4
|
PAUL
|
dirac$
|
521
|
pizza
|
8
|
1954/07/10
|
you
|
5
|
Albert
|
Einstein
|
634
|
pizza
|
8
|
1990/07/11
|
up
|
6
|
Galileo
|
⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI
|
672
|
arepa
|
5
|
1930/08/12
|
never
|
7
|
CaRL
|
Ga%%%uss
|
323
|
taco
|
3
|
1970/07/13
|
gonna
|
8
|
David
|
H$$$ilbert
|
624
|
taaaccoo
|
3
|
1950/07/14
|
let
|
9
|
Johannes
|
KEPLER
|
735
|
taco
|
3
|
1920/04/22
|
you
|
10
|
JaMES
|
M$$ax%%well
|
875
|
taco
|
3
|
1923/03/12
|
down
|
11
|
Isaac
|
Newton
|
992
|
pasta
|
9
|
1999/02/15
|
never⋅
|
df.cols.remove_special_chars("lastName").display(highlight="lastName")
id
1 (int64)
|
firstName
2 (object)
|
lastName
3 (object)
|
billingId
4 (int64)
|
product
5 (object)
|
price
6 (int64)
|
birth
7 (object)
|
dummyCol
8 (object)
|
---|---|---|---|---|---|---|---|
1
|
Luis
|
Alvarez
|
123
|
Cake
|
10
|
1980/07/07
|
never
|
2
|
André
|
Ampère
|
423
|
piza
|
8
|
1950/07/08
|
gonna
|
3
|
NiELS
|
Böhr
|
551
|
pizza
|
8
|
1990/07/09
|
give
|
4
|
PAUL
|
dirac
|
521
|
pizza
|
8
|
1954/07/10
|
you
|
5
|
Albert
|
Einstein
|
634
|
pizza
|
8
|
1990/07/11
|
up
|
6
|
Galileo
|
⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI
|
672
|
arepa
|
5
|
1930/08/12
|
never
|
7
|
CaRL
|
Gauss
|
323
|
taco
|
3
|
1970/07/13
|
gonna
|
8
|
David
|
Hilbert
|
624
|
taaaccoo
|
3
|
1950/07/14
|
let
|
9
|
Johannes
|
KEPLER
|
735
|
taco
|
3
|
1920/04/22
|
you
|
10
|
JaMES
|
Maxwell
|
875
|
taco
|
3
|
1923/03/12
|
down
|
11
|
Isaac
|
Newton
|
992
|
pasta
|
9
|
1999/02/15
|
never⋅
|
df.cols.clip("billingId", 100, 200).display(highlight="billingId")
id
1 (int64)
|
firstName
2 (object)
|
lastName
3 (object)
|
billingId
4 (float64)
|
product
5 (object)
|
price
6 (int64)
|
birth
7 (object)
|
dummyCol
8 (object)
|
---|---|---|---|---|---|---|---|
1
|
Luis
|
Alvarez$$%!
|
123.0
|
Cake
|
10
|
1980/07/07
|
never
|
2
|
André
|
Ampère
|
200.0
|
piza
|
8
|
1950/07/08
|
gonna
|
3
|
NiELS
|
Böhr//((%%
|
200.0
|
pizza
|
8
|
1990/07/09
|
give
|
4
|
PAUL
|
dirac$
|
200.0
|
pizza
|
8
|
1954/07/10
|
you
|
5
|
Albert
|
Einstein
|
200.0
|
pizza
|
8
|
1990/07/11
|
up
|
6
|
Galileo
|
⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI
|
200.0
|
arepa
|
5
|
1930/08/12
|
never
|
7
|
CaRL
|
Ga%%%uss
|
200.0
|
taco
|
3
|
1970/07/13
|
gonna
|
8
|
David
|
H$$$ilbert
|
200.0
|
taaaccoo
|
3
|
1950/07/14
|
let
|
9
|
Johannes
|
KEPLER
|
200.0
|
taco
|
3
|
1920/04/22
|
you
|
10
|
JaMES
|
M$$ax%%well
|
200.0
|
taco
|
3
|
1923/03/12
|
down
|
11
|
Isaac
|
Newton
|
200.0
|
pasta
|
9
|
1999/02/15
|
never⋅
|
df_abs = op.create.dataframe({
("words", "str"): [' I like fish ', ' zombies', 'simpsons cat lady', None],
("num", "int"): [-1, -2, -2, 3],
("animals", "str"): ['dog', 'cat', 'frog', 'eagle'],
("thing", "str"): ['housé', 'tv', 'table', 'glass'],
("two strings", "str"): ['cat-car', 'dog-tv', 'eagle-tv-plus', 'lion-pc'],
("filter", "str"): ['a', 'b', '1', 'c'],
("num 2", "string"): ['-1', '-2', '3', '4'],
("col_array", None): [['baby', 'sorry'],
['baby 1', 'sorry 1'],
['baby 2', 'sorry 2'],
['baby 3', 'sorry 3']],
("col_int", None): [[1, 2, 3], [3, 4], [5, 6, 7], [7, 8]],
})
df_abs
words
1 (object)
|
num
2 (int32)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (string)
|
col_array
8 (object)
|
col_int
9 (object)
|
---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
-1
|
dog
|
housé
|
cat-car
|
a
|
-1
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
⋅⋅⋅⋅zombies
|
-2
|
cat
|
tv
|
dog-tv
|
b
|
-2
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
simpsons⋅⋅⋅cat⋅lady
|
-2
|
frog
|
table
|
eagle-tv-plus
|
1
|
3
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
None
|
3
|
eagle
|
glass
|
lion-pc
|
c
|
4
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
df_abs.cols.abs(["num", "num 2"]).display(highlight=["num", "num 2"])
words
1 (object)
|
num
2 (float64)
|
animals
3 (object)
|
thing
4 (object)
|
two strings
5 (object)
|
filter
6 (object)
|
num 2
7 (float64)
|
col_array
8 (object)
|
col_int
9 (object)
|
---|---|---|---|---|---|---|---|---|
⋅⋅I⋅like⋅⋅⋅⋅⋅fish⋅⋅
|
1.0
|
dog
|
housé
|
cat-car
|
a
|
1.0
|
['baby',⋅'sorry']
|
[1,⋅2,⋅3]
|
⋅⋅⋅⋅zombies
|
2.0
|
cat
|
tv
|
dog-tv
|
b
|
2.0
|
['baby⋅1',⋅'sorry⋅1']
|
[3,⋅4]
|
simpsons⋅⋅⋅cat⋅lady
|
2.0
|
frog
|
table
|
eagle-tv-plus
|
1
|
3.0
|
['baby⋅2',⋅'sorry⋅2']
|
[5,⋅6,⋅7]
|
None
|
3.0
|
eagle
|
glass
|
lion-pc
|
c
|
4.0
|
['baby⋅3',⋅'sorry⋅3']
|
[7,⋅8]
|
df.cols.qcut("billingId", quantiles=5).display(highlight="billingId")
id
1 (int64)
|
firstName
2 (object)
|
lastName
3 (object)
|
billingId
4 (category)
|
product
5 (object)
|
price
6 (int64)
|
birth
7 (object)
|
dummyCol
8 (object)
|
---|---|---|---|---|---|---|---|
1
|
Luis
|
Alvarez$$%!
|
(110.999,⋅287.4]
|
Cake
|
10
|
1980/07/07
|
never
|
2
|
André
|
Ampère
|
(287.4,⋅527.0]
|
piza
|
8
|
1950/07/08
|
gonna
|
3
|
NiELS
|
Böhr//((%%
|
(527.0,⋅632.0]
|
pizza
|
8
|
1990/07/09
|
give
|
4
|
PAUL
|
dirac$
|
(287.4,⋅527.0]
|
pizza
|
8
|
1954/07/10
|
you
|
5
|
Albert
|
Einstein
|
(632.0,⋅837.2]
|
pizza
|
8
|
1990/07/11
|
up
|
6
|
Galileo
|
⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅⋅GALiLEI
|
(632.0,⋅837.2]
|
arepa
|
5
|
1930/08/12
|
never
|
7
|
CaRL
|
Ga%%%uss
|
(287.4,⋅527.0]
|
taco
|
3
|
1970/07/13
|
gonna
|
8
|
David
|
H$$$ilbert
|
(527.0,⋅632.0]
|
taaaccoo
|
3
|
1950/07/14
|
let
|
9
|
Johannes
|
KEPLER
|
(632.0,⋅837.2]
|
taco
|
3
|
1920/04/22
|
you
|
10
|
JaMES
|
M$$ax%%well
|
(837.2,⋅992.0]
|
taco
|
3
|
1923/03/12
|
down
|
11
|
Isaac
|
Newton
|
(837.2,⋅992.0]
|
pasta
|
9
|
1999/02/15
|
never⋅
|