👋 Hi, are you in Binder?
In Binder you can easily run Optimus. If you're not, you may want to visit the link below.
from optimus import Optimus
# Create the Optimus entry point used by every cell below.
# NOTE(review): "pandas" presumably selects the pandas engine — confirm against the Optimus docs.
op = Optimus("pandas")
Create a dataframe by passing a list of values for each column.
# Build a demo dataframe from a dict: each key is a column name and each
# value is the list of cell values for that column (four rows in total).
df = op.create.dataframe(
    {
        "words": [" I like fish ", " zombies", "simpsons cat lady", None],
        "num": [1, 2, 2, 3],
        "animals": ["dog", "cat", "frog", "eagle"],
        "thing": ["housé", "tv", "table", "glass"],
        "two strings": ["cat-car", "dog-tv", "eagle-tv-plus", "lion-pc"],
        "filter": ["a", "b", "1", "c"],
        "num 2": ["1", "2", "3", "4"],
        # Columns whose cells are themselves lists.
        "col_array": [
            ["baby", "sorry"],
            ["baby 1", "sorry 1"],
            ["baby 2", "sorry 2"],
            ["baby 3", "sorry 3"],
        ],
        "col_int": [[1, 2, 3], [3, 4], [5, 6, 7], [7, 8]],
    }
)
df.display()
Creating a dataframe using (column name, data type) tuples as keys to specify each column's data type.
# Same demo data as a plain dict, but most keys are (column name, data type)
# tuples so the column type is stated explicitly.
df = op.create.dataframe(
    {
        ("words", "str"): [" I like fish ", " zombies", "simpsons cat lady", None],
        ("num", "int"): [1, 2, 2, 3],
        ("animals", "str"): ["dog", "cat", "frog", "eagle"],
        ("thing", "str"): ["housé", "tv", "table", "glass"],
        ("two strings", "str"): ["cat-car", "dog-tv", "eagle-tv-plus", "lion-pc"],
        ("filter", "str"): ["a", "b", "1", "c"],
        # NOTE(review): this column is tagged "string" while the others use
        # "str" — confirm both spellings are accepted.
        ("num 2", "string"): ["1", "2", "3", "4"],
        # The two list-valued columns use plain string keys (no type given).
        "col_array": [
            ["baby", "sorry"],
            ["baby 1", "sorry 1"],
            ["baby 2", "sorry 2"],
            ["baby 3", "sorry 3"],
        ],
        "col_int": [[1, 2, 3], [3, 4], [5, 6, 7], [7, 8]],
    }
)
df.display()
Creating an Optimus dataframe using a pandas dataframe
import pandas as pd
# Sample records: one tuple per row, with fields in the same order as `labels`.
data = [
    ("bumbl#ebéé ", 17.5, "Espionage", 7),
    ("Optim'us", 28.0, "Leader", 10),
    ("ironhide&", 26.0, "Security", 7),
]
labels = ["names", "height", "function", "rank"]

# Build the pandas dataframe first, then hand it over to Optimus.
pdf = pd.DataFrame.from_records(data, columns=labels)
# NOTE(review): the pandas frame is passed via the `dfd` keyword — confirm this
# matches the `create.dataframe` signature of the installed Optimus version.
df = op.create.dataframe(dfd=pdf)
df.display()
# Load the example CSV straight from the Optimus GitHub repository (requires
# network access) and rebind `df` to it; the cells below work on this data.
df = op.load.file("https://raw.githubusercontent.com/hi-primus/optimus/develop-21.8/examples/data/foo.csv")
df.display()
Here is how to view the first 20 elements in a dataframe
# The argument is how many leading entries of the dataframe to render.
df.display(20)
Display in plain text using print
# Plain-text rendering instead of the rich display.
# NOTE(review): 5 is presumably the number of rows to print — confirm.
df.print(5)
To transform data you can use operations like upper,
which converts text data to uppercase, or rename,
which renames a column.
# Baseline view of the loaded data.
df.display()
# Rename column "firstName" to "name" and highlight the renamed column.
# The result is only displayed; it is not assigned back to `df`.
df.cols.rename("firstName", "name").display(highlight="name")
# Uppercase the "lastName" column and highlight it; again only displayed.
df.cols.upper("lastName").display(highlight="lastName")
The previous transformations were done step by step, but the same result can be achieved by chaining all the operations into a single expression, as in the cell below.
# Baseline view before the chained transformation.
df.display()

# Several column operations chained into a single expression: rename, drop,
# append a constant column, sort, uppercase, then display the result.
# The chained result is displayed only; it is not assigned back to `df`.
(
    df.cols.rename("billingId", "billing")
    .cols.drop(["id", "dummyCol"])
    .cols.append({"zeros": 0})
    .cols.sort(order="desc")
    .cols.upper("product")
    .display()
)
Delete repeated rows
# Drop repeated rows, using the "product" column to detect duplicates.
# The result is displayed but not assigned back to `df`.
df.rows.drop_duplicated("product").display()
Replace repeated values
# Replace repeated values in "product" with "N/A" and highlight that column.
# NOTE(review): which occurrence of a repeated value is kept is not visible here — confirm.
df.set.duplicated("product", "N/A").display(highlight="product")
Profile of the dataframe
# Profile the dataframe; "*" selects all columns.
# NOTE(review): bins=3 presumably sets the number of histogram buckets — confirm.
df.profile("*", bins=3)