#!/usr/bin/env python
# coding: utf-8

# # Explore Pandera
#
# https://pandera.readthedocs.io/en/stable/index.html

# In[1]:

import pandas as pd

# pandera is imported up front: the "Quick" API cell below uses `pa`, and the
# original export only imported it in the later "OO API" cell, which raises
# NameError when the file is executed top to bottom as a script.
import pandera as pa
from pandera.typing import Series

# data to validate
df = pd.DataFrame({
    "column1": [1, 4, 0, 10, 9],
    "column2": [-1.3, -1.4, -2.9, -10.1, -20.4],
    "column3": ["value_1", "value_2", "value_3", "value_2", "value_1"],
})

# Bare expression kept from the notebook export (displays the frame in a
# notebook; it has no visible effect when run as a plain script).
df


# ## "Quick" API

# In[4]:

schema = pa.DataFrameSchema({
    "column1": pa.Column(int, checks=pa.Check.le(10)),
    "column2": pa.Column(float, checks=pa.Check.lt(-1.2)),
    "column3": pa.Column(str, checks=[
        pa.Check.str_startswith("value_"),
        # define custom checks as functions that take a series as input and
        # outputs a boolean or boolean Series
        pa.Check(lambda s: s.str.split("_", expand=True).shape[1] == 2),
    ]),
})

# Calling the schema validates `df` against it.
schema(df)


# ## OO API

# In[6]:

# Class-based declaration of the same three column constraints used by
# `schema` above (column1 <= 10, column2 < -1.2, column3 starts with "value_").
class Schema(pa.DataFrameModel):
    column1: Series[int] = pa.Field(le=10)
    column2: Series[float] = pa.Field(lt=-1.2)
    column3: Series[str] = pa.Field(str_startswith="value_")

    @pa.check("column3")
    def column_3_check(cls, series: Series[str]) -> Series[bool]:
        """Check that column3 values have two elements after being split with '_'"""
        # `.shape[1]` is the number of columns produced by the expanded split,
        # so this comparison yields a single boolean for the whole series.
        return series.str.split("_", expand=True).shape[1] == 2


Schema.validate(df)


# ## Load a LAS file

# In[ ]:


# In[ ]:


# In[ ]: