import pandas as pd
import numpy as np
# Show which pandas / numpy versions this walkthrough runs against.
pd.__version__, np.__version__

# Classic boolean-mask filtering takes two statements.
# Step 1: the frame has to be bound to a name first ...
df = pd.DataFrame(
    data={
        'name': ['john', 'david', 'anna'],
        'country': ['USA', 'UK', np.nan],
    }
)
df
# Step 2: ... because the mask expression refers back to that name.
df.loc[df['country'] == 'USA']

# query() takes the condition as a string, so it can be chained straight
# onto the constructor without any intermediate variable.
pd.DataFrame(
    data={
        'name': ['john', 'david', 'anna'],
        'country': ['USA', 'UK', np.nan],
    }
).query("country == 'USA'")
# Frame with one missing country (anna) to filter on.
df = pd.DataFrame(
    data={
        'name': ['john', 'david', 'anna'],
        'country': ['USA', 'UK', np.nan],
    }
)
# Column methods can be called inside the query string; this selects the
# rows whose country is missing.
df.query('country.isnull()')
import pandas as pd
import numpy as np
# Sample frame with a numeric column to compare against.
df = pd.DataFrame(
    data={
        'name': ['john', 'david', 'anna'],
        'country': ['USA', 'UK', np.nan],
        'age': [23, 45, 45],
    }
)
# A Python variable is referenced inside the query string with the
# '@' prefix.
target_age = 45
df.query('age == @target_age')
import pandas as pd
# Sample frame: three people with country and age.
df = pd.DataFrame(
    data={
        'name': ['john', 'david', 'anna'],
        'country': ['USA', 'UK', 'USA'],
        'age': [23, 45, 45],
    }
)
df
# Combine conditions with 'or': a row is kept when either side matches.
df.query("(name=='john') or (country=='UK')")
import pandas as pd
# Sample frame: three people with country and age.
df = pd.DataFrame(
    data={
        'name': ['john', 'david', 'anna'],
        'country': ['USA', 'UK', 'USA'],
        'age': [23, 45, 45],
    }
)
df
# Combine conditions with 'and': a row is kept only when both sides match.
df.query("(country=='USA') and (age==23)")
import pandas as pd
# Sample frame: three people with country and age.
df = pd.DataFrame(
    data={
        'name': ['john', 'david', 'anna'],
        'country': ['USA', 'UK', 'USA'],
        'age': [23, 45, 45],
    }
)
df
# Membership test: keep the rows whose name appears in a Python list,
# referenced from the query string via '@'.
names_array = ['john', 'anna']
df.query('name in @names_array')
import pandas as pd
# Sample frame: three people with country and age.
df = pd.DataFrame(
    data={
        'name': ['john', 'david', 'anna'],
        'country': ['USA', 'UK', 'USA'],
        'age': [23, 45, 45],
    }
)
df
# Negated membership test: drop the rows whose name appears in the list.
invalid_array = ['anna']
df.query('name not in @invalid_array')
import pandas as pd
# Sample frame whose second column name contains spaces.
df = pd.DataFrame(
    data={
        'name': ['john', 'david', 'anna'],
        'country of birth': ['USA', 'UK', 'USA'],
        'age': [23, 45, 45],
    }
)
df
# A column name that is not a valid Python identifier is wrapped in
# backticks inside the query string.
df.query('`country of birth` == "UK"')
import pandas as pd
import numpy as np
# Frame with one missing country (anna).
df = pd.DataFrame(
    data={
        'name': ['john', 'david', 'anna'],
        'country': ['USA', 'UK', np.nan],
    }
)
df
# Select the rows where the country value is missing.
df.query('country.isnull()')
import pandas as pd
import numpy as np
# Frame with one missing country (anna).
df = pd.DataFrame(
    data={
        'name': ['john', 'david', 'anna'],
        'country': ['USA', 'UK', np.nan],
    }
)
df
# Select the rows where the country value is present.
df.query('country.notnull()')
import pandas as pd
# Single-column frame of short strings to pattern-match against.
df = pd.DataFrame(
    data={
        'col1': ['foo', 'bar', 'baz', 'quux'],
    }
)
df
# The .str accessor is usable inside a query string; this keeps the rows
# whose value contains "ba".
df.query('col1.str.contains("ba")')