In [1]:
import pandas as pd
import numpy as np
In [2]:
pd.__version__, np.__version__
Out[2]:
('1.0.5', '1.19.0')
In [3]:
# needs two steps
# one to assign the dataframe to a variable
df = pd.DataFrame({
    'name':['john','david','anna'],
    'country':['USA','UK',np.nan]
})
df
Out[3]:
name country
0 john USA
1 david UK
2 anna NaN
In [4]:
# another one to perform the filter
df[df['country']=='USA']
Out[4]:
name country
0 john USA
In [5]:
pd.DataFrame({
    'name':['john','david','anna'],
    'country':['USA','UK',np.nan]
}).query("country == 'USA'")
Out[5]:
name country
0 john USA

query where is null

In [6]:
df = pd.DataFrame({
    'name':['john','david','anna'],
    'country':['USA','UK',np.nan]
})

df.query('country.isnull()')
Out[6]:
name country
2 anna NaN

query with python variable

In [7]:
import pandas as pd
import numpy as np

df = pd.DataFrame({
    'name':['john','david','anna'],
    'country':['USA','UK',np.nan],
    'age':[23,45,45]
})

target_age = 45

df.query('age == @target_age')
Out[7]:
name country age
1 david UK 45
2 anna NaN 45

OR operator

In [8]:
import pandas as pd

df = pd.DataFrame({
    'name':['john','david','anna'],
    'country':['USA','UK', 'USA'],
    'age':[23,45,45]
})
df
Out[8]:
name country age
0 john USA 23
1 david UK 45
2 anna USA 45
In [9]:
df.query("(name=='john') or (country=='UK')")
Out[9]:
name country age
0 john USA 23
1 david UK 45

AND operator

In [10]:
import pandas as pd

df = pd.DataFrame({
    'name':['john','david','anna'],
    'country':['USA','UK', 'USA'],
    'age':[23,45,45]
})
df
Out[10]:
name country age
0 john USA 23
1 david UK 45
2 anna USA 45
In [11]:
df.query("(country=='USA') and (age==23)")
Out[11]:
name country age
0 john USA 23

Column is in array

In [12]:
import pandas as pd

df = pd.DataFrame({
    'name':['john','david','anna'],
    'country':['USA','UK', 'USA'],
    'age':[23,45,45]
})
df
Out[12]:
name country age
0 john USA 23
1 david UK 45
2 anna USA 45
In [13]:
names_array = ['john','anna']

df.query('name in @names_array')
Out[13]:
name country age
0 john USA 23
2 anna USA 45

Column is not in array

In [14]:
import pandas as pd

df = pd.DataFrame({
    'name':['john','david','anna'],
    'country':['USA','UK', 'USA'],
    'age':[23,45,45]
})
df
Out[14]:
name country age
0 john USA 23
1 david UK 45
2 anna USA 45
In [15]:
invalid_array = ['anna']

df.query('name not in @invalid_array')
Out[15]:
name country age
0 john USA 23
1 david UK 45

Column name with spaces

In [16]:
import pandas as pd

df = pd.DataFrame({
    'name':['john','david','anna'],
    'country of birth':['USA','UK', 'USA'],
    'age':[23,45,45]
})
df
Out[16]:
name country of birth age
0 john USA 23
1 david UK 45
2 anna USA 45
In [17]:
df.query('`country of birth` == "UK"')
Out[17]:
name country of birth age
1 david UK 45

Where column is null

In [18]:
import pandas as pd
import numpy as np

df = pd.DataFrame({
    'name':['john','david','anna'],
    'country':['USA','UK',np.nan]
})

df
Out[18]:
name country
0 john USA
1 david UK
2 anna NaN
In [19]:
df.query('country.isnull()')
Out[19]:
name country
2 anna NaN

Where column is not null

In [20]:
import pandas as pd
import numpy as np

df = pd.DataFrame({
    'name':['john','david','anna'],
    'country':['USA','UK',np.nan]
})

df
Out[20]:
name country
0 john USA
1 david UK
2 anna NaN
In [21]:
df.query('country.notnull()')
Out[21]:
name country
0 john USA
1 david UK

like

In [25]:
import pandas as pd

df = pd.DataFrame({
    'col1':['foo','bar','baz','quux']
})

df
Out[25]:
col1
0 foo
1 bar
2 baz
3 quux
In [26]:
df.query('col1.str.contains("ba")')
Out[26]:
col1
1 bar
2 baz