import pandas as pd
import numpy as np
# All-numeric frame: col3 is NaN in every row, col4 holds zeros,
# and the last row is NaN across all four columns.
df1 = pd.DataFrame(
    [
        [1, np.nan, np.nan, 0],
        [np.nan, 2, np.nan, 0],
        [np.nan, np.nan, np.nan, 0],
        [np.nan, np.nan, np.nan, np.nan],
    ],
    columns=["col1", "col2", "col3", "col4"],
)
df1
| | col1 | col2 | col3 | col4 |
---|---|---|---|---|
0 | 1.0 | NaN | NaN | 0.0 |
1 | NaN | 2.0 | NaN | 0.0 |
2 | NaN | NaN | NaN | 0.0 |
3 | NaN | NaN | NaN | NaN |
# dtype of each column (Series) in the DataFrame
df1.dtypes
col1 float64 col2 float64 col3 float64 col4 float64 dtype: object
# Here bool_only is left at its default (bool_only = None). The DataFrame holds only floats
# right now and no column has boolean values, so the result matches the documentation.
df1.any(axis = "columns", skipna = False)
0 True 1 True 2 True 3 True dtype: bool
# Same layout as before, but the zeros in col4 are swapped for True/False,
# so col4 now mixes booleans with a trailing NaN; col3 is still NaN throughout.
df2 = pd.DataFrame(
    [
        [1, np.nan, np.nan, True],
        [np.nan, 2, np.nan, True],
        [np.nan, np.nan, np.nan, False],
        [np.nan, np.nan, np.nan, np.nan],
    ],
    columns=["col1", "col2", "col3", "col4"],
)
df2
| | col1 | col2 | col3 | col4 |
---|---|---|---|---|
0 | 1.0 | NaN | NaN | True |
1 | NaN | 2.0 | NaN | True |
2 | NaN | NaN | NaN | False |
3 | NaN | NaN | NaN | NaN |
# Taken as a whole, the col4 Series has dtype object
print(df2.dtypes)
print("\n")
# the individual elements still keep their respective Python types
print("col4 elements:", list(map(type, df2["col4"])))
col1 float64 col2 float64 col3 float64 col4 object dtype: object col4 elements: [<class 'bool'>, <class 'bool'>, <class 'bool'>, <class 'float'>]
# the result we currently get when bool_only = None (the default)
df2.any(axis = "columns", skipna = False)
0 1 1 NaN 2 NaN 3 NaN dtype: object
# the last row (index 3), which is entirely NaN, now returns True
df2.any(axis = "columns", skipna = False, bool_only = False)
0 True 1 True 2 True 3 True dtype: bool
# We don't have any row that is completely Boolean, so my understanding is that this should
# result in an empty Series, but it gives exactly the same result as the bool_only = False option.
df2.any(axis = "columns", skipna = False, bool_only = True)
0 True 1 True 2 True 3 True dtype: bool
# reduce df2 along axis = "index" (one result per column), bool_only left at its default
df2.any(axis = "index", skipna = False)
col1 1 col2 NaN col3 NaN col4 True dtype: object
# reduce df2 along axis = "index" with bool_only = False passed explicitly
df2.any(axis = "index", skipna = False, bool_only = False)
col1 True col2 True col3 True col4 True dtype: bool
# reduce df2 along axis = "index" with bool_only = True
df2.any(axis = "index", skipna = False, bool_only = True)
Series([], dtype: bool)
# Frame mixing NaN, integer 0 and booleans: rows 0-1 are NaN except for a bool in col4,
# while rows 2-5 each repeat a single value (0, NaN, False, True) across every column.
df3 = pd.DataFrame(
    [
        [np.nan, np.nan, np.nan, True],
        [np.nan, np.nan, np.nan, False],
        [0, 0, 0, 0],
        [np.nan, np.nan, np.nan, np.nan],
        [False, False, False, False],
        [True, True, True, True],
    ],
    columns=["col1", "col2", "col3", "col4"],
)
df3
| | col1 | col2 | col3 | col4 |
---|---|---|---|---|
0 | NaN | NaN | NaN | True |
1 | NaN | NaN | NaN | False |
2 | 0 | 0 | 0 | 0 |
3 | NaN | NaN | NaN | NaN |
4 | False | False | False | False |
5 | True | True | True | True |
# dtype of each column in df3
df3.dtypes
col1 object col2 object col3 object col4 object dtype: object
# reduce df3 along axis = "columns" (one result per row), bool_only left at its default
df3.any(axis = "columns", skipna = False)
0 NaN 1 NaN 2 0 3 NaN 4 False 5 True dtype: object
# reduce df3 along axis = "columns" with bool_only = False passed explicitly
df3.any(axis = "columns", skipna = False, bool_only = False)
0 True 1 True 2 False 3 True 4 False 5 True dtype: bool
# I expected this to be empty, but it gives exactly the same result as above - this I can't really understand
df3.any(axis = "columns", skipna = False, bool_only = True)
0 True 1 True 2 False 3 True 4 False 5 True dtype: bool
# reduce df3 along axis = "index" (one result per column), bool_only left at its default
df3.any(axis = "index", skipna = False)
col1 NaN col2 NaN col3 NaN col4 True dtype: object
# reduce df3 along axis = "index" with bool_only = False passed explicitly
df3.any(axis = "index", skipna = False, bool_only = False)
col1 True col2 True col3 True col4 True dtype: bool
# No Series (column) has all bool-type elements, so it returns an empty Series? But then why not in code cell 15? This is baffling!
df3.any(axis = "index", skipna = False, bool_only = True)
Series([], dtype: bool)