#!/usr/bin/env python
# coding: utf-8

# Exploration of DataFrame.any() with skipna=False and the different
# bool_only settings, on frames that mix NaN, numbers and booleans.
#
# NOTE(review): the "# In[n]:" markers suggest this was exported from a
# Jupyter notebook. Bare expressions such as `df1` only display a value when
# the file is run cell by cell; run as a plain script they are evaluated and
# discarded.

# In[1]:

import numpy as np
import pandas as pd

# In[2]:

# DataFrame with one column that is entirely np.nan, plus numeric data
# (0 included).
df1 = pd.DataFrame(
    data=[
        [1, np.nan, np.nan, 0],
        [np.nan, 2, np.nan, 0],
        [np.nan, np.nan, np.nan, 0],
        [np.nan, np.nan, np.nan, np.nan],
    ],
    columns=["col1", "col2", "col3", "col4"],
)
df1

# In[3]:

# dtype of each Series in the DataFrame
df1.dtypes

# In[4]:

# bool_only is left at its default here (None, per the original note). The
# frame holds only floats right now -- no column has boolean values -- so the
# result matches the documentation.
df1.any(axis="columns", skipna=False)

# In[5]:

# Same layout as df1 (col3 is entirely np.nan), but the 0s in col4 are
# replaced with True or False.
df2 = pd.DataFrame(
    data=[
        [1, np.nan, np.nan, True],
        [np.nan, 2, np.nan, True],
        [np.nan, np.nan, np.nan, False],
        [np.nan, np.nan, np.nan, np.nan],
    ],
    columns=["col1", "col2", "col3", "col4"],
)
df2

# In[6]:

# The col4 Series as a whole is object dtype ...
print(df2.dtypes)
print("\n")
# ... but the individual elements still have their respective types.
print("col4 elements:", [type(each) for each in df2["col4"]])

# In[7]:

# The result we currently get when bool_only is left at its default (None).
df2.any(axis="columns", skipna=False)

# In[8]:

# The last index (3), which is completely NaN, now returns True.
df2.any(axis="columns", skipna=False, bool_only=False)

# In[9]:

# We don't have any row that is completely boolean, so my understanding is
# that this should result in an empty Series, but it gives exactly the same
# result as the bool_only=False option.
# NOTE(review): the pandas docs describe bool_only as selecting boolean
# *columns* (not rows); col4 is object dtype here, so presumably no column
# qualifies -- confirm against the installed pandas version.
df2.any(axis="columns", skipna=False, bool_only=True)

# In[10]:

df2.any(axis="index", skipna=False)

# In[11]:

df2.any(axis="index", skipna=False, bool_only=False)

# In[12]:

df2.any(axis="index", skipna=False, bool_only=True)

# In[13]:

# DataFrame mixing np.nan, 0 and booleans.
df3 = pd.DataFrame(
    data=[
        [np.nan, np.nan, np.nan, True],
        [np.nan, np.nan, np.nan, False],
        [0, 0, 0, 0],
        [np.nan, np.nan, np.nan, np.nan],
        [False, False, False, False],
        [True, True, True, True],
    ],
    columns=["col1", "col2", "col3", "col4"],
)
df3

# In[14]:

df3.dtypes

# In[15]:

df3.any(axis="columns", skipna=False)

# In[16]:

df3.any(axis="columns", skipna=False, bool_only=False)

# In[17]:

# I expected this to be empty, but it gives exactly the same result as above.
# This I can't really understand.
df3.any(axis="columns", skipna=False, bool_only=True)

# In[18]:

df3.any(axis="index", skipna=False)

# In[19]:

df3.any(axis="index", skipna=False, bool_only=False)

# In[20]:

# No Series has all bool-typed elements, so it returns empty? But then why
# not in code cell 15? This is baffling!
df3.any(axis="index", skipna=False, bool_only=True)