#!/usr/bin/env python
# coding: utf-8
"""Compare a whole-sample quantile with one rebuilt from sub-group quantiles.

The script draws a random sample, computes its 0.1 quantile directly, then
splits the shuffled sample into randomly sized sub-groups, computes each
sub-group's quantiles and combines them with a population-weighted mean.
Both numbers are printed so they can be compared by eye.

The weighted mean of per-group quantiles is an estimate of the whole-sample
quantile, not an exact reconstruction of it.
"""

import numpy as np
import pandas as pd

SAMPLE_SIZE = 100000
SAMPLE_SCALE = 50


def reference_quantile(values, q=0.1):
    """Return the ``q`` quantile of ``values`` computed over the full sample.

    ``values`` is a one-dimensional array-like of numbers.
    """
    return pd.DataFrame(values, columns=["value"]).quantile(q).value


def split_into_random_groups(values, min_rows=100, max_rows=1000):
    """Shuffle ``values`` and cut the result into consecutive random chunks.

    While more than ``max_rows`` elements remain, the next chunk size is drawn
    with ``np.random.randint(min_rows, max_rows)`` (upper bound exclusive).
    Once ``max_rows`` or fewer remain they are all taken as the final chunk,
    so the last chunk may be shorter than ``min_rows``.

    Returns a list of one-dimensional arrays that together hold every element
    of ``values`` exactly once.

    Raises ``ValueError`` if the bounds do not satisfy
    ``0 < min_rows < max_rows``; a zero-length draw would make no progress.
    """
    if not 0 < min_rows < max_rows:
        raise ValueError(
            "expected 0 < min_rows < max_rows, got %s and %s" % (min_rows, max_rows)
        )

    remaining = np.random.permutation(values)
    groups = []
    while len(remaining) > 0:
        if len(remaining) <= max_rows:
            num_rows = len(remaining)
        else:
            num_rows = np.random.randint(min_rows, max_rows)
        print("will take %s rows from %s" % (num_rows, len(remaining)))
        groups.append(remaining[:num_rows])
        remaining = remaining[num_rows:]
    return groups


def summarize_groups(groups):
    """Build one summary row per group: its size and its 0.1 / 0.5 quantiles.

    Returns a DataFrame with columns ``population``, ``q1`` and ``q5``.
    """
    rows = []
    for group in groups:
        series = pd.Series(group)
        rows.append([len(group), series.quantile(0.1), series.quantile(0.5)])
    return pd.DataFrame(rows, columns=["population", "q1", "q5"])


def weighted_mean_quantile(summary, column="q1"):
    """Return the population-weighted mean of ``summary[column]``.

    Raises ``ValueError`` when the total population is zero, because the
    weighted mean is undefined in that case.
    """
    total = summary.population.sum()
    if total == 0:
        raise ValueError("cannot average quantiles over an empty population")
    return (summary[column] * summary.population).sum() / total


def main():
    """Run the comparison and print the reference and recombined quantiles."""
    arr = np.random.randn(SAMPLE_SIZE) * SAMPLE_SCALE

    ref_q1 = reference_quantile(arr, 0.1)
    print("ref q1 is %s" % ref_q1)

    sub_groups = split_into_random_groups(arr)

    df = summarize_groups(sub_groups)
    q1 = weighted_mean_quantile(df, "q1")
    print("df.population.sum() is %s" % df.population.sum())
    print("computed q1 is %s" % q1)

    # Scratch check of the slicing used by the splitter: a[0:k] and a[k:]
    # partition the list with no overlap and no gap.
    a = [1, 2, 3, 4, 5, 6, 7]
    b = a[0:3]
    c = a[3:]
    print("b is %s and c is %s" % (b, c))


if __name__ == "__main__":
    main()