import numpy as np import pandas as pd try: import hidrokit except ModuleNotFoundError: !pip install git+https://github.com/taruma/hidrokit.git@latest -q import hidrokit print(f'hidrokit version: {hidrokit.__version__}') !wget -O sample.xlsx "https://taruma.github.io/assets/hidrokit_dataset/data_daily_sample.xlsx" -q dataset_path = 'sample.xlsx' from hidrokit.contrib.taruma import hk88 _data = hk88.read_workbook(dataset_path, ['STA_A', 'STA_B', 'STA_C']) dataset = _data.infer_objects() dataset.info() dataset.head() def summary_station(dataset, column, ufunc, ufunc_col, n_days='MS'): grouped = [dataset.index.year, dataset.index.month] ufunc = ufunc if isinstance(ufunc, (list, tuple)) else (ufunc,) ufunc_col = (ufunc_col if isinstance(ufunc_col, (list, tuple)) else (ufunc_col,)) if len(ufunc) != len(ufunc_col): raise ValueError('length ufunc and ufunc_col are not matched.') if n_days.endswith("D") or n_days.endswith("MS") or n_days.endswith("M"): ix_month = [] val_month = [] for _, x in dataset[column].groupby(by=grouped): each_month = x.groupby(pd.Grouper(freq=n_days)).agg(ufunc) val_month.append(each_month.values) ix_month.append(each_month.index.to_numpy()) return pd.DataFrame( data=np.vstack(val_month), index=np.hstack(ix_month), columns=pd.MultiIndex.from_product([[column], ufunc_col]), ).rename_axis("DATE") else: summary = dataset[[column]].resample(n_days).agg(ufunc) summary.columns = pd.MultiIndex.from_product([[column], ufunc_col]) return summary def summary_all(dataset, ufunc, ufunc_col, columns=None, n_days='MS', verbose=False): res = [] columns = columns if columns is not None else list(dataset.columns) columns = columns if isinstance(columns, (list, tuple)) else [columns] for column in columns: if verbose: print('PROCESSING:', column) res.append( summary_station(dataset, column, ufunc, ufunc_col, n_days=n_days) ) return pd.concat(res, axis=1) # Fungsi buatan sendiri def n_rain(x): "Jumlah hari hujan" return (x > 0).sum() myfunc = [np.sum, n_rain, len] 
myfunc_col = ['sum', 'n_rain', 'n_days']

# Default period ('MS')
summary_station(
    dataset=dataset,
    column='STA_B',
    ufunc=myfunc,
    ufunc_col=myfunc_col,
)

# Every 8 days
summary_station(
    dataset=dataset,
    column='STA_B',
    ufunc=myfunc,
    ufunc_col=myfunc_col,
    n_days='8D',
)

# Every 15 days
summary_station(
    dataset=dataset,
    column='STA_C',
    ufunc=myfunc,
    ufunc_col=myfunc_col,
    n_days='15D',
)


# Using more functions
def n_rain(x):
    """Number of rainy days (entries greater than zero)."""
    is_rainy = x > 0
    return is_rainy.sum()


def n_dry(x):
    """Number of dry days (entries equal to zero or missing)."""
    is_zero = x == 0
    is_missing = x.isna()
    return np.logical_or(is_zero, is_missing).sum()


myfunc_all = [len, n_rain, n_dry, np.sum, np.mean, np.std]
myfunc_all_col = ['n_days', 'n_rain', 'n_dry', 'SUM', 'MEAN', 'STD']

# All stations, 7-day periods
summary_all(
    dataset=dataset,
    ufunc=myfunc_all,
    ufunc_col=myfunc_all_col,
    n_days='7D',
)

# Selected stations, 16-day periods
summary_all(
    dataset=dataset,
    columns=['STA_A', 'STA_C'],
    ufunc=myfunc_all,
    ufunc_col=myfunc_all_col,
    n_days='16D',
)

# All stations, 7-day periods, reporting progress per column
summary_all(
    dataset=dataset,
    ufunc=myfunc_all,
    ufunc_col=myfunc_all_col,
    n_days='7D',
    verbose=True,
)