Berdasarkan isu #87: perhitungan debit andalan sungai dengan kurva durasi debit
Referensi isu:
Deskripsi Permasalahan:
Strategi Penyelesaian Masalah:
pandas.DataFrame
dengan jumlah observasi sembarang. Sehingga, penggunaan fungsi akan ditentukan.
list/array
yang merupakan nilai (kumulatif) probabilitas.
Catatan:
try:
import hidrokit
except:
!pip install git+https://github.com/hidrokit/hidrokit.git@244-refactor-hk87
# Download the example dataset and save it locally as data.csv
!wget -O data.csv "https://taruma.github.io/assets/hidrokit_dataset/data_sni_67382015.csv" -q
# Path of the downloaded CSV file
FILE = 'data.csv'
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the CSV: the first row is the header and the first column becomes the
# index, parsed as dates.
dataset = pd.read_csv(
    FILE,
    header=0,
    index_col=0,
    parse_dates=True,
)
# Print a structural summary, then display the first rows.
dataset.info()
dataset.head()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 240 entries, 1982-01-31 to 2001-12-31 Data columns (total 1 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 debit 240 non-null float64 dtypes: float64(1) memory usage: 3.8 KB
debit | |
---|---|
1982-01-31 | 118.0 |
1982-02-28 | 63.9 |
1982-03-31 | 77.2 |
1982-04-30 | 155.0 |
1982-05-31 | 39.6 |
import numpy as np
import pandas as pd
from hidrokit.contrib.taruma.utils import deprecated
def calculate_weibull_probability(shape, scale):
    """
    Calculate the Weibull plotting-position probability, in percent.

    Despite the parameter names, this does not evaluate the Weibull
    distribution. It computes the plotting position ``m / (n + 1) * 100``,
    where ``m`` is the rank of an observation and ``n`` is the number of
    observations. The names ``shape`` and ``scale`` are kept so existing
    keyword callers keep working.

    Parameters:
        shape (int or float): The rank ``m`` of the observation (1 = first
            rank). A NumPy array of ranks also works, because only arithmetic
            operators are applied.
        scale (int or float): The total number of observations ``n``.

    Returns:
        float: The probability in percent, ``shape / (scale + 1) * 100``
        (an array when ``shape`` is an array).
    """
    return shape / (scale + 1) * 100
@deprecated("calculate_weibull_probability")
def prob_weibull(m, n):
    """
    Legacy name for `calculate_weibull_probability`.

    Parameters:
        m: The rank of the observation.
        n: The total number of observations.

    Returns:
        Whatever `calculate_weibull_probability(m, n)` returns, i.e.
        ``m / (n + 1) * 100``.
    """
    return calculate_weibull_probability(shape=m, scale=n)
def _array_weibull(n):
    """
    Return the Weibull probabilities (in percent) for ranks 1 through n.

    Parameters:
        n (int): The number of observations.

    Returns:
        np.ndarray: A float array of length ``n`` whose element ``i - 1`` is
        ``calculate_weibull_probability(i, n)``; empty when ``n`` is 0.
    """
    # Evaluate all ranks in one vectorized call instead of a Python-level loop;
    # the helper only uses arithmetic operators, so it accepts an array of ranks.
    ranks = np.arange(1, n + 1)
    return calculate_weibull_probability(ranks, n)
def _fdc_xy(df):
    """
    Build the x/y coordinates of a duration curve for one series of values.

    Parameters:
        df: A pandas object providing ``.index``, ``.sort_values`` and
            ``.values``; the caller in this module passes a single column.

    Returns:
        tuple: ``(x, y)`` where ``x`` holds the Weibull probabilities (percent)
        for ranks 1..n and ``y`` holds the values sorted from largest to
        smallest.
    """
    observation_count = len(df.index)
    probabilities_pct = _array_weibull(observation_count)
    descending = df.sort_values(ascending=False)
    return probabilities_pct, descending.values
def _interpolate(probability, x, y):
    """
    Interpolate a y-value for every requested probability.

    Parameters:
        probability: Iterable of probabilities to look up; each one becomes a
            key of the result.
        x: Sample x-coordinates handed to ``np.interp``.
        y: Sample y-coordinates handed to ``np.interp``.

    Returns:
        dict: Maps each requested probability to ``np.interp(p, x, y)``, in
        the order the probabilities were given.
    """
    interpolated = {}
    for target in probability:
        interpolated[target] = np.interp(target, x, y)
    return interpolated
def dependable_flow(dataframe, column_name, return_type='table', probabilities=None):
    """
    Calculate the dependable flow values based on a given dataframe and column name.

    The column is sorted from largest to smallest and each rank is paired with
    its Weibull probability in percent.

    Parameters:
        dataframe (pd.DataFrame): The input dataframe.
        column_name (str): The name of the column in the dataframe.
        return_type (str or None, optional): The type of the return value,
            matched case-insensitively. Default is 'table'. Recognised values
            are 'array', 'prob', and 'table'; None is treated as 'table'.
            Any other string also produces the table.
        probabilities (list, optional): The probabilities (percent) to
            interpolate. Default is None, which means [80, 90, 95].
            Only used when return_type is 'prob'.

    Returns:
        If return_type is 'array', returns a tuple of x_values and y_values.
        If return_type is 'prob', returns a dict mapping each requested
        probability to its interpolated value.
        Otherwise (None / 'table'), returns a pandas DataFrame with the columns:
            - index: The index labels of the column, ordered by descending value.
            - rank: The rank of each value, starting at 1.
            - probability: The x-values (Weibull probability in percent).
            - data: The y-values (column values sorted in descending order).
    """
    probabilities = [80, 90, 95] if probabilities is None else probabilities

    # None is documented as an alias of 'table'; calling .lower() on it
    # directly would raise AttributeError.
    mode = "table" if return_type is None else return_type.lower()

    series = dataframe.loc[:, column_name]
    x_values, y_values = _fdc_xy(series)

    if mode == "array":
        return x_values, y_values
    if mode == "prob":
        return _interpolate(probabilities, x_values, y_values)

    # Table output. The series is sorted again only to recover the index
    # labels in the same descending-value order used for y_values.
    data = {
        "index": series.sort_values(ascending=False).index,
        "rank": list(range(1, len(dataframe.index) + 1)),
        "probability": x_values,
        "data": y_values,
    }
    return pd.DataFrame(data)
@deprecated("dependable_flow")
def debit_andal(df, column, kind="table", prob=None):
    """
    Legacy name for `dependable_flow` (dependable flow based on SNI 6738:2015).

    Parameters:
        df: The input dataframe, passed on as ``dataframe``.
        column: The column name, passed on as ``column_name``.
        kind: The output selector, passed on as ``return_type``.
        prob: The probabilities to interpolate, passed on as ``probabilities``.

    Returns:
        The result of `dependable_flow` for the given arguments.
    """
    return dependable_flow(
        dataframe=df,
        column_name=column,
        return_type=kind,
        probabilities=prob,
    )
def monthly_dependable_flow(df, column, **kwargs):
    """
    Calculate the dependable flow separately for each calendar month.

    Parameters:
    - df: DataFrame
        The input DataFrame. Its index must expose a ``.month`` attribute,
        which is used to select the rows of each month.
    - column: str
        The name of the column to calculate the dependable flow for.
    - **kwargs: additional keyword arguments
        Passed unchanged to `dependable_flow` for every month.

    Returns:
    - dict
        Keys are the month numbers 1 through 12; each value is the result of
        `dependable_flow` on the rows belonging to that month.
    """
    per_month = {}
    for month in range(1, 13):
        # Boolean mask selecting the rows whose index falls in this month.
        in_month = df.index.month == month
        per_month[month] = dependable_flow(df[in_month], column, **kwargs)
    return per_month
@deprecated("monthly_dependable_flow")
def debit_andal_bulanan(df, column, **kwargs):
    """
    Legacy name for `monthly_dependable_flow` (monthly dependable flow based
    on SNI 6738:2015).

    Parameters:
        df: The input dataframe.
        column: The name of the column to process.
        **kwargs: Options for the per-month calculation. Both the current
            names (``return_type``, ``probabilities``) and the legacy names
            used by `debit_andal` (``kind``, ``prob``) are accepted.

    Returns:
        dict: The result of `monthly_dependable_flow`.

    Raises:
        TypeError: If an option is given under both its legacy and its
            current name.
    """
    # The per-month calculation only understands the current keyword names.
    # Translate the legacy ones so old calls such as kind='prob' keep working
    # instead of failing with an unexpected-keyword TypeError.
    legacy_to_current = {"kind": "return_type", "prob": "probabilities"}
    for legacy, current in legacy_to_current.items():
        if legacy not in kwargs:
            continue
        if current in kwargs:
            raise TypeError(
                f"debit_andal_bulanan() got both '{legacy}' and '{current}'; "
                "pass only one of them"
            )
        kwargs[current] = kwargs.pop(legacy)
    return monthly_dependable_flow(df, column, **kwargs)
.debit_andal()
¶Pada fungsi ini terdapat parameter yang perlu diperhatikan selain df
dan column
yaitu kind
. Parameter kind
menentukan hasil keluaran dari fungsi. Berikut nilai yang diterima oleh parameter kind
:
'array'
: keluaran berupa tuple berisi dua np.array
yaitu x
(untuk sumbu x, probabilitas weibull) dan y
(untuk sumbu y, nilai debit yang telah diurutkan).
'table'
(default) : keluaran berupa pandas.DataFrame
tabelaris yang berisikan kolom index
(indeks/tanggal kejadian), rank
(ranking), probability
(probabilitas weibull), data
(nilai yang telah diurutkan).
'prob'
: keluaran berupa dictionary dengan key sebagai nilai probabilitas dan value sebagai nilai data yang dicari. Nilai tersebut diperoleh menggunakan fungsi interpolasi dari numpy
yaitu np.interp()
.kind='array'
¶x, y = debit_andal(dataset, 'debit', kind='array')
print(f'len(x) = {len(x)}\tx[:5] = {x[:5]}')
print(f'len(y) = {len(y)}\ty[:5] = {y[:5]}')
len(x) = 240 x[:5] = [0.41493776 0.82987552 1.24481328 1.65975104 2.0746888 ] len(y) = 240 y[:5] = [226. 210. 194. 184. 184.]
kind='table'
(default)¶debit_andal(dataset, 'debit') # atau debit_andal(dataset, 'debit', kind='table')
index | rank | probability | data | |
---|---|---|---|---|
0 | 2001-11-30 | 1 | 0.414938 | 226.0 |
1 | 1986-03-31 | 2 | 0.829876 | 210.0 |
2 | 2001-04-30 | 3 | 1.244813 | 194.0 |
3 | 1996-11-30 | 4 | 1.659751 | 184.0 |
4 | 1988-01-31 | 5 | 2.074689 | 184.0 |
... | ... | ... | ... | ... |
235 | 1982-10-31 | 236 | 97.925311 | 6.2 |
236 | 1994-09-30 | 237 | 98.340249 | 6.0 |
237 | 1991-08-31 | 238 | 98.755187 | 5.7 |
238 | 1994-08-31 | 239 | 99.170124 | 5.3 |
239 | 1983-09-30 | 240 | 99.585062 | 2.2 |
240 rows × 4 columns
kind='prob'¶
Nilai probabilitas yang digunakan secara default yaitu $Q_{80}$, $Q_{90}$, $Q_{95}$ atau [80, 90, 95].
debit_andal(dataset, 'debit', kind='prob')
# atau debit_andal(dataset, 'debit', kind='prob', prob=[80, 90, 95])
{80: 27.12, 90: 13.330000000000005, 95: 7.249999999999974}
Contoh menggunakan nilai probabilitas yang berbeda [30, 35, 70, 85, 95]
.
debit_andal(dataset, 'debit', kind='prob', prob=[30, 35, 70, 85, 95])
{30: 103.7, 35: 96.165, 70: 40.469999999999985, 85: 19.1, 95: 7.249999999999974}
.debit_andal_bulanan()
¶Fungsi ini merupakan pengembangan lebih lanjut dari .debit_andal()
yang dapat digunakan untuk membuat kurva durasi debit per bulan. Fungsi .debit_andal_bulanan()
meneruskan keyword argument ke .dependable_flow()
, sehingga gunakan nama parameter return_type
dan probabilities
(padanan dari kind dan prob pada .debit_andal()).
Key pada hasil keluaran fungsi ini menunjukkan bulan, contoh: [1]
mengartikan bulan ke-1 (Januari).
bulanan = debit_andal_bulanan(dataset, 'debit')
print(f'keys = {bulanan.keys()}')
print(f'values = {type(bulanan[1])}')
# out: a DataFrame, because kind='table' is the default of debit_andal()
keys = dict_keys([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]) values = <class 'pandas.core.frame.DataFrame'>
Menampilkan tabel untuk bulan Maret (ke-3)
bulanan[3].head()
index | rank | probability | data | |
---|---|---|---|---|
0 | 1986-03-31 | 1 | 4.761905 | 210.0 |
1 | 1998-03-31 | 2 | 9.523810 | 174.0 |
2 | 1992-03-31 | 3 | 14.285714 | 173.0 |
3 | 1993-03-31 | 4 | 19.047619 | 164.0 |
4 | 1991-03-31 | 5 | 23.809524 | 155.0 |
Contoh menampilkan nilai $Q_{80}, Q_{85}, Q_{90}, Q_{95}$ untuk setiap bulan
# Interpolate Q80, Q85, Q90 and Q95 separately for every month.
bulanan_prob = debit_andal_bulanan(
    dataset,
    'debit',
    return_type='prob',
    probabilities=[80, 85, 90, 95],
)
# One line per month: the month number followed by its {probability: value} dict.
for key in bulanan_prob:
    value = bulanan_prob[key]
    print('Bulan ke-', key, ':\t', value, sep='')
Bulan ke-1: {80: 74.0, 85: 72.05499999999999, 90: 68.11, 95: 56.67999999999998} Bulan ke-2: {80: 66.16, 85: 63.93, 90: 57.42000000000001, 95: 52.04499999999999} Bulan ke-3: {80: 78.67999999999999, 85: 70.82499999999999, 90: 57.01000000000001, 95: 36.219999999999956} Bulan ke-4: {80: 102.0, 85: 93.945, 90: 74.61000000000001, 95: 44.47999999999993} Bulan ke-5: {80: 55.599999999999994, 85: 45.329999999999984, 90: 40.02, 95: 39.03} Bulan ke-6: {80: 33.339999999999996, 85: 28.319999999999993, 90: 24.270000000000003, 95: 14.589999999999979} Bulan ke-7: {80: 14.419999999999998, 85: 13.86, 90: 10.020000000000003, 95: 7.319999999999995} Bulan ke-8: {80: 7.039999999999999, 85: 6.359999999999999, 90: 5.760000000000001, 95: 5.3199999999999985} Bulan ke-9: {80: 6.92, 85: 6.475, 90: 6.04, 95: 2.3899999999999917} Bulan ke-10: {80: 10.019999999999998, 85: 8.86, 90: 6.5500000000000025, 95: 6.205} Bulan ke-11: {80: 36.279999999999994, 85: 24.249999999999986, 90: 14.920000000000007, 95: 10.01499999999999} Bulan ke-12: {80: 65.16, 85: 49.49999999999998, 90: 42.480000000000004, 95: 37.249999999999986}
from hidrokit.contrib.taruma import hk87
hk87.debit_andal(dataset, 'debit', kind='prob', prob=[30, 35, 70, 85, 95])
{30: 103.7, 35: 96.165, 70: 40.469999999999985, 85: 19.1, 95: 7.249999999999974}
- 20240414 - 1.1.0 / 0.5.0 - Refactor hk87
- 20191214 - 1.0.0 - Initial
Source code in this notebook is licensed under an MIT License. Data in this notebook is licensed under a Creative Commons Attribution 4.0 International license.