Berdasarkan isu #60: Menentukan kriteria berdasarkan total nilai bulanan
Deskripsi permasalahan:
# import library
import pandas as pd
import numpy as np
# buat dataset random
SEED = 110891
np.random.seed(SEED)
date_index = pd.date_range('20000101', '20501231')
dataset = pd.DataFrame(
index=date_index,
data=np.random.rand(len(date_index), 3)*10,
columns='a b c'.split()
)
dataset.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 18628 entries, 2000-01-01 to 2050-12-31 Freq: D Data columns (total 3 columns): a 18628 non-null float64 b 18628 non-null float64 c 18628 non-null float64 dtypes: float64(3) memory usage: 582.1 KB
dataset.head()
a | b | c | |
---|---|---|---|
2000-01-01 | 2.877766 | 3.213378 | 2.609509 |
2000-01-02 | 6.087079 | 0.528814 | 2.181410 |
2000-01-03 | 7.788553 | 8.597564 | 3.401175 |
2000-01-04 | 7.952444 | 3.182324 | 1.600600 |
2000-01-05 | 1.721316 | 7.618591 | 5.018759 |
# menghitung total ch setiap bulannya
total_summary = dataset.groupby([dataset.index.year, dataset.index.month]).sum()
total_summary
a | b | c | ||
---|---|---|---|---|
2000 | 1 | 162.847727 | 156.897045 | 144.812644 |
2 | 146.883002 | 163.938908 | 125.541164 | |
3 | 149.917733 | 162.343038 | 153.770483 | |
4 | 151.332312 | 146.398578 | 131.572011 | |
5 | 148.824870 | 159.312776 | 178.204217 | |
6 | 142.645200 | 141.804644 | 162.177388 | |
7 | 170.140213 | 130.409430 | 191.273702 | |
8 | 170.068540 | 159.861622 | 179.001989 | |
9 | 178.699585 | 157.825042 | 144.558428 | |
10 | 145.530305 | 191.522354 | 178.904414 | |
11 | 128.861376 | 152.381631 | 121.446012 | |
12 | 128.531776 | 145.206447 | 160.007800 | |
2001 | 1 | 165.739167 | 150.058786 | 160.826603 |
2 | 141.511927 | 134.187962 | 130.801650 | |
3 | 135.688425 | 177.826575 | 173.841181 | |
4 | 167.878367 | 163.536799 | 167.083027 | |
5 | 159.875967 | 168.663616 | 120.514476 | |
6 | 155.665123 | 153.639577 | 153.308357 | |
7 | 142.746376 | 139.065767 | 147.011501 | |
8 | 142.728787 | 174.440000 | 148.623053 | |
9 | 116.060098 | 165.032239 | 153.688107 | |
10 | 168.868071 | 164.222337 | 170.667792 | |
11 | 140.969798 | 155.106032 | 121.242189 | |
12 | 123.714519 | 163.179190 | 166.616499 | |
2002 | 1 | 167.053233 | 155.069400 | 171.605847 |
2 | 120.012374 | 146.039734 | 153.264692 | |
3 | 183.218227 | 164.571052 | 154.357133 | |
4 | 154.822616 | 120.226232 | 102.963488 | |
5 | 169.889488 | 152.184870 | 142.519123 | |
6 | 151.163008 | 130.519755 | 151.762828 | |
... | ... | ... | ... | ... |
2048 | 7 | 148.672508 | 167.142954 | 164.381128 |
8 | 126.655985 | 148.710034 | 144.413049 | |
9 | 183.513775 | 114.640279 | 169.897208 | |
10 | 135.829907 | 151.918643 | 176.459592 | |
11 | 146.398529 | 125.322861 | 148.996056 | |
12 | 159.041660 | 156.619241 | 165.230088 | |
2049 | 1 | 157.052500 | 139.911295 | 145.751750 |
2 | 117.204951 | 135.909696 | 128.467320 | |
3 | 140.775834 | 172.049295 | 186.388653 | |
4 | 142.940225 | 156.801915 | 141.438030 | |
5 | 175.040427 | 157.488158 | 156.365536 | |
6 | 140.708484 | 133.383572 | 147.509383 | |
7 | 145.790755 | 165.487569 | 154.127876 | |
8 | 149.804165 | 169.480737 | 138.583017 | |
9 | 158.301834 | 164.360739 | 164.492783 | |
10 | 159.818715 | 141.081036 | 142.240236 | |
11 | 96.027429 | 154.389690 | 154.491933 | |
12 | 168.802017 | 140.951503 | 148.711046 | |
2050 | 1 | 175.387880 | 156.662521 | 203.628996 |
2 | 137.018246 | 133.811524 | 144.568716 | |
3 | 161.716170 | 143.697188 | 143.519191 | |
4 | 162.027600 | 135.305125 | 156.008888 | |
5 | 154.067237 | 161.674996 | 169.782035 | |
6 | 153.781478 | 158.463390 | 171.016871 | |
7 | 149.692955 | 171.071892 | 144.742615 | |
8 | 163.018816 | 161.417455 | 157.176567 | |
9 | 142.620717 | 171.070855 | 117.322726 | |
10 | 138.459144 | 137.613439 | 164.343386 | |
11 | 150.625937 | 147.692128 | 164.288570 | |
12 | 157.485470 | 184.238850 | 160.089807 |
612 rows × 3 columns
# Memilih nilai spesifik bulan dan tahun pada kolom tertentu
TAHUN, BULAN, KOLOM = 2000, 1, 'a'
total_summary.loc[(TAHUN, BULAN), KOLOM]
162.84772662835172
#
def total_each_month(x, column='ch', summary=None):
year = x['index'].year
month = x['index'].month
return summary.loc[(year, month), column]
def criteria(value):
if value < 130:
return "rendah"
elif value >= 130 and value <= 160:
return "sedang"
else:
return "tinggi"
KOLOM = 'a'
dataset['total_a'] = dataset.reset_index().apply(lambda x: total_each_month(x, column=KOLOM, summary=total_summary), axis=1).values
dataset['kategori'] = dataset['total_a'].apply(criteria)
dataset.head()
a | b | c | total_a | kategori | |
---|---|---|---|---|---|
2000-01-01 | 2.877766 | 3.213378 | 2.609509 | 162.847727 | tinggi |
2000-01-02 | 6.087079 | 0.528814 | 2.181410 | 162.847727 | tinggi |
2000-01-03 | 7.788553 | 8.597564 | 3.401175 | 162.847727 | tinggi |
2000-01-04 | 7.952444 | 3.182324 | 1.600600 | 162.847727 | tinggi |
2000-01-05 | 1.721316 | 7.618591 | 5.018759 | 162.847727 | tinggi |
dataset.sample(n=10, random_state=SEED)
a | b | c | total_a | kategori | |
---|---|---|---|---|---|
2048-11-11 | 1.223814 | 0.436503 | 5.906254 | 146.398529 | sedang |
2006-04-05 | 7.494172 | 3.334511 | 1.982660 | 161.010925 | tinggi |
2038-06-19 | 1.737448 | 1.266665 | 2.950612 | 130.366532 | sedang |
2022-06-14 | 7.999164 | 1.079437 | 4.436997 | 171.157052 | tinggi |
2012-10-24 | 9.006064 | 1.942224 | 0.583746 | 160.424030 | tinggi |
2007-06-10 | 0.790671 | 6.943135 | 1.963164 | 139.102825 | sedang |
2008-03-31 | 7.362242 | 3.070636 | 9.000031 | 143.855427 | sedang |
2004-08-20 | 6.696845 | 2.676529 | 3.607975 | 179.573750 | tinggi |
2014-12-28 | 1.035354 | 8.811785 | 0.589958 | 143.172699 | sedang |
2044-07-11 | 3.686524 | 8.440504 | 0.009692 | 151.676157 | sedang |
- 20190717 - 1.0.0 - Initial
Source code in this notebook is licensed under a MIT License. Data in this notebook is licensed under a Creative Common Attribution 4.0 International.