Berdasarkan isu #43: ask: ubah pivot table ke dataframe.
Deskripsi permasalahan (baca isu untuk penjelasan lebih rinci):
Strategi penyelesaian:
import logging
# Root logger: the module-level logging.info()/warning() helpers all report here.
logger = logging.getLogger()
# Detach handlers installed before this cell ran; basicConfig() leaves a root
# logger that already has handlers untouched, so they have to go first.
if logger.handlers:
    del logger.handlers[:]
# Install a fresh handler: INFO threshold, "timestamp - level - message" layout.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
# Emit one record so the configured format is visible in the cell output.
logging.info("This is an info message")
2024-04-13 23:36:37,362 - INFO - This is an info message
import warnings
import functools
def deprecated(new_func_name):
    """
    Decorator to mark a function as deprecated.

    Every call to the decorated function emits a ``DeprecationWarning`` naming
    the replacement, then forwards all arguments to the original function and
    returns its result unchanged.

    Parameters:
    - new_func_name (str): The name of the new function that should be used instead.

    Returns:
    - decorator (function): A decorator that wraps the target function.

    Example:
        @deprecated("new_function")
        def old_function():
            pass

    The above example will generate a warning when `old_function` is called,
    suggesting to use `new_function` instead.
    """
    def decorator(func):
        @functools.wraps(func)  # keep the wrapped function's name and docstring
        def wrapper(*args, **kwargs):
            warnings.warn(
                f"{func.__name__} is deprecated, use {new_func_name} instead",
                DeprecationWarning,
                # Attribute the warning to the code that called the deprecated
                # function, not to this wrapper, so the reported file/line
                # tells the user what to change.
                stacklevel=2,
            )
            return func(*args, **kwargs)
        return wrapper
    return decorator
# Folder that holds the dataset workbooks.
DRIVE_DATASET_PATH = "/content/"
# File name of the single workbook used by the examples in this notebook.
FILE_PATH = "debit_bd_pamarayan_1998_2008.xls"
# Full path of that workbook (folder followed by file name).
SINGLE_DATASET = f"{DRIVE_DATASET_PATH}{FILE_PATH}"
import logging
from calendar import isleap
from collections import defaultdict
from pathlib import Path
from typing import Any, Callable, Dict, List, Tuple, Union

import numpy as np
import pandas as pd
# Positions to discard after a 31-row x 12-column table has been flattened
# column by column (position = 31 * column + row, both 0-based).
# Presumably rows are days of the month and columns are months (TODO confirm);
# under that reading 60/61 are 30-31 Feb and 123/185/278/340 are the 31st of
# April, June, September and November.
DROP_INDICES_LEAP = [60, 61, 123, 185, 278, 340]
# Non-leap years additionally drop position 59 (29 Feb under the same reading).
DROP_INDICES = [59, *DROP_INDICES_LEAP]
def _extract_years_from_excel(file_path: str) -> List[int]:
"""
Get a list of years from an Excel file.
Parameters:
file_path (str): The path to the Excel file.
Returns:
List[int]: A sorted list of years found in the Excel file.
"""
excel = pd.ExcelFile(file_path)
years = []
for sheet in excel.sheet_names:
if sheet.isdigit():
years.append(int(sheet))
return sorted(years)
def _get_pivot_from_excel(excel_file: str, year: int, data_format: str) -> pd.DataFrame:
"""
Get a pivot table from an Excel file.
Parameters:
excel_file (str): The path to the Excel file.
year (int): The year of the data to retrieve.
data_format (str): The format of the data to retrieve.
Returns:
pandas.DataFrame: The pivot table containing the data.
Raises:
ValueError: If the data format is unknown.
"""
# Map data formats to parameters
formats = {
"uma.debit": ("AN:AY", 16, 47),
"uma.hujan": ("B:M", 19, 50),
}
if data_format not in formats:
raise ValueError(f"Unknown data format: {data_format}")
usecols, start_row, end_row = formats[data_format]
# Read the Excel data
df = pd.read_excel(excel_file, sheet_name=str(year), header=None, usecols=usecols)
# Return the pivot
return df.iloc[start_row:end_row, :]
def _get_data_for_year(file_path: str, year: int, data_format: str) -> np.ndarray:
"""
Get data for a specific year from a file and return it as a single vector numpy array.
Parameters:
file_path (str): The path to the file.
year (int): The year for which to retrieve the data.
data_format (str): The format of the data.
Returns:
numpy.ndarray: The data for the specified year.
Raises:
ValueError: If the year is not a positive integer.
IOError: If the file cannot be read.
"""
if not isinstance(year, int) or year < 0:
raise ValueError("Year must be a positive integer.")
try:
pivot_table = _get_pivot_from_excel(
file_path, str(year), data_format=data_format
)
except Exception as e:
raise IOError("Could not read file: " + str(e)) from e
reshaped_data = pivot_table.melt().drop("variable", axis=1)
if isleap(year):
return reshaped_data["value"].drop(DROP_INDICES_LEAP).values
return reshaped_data["value"].drop(DROP_INDICES).values
def _get_data_all_year(
file_path: Union[str, Path], data_format: str, return_as_list: bool = False
) -> Union[List[np.ndarray], np.ndarray]:
"""
Get data for all years from a given file.
Args:
file_path (Union[str, Path]): The path to the file.
data_format (str): The format of the data.
return_as_list (bool, optional): Whether to return the data as a list of arrays.
Defaults to False.
Returns:
Union[List[np.ndarray], np.ndarray]: The data for all years.
Raises:
FileNotFoundError: If the file does not exist.
"""
file_path = Path(file_path)
if not file_path.exists():
raise FileNotFoundError(f"No such file or directory: '{file_path}'")
list_years = _extract_years_from_excel(file_path)
data_each_year = []
for year in list_years:
data = _get_data_for_year(file_path, year=year, data_format=data_format)
data_each_year.append(data)
if return_as_list:
return data_each_year
return np.hstack(data_each_year)
def _get_invalid_elements_indices(
num_array: Any, validation_func: Callable[[Any], Any]
) -> Dict[str, List[int]]:
"""
Returns a dictionary containing the indices of invalid elements in the given `num_array`.
Parameters:
- num_array (array-like): The array containing the elements to be validated.
- validation_func (function): The validation function to be applied to each element.
Returns:
- invalid_element_indices (defaultdict):
A defaultdict object containing the indices of invalid elements.
The keys of the dictionary represent the type of invalidity,
such as "NaN" for elements that are NaN, and
the values are lists of indices corresponding to each type of invalidity.
"""
invalid_element_indices: Dict[str, List[int]] = defaultdict(list)
for index, element in enumerate(num_array):
try:
result = validation_func(element)
if np.isnan(result):
invalid_element_indices["NaN"].append(index)
except ValueError:
invalid_element_indices[str(element)].append(index)
return invalid_element_indices
def have_invalid(array: List[Any], validation_func: Callable[[Any], Any]) -> bool:
    """
    Tell whether at least one element of the array fails validation.

    Args:
        array (list): The array to check for invalid elements.
        validation_func (function): The validation function used
            to determine if an element is invalid.

    Returns:
        bool: True if the array has any invalid elements, False otherwise.
    """
    invalid_by_kind = _get_invalid_elements_indices(
        array, validation_func=validation_func
    )
    # A non-empty mapping means at least one invalid element was recorded.
    return len(invalid_by_kind) > 0
def _check_invalid(array, validation_func=float):
    """
    Collect the indices of the invalid elements in the array.

    Parameters:
    array (iterable): The array to check.
    validation_func (callable): The validation function to use.
        Defaults to `float`.

    Returns:
    dict: A dictionary with the indices of invalid elements, as produced by
        `_get_invalid_elements_indices`. It is empty (never None) when there
        are no invalid elements.
    """
    # The helper always returns a mapping, so the former
    # "x if x is not None else None" guard was a no-op and has been removed.
    return _get_invalid_elements_indices(array, validation_func=validation_func)
def read_folder(
    dataset_path: str,
    filename_pattern: str,
    data_format: str,
    station_name_prefix: str = "",
    check_for_invalid_data: bool = False,
) -> Union[
    Dict[str, np.ndarray],
    Tuple[Dict[str, np.ndarray], Dict[str, Dict[str, List[int]]]],
]:
    """
    Read files from a folder and extract data for each station.

    Only files directly inside `dataset_path` are read (sub-folders are not
    searched). The station name is taken from the file stem: the stem is split
    on "_", the first token and the last two tokens are dropped, and the rest
    is re-joined with "_" (e.g. "hujan_gardu_tanjak_1998_2008" becomes
    "gardu_tanjak") before `station_name_prefix` is prepended.

    Args:
        dataset_path (str): The path to the dataset folder.
        filename_pattern (str): The glob pattern to match the filenames.
        data_format (str): The format of the data in the files.
        station_name_prefix (str, optional): The prefix to add to the station names.
            Defaults to "".
        check_for_invalid_data (bool, optional): Whether to check for invalid data.
            Defaults to False.

    Returns:
        dict: A dictionary containing the extracted data for each station.
        If `check_for_invalid_data` is True, a tuple ``(data, invalid)`` is
        returned instead, where `invalid` maps each station to its dictionary
        of invalid values. When no file matches, the dictionaries are empty
        but the return shape is the same.
    """
    dataset_path = Path(dataset_path)
    # List the folder once so the reported count and the files that are
    # actually processed cannot disagree.
    all_files = list(dataset_path.glob(filename_pattern))
    if not all_files:
        logging.warning("No files found that match the pattern %s", filename_pattern)
        # Keep the return shape stable so `data, invalid = read_folder(...)`
        # still unpacks when nothing was found.
        return ({}, {}) if check_for_invalid_data else {}
    logging.info("Found %d file(s)", len(all_files))

    all_station_data = {}
    invalid_data = {}
    for counter, file in enumerate(all_files):
        logging.info(":: %4d:\t%s", counter, file.name)
        station_name = station_name_prefix + "_".join(file.stem.split("_")[1:-2])
        each_station_data = _get_data_all_year(file, data_format=data_format)
        all_station_data[station_name] = each_station_data
        if check_for_invalid_data:
            invalid_data[station_name] = _check_invalid(each_station_data)

    if check_for_invalid_data:
        return all_station_data, invalid_data
    return all_station_data
@deprecated("_extract_years_from_excel")
def _get_years(io: str) -> List[int]:
    """Deprecated alias of `_extract_years_from_excel`; `io` is the Excel file path."""
    years = _extract_years_from_excel(file_path=io)
    return years
@deprecated("_get_pivot_from_excel")
def _get_pivot(io, year, fmt):
    """Deprecated alias of `_get_pivot_from_excel` (`io` = file path, `fmt` = data format)."""
    pivot = _get_pivot_from_excel(excel_file=io, year=year, data_format=fmt)
    return pivot
@deprecated("_get_data_for_year")
def _get_data_oneyear(io, year, fmt):
    """Deprecated alias of `_get_data_for_year` (`io` = file path, `fmt` = data format)."""
    yearly_values = _get_data_for_year(file_path=io, year=year, data_format=fmt)
    return yearly_values
@deprecated("_get_data_all_year")
def _get_data_allyear(*args, **kwargs):
    """Deprecated alias of `_get_data_all_year`; all arguments are forwarded unchanged."""
    forwarded_result = _get_data_all_year(*args, **kwargs)
    return forwarded_result
@deprecated("_get_invalid_elements_indices")
def _get_invalid(array, check):
    """Deprecated alias of `_get_invalid_elements_indices` (`check` = validation function)."""
    invalid_by_kind = _get_invalid_elements_indices(
        num_array=array, validation_func=check
    )
    return invalid_by_kind
@deprecated("have_invalid")
def _have_invalid(array, check):
    """Deprecated alias of `have_invalid` (`check` = validation function)."""
    any_invalid = have_invalid(array=array, validation_func=check)
    return any_invalid
2024-04-13 23:36:37,620 - INFO - NumExpr defaulting to 2 threads.
_get_years()
¶Tujuan: Memperoleh `list` tahun di dalam berkas excel.
_get_years(SINGLE_DATASET)
<ipython-input-2-23e254b88fe0>:27: DeprecationWarning: _get_years is deprecated, use _extract_years_from_excel instead warnings.warn(
[1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008]
_get_pivot()
¶Tujuan: memperoleh pivot table dari berkas excel
_get_pivot(SINGLE_DATASET, year=1998, fmt='uma.debit')
<ipython-input-2-23e254b88fe0>:27: DeprecationWarning: _get_pivot is deprecated, use _get_pivot_from_excel instead warnings.warn(
39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
16 | 0 | 0 | 90.12 | 74.6 | 160.28 | 22.1 | 19.96 | 24.82 | 490 | 56.36 | 158.84 | 54.58 |
17 | 0 | 0 | 97.9 | 126.44 | 167.16 | 318.52 | 21.46 | 25.22 | 11.36 | 52.38 | 79.96 | 56.45 |
18 | 0 | 0 | 88.9 | 128.08 | 132.3 | 105.66 | 21.74 | 139.18 | 54.06 | 19.16 | 58.98 | 26.7 |
19 | 0 | 0 | 90.3 | 126.72 | 202.8 | 99.38 | 22.5 | 63.68 | 78.68 | 22.46 | 283.04 | 84.02 |
20 | 0 | 0 | 210.06 | 74.36 | 80.4 | 173.96 | 21.9 | 173.82 | 14.54 | 135.92 | 166.48 | 81.28 |
21 | 0 | 0 | 82.9 | 78.08 | 204.54 | 84.58 | 24.94 | 170.98 | 78.28 | 89.18 | 20.9.46 | 82.28 |
22 | 0 | 0 | 274.42 | 120.18 | 88.12 | 84.06 | 24.5 | 19.46 | 165.48 | 83.58 | 238.98 | 50.18 |
23 | 0 | 0 | 216.36 | 75.18 | 77.48 | 24.82 | 23.62 | 19.86 | 168.18 | 80.6 | 81.46 | 166.02 |
24 | 0 | 0 | 86.84 | 30.38 | 207.28 | 23.26 | 24.02 | 20.82 | 22 | 20.62 | 79.52 | 166.42 |
25 | 0 | 0 | 88.68 | 92.98 | 206.4 | 18.66 | 25.82 | 21.42 | 84.04 | 19.52 | 75.92 | 84.38 |
26 | 0 | 0 | 80.8 | 30.38 | 284.3 | 51.1 | 26.26 | 23.78 | 21.74 | 16.56 | 44.92 | 51.4 |
27 | 0 | 0 | 84.3 | 158.7 | 351.76 | 20.7 | 20.4 | 27.76 | 58.86 | 82.62 | 255.4 | 24.62 |
28 | 0 | 0 | 82.3 | 348.8 | 83.44 | 19.96 | 17.62 | 58.14 | 21.52 | 84.02 | 161.98 | 24.62 |
29 | 0 | 0 | 120.96 | 77.46 | 233.52 | 20.36 | 15.34 | 116.7 | 19.84 | 21.74 | 133.68 | 70 |
30 | 0 | 0 | 123.28 | 77.32 | 81.78 | 83.12 | 18.34 | 115.82 | 23.1 | 80.98 | 52.42 | 68.3 |
31 | 0 | 0 | 131.06 | 128.6 | 81.98 | 22.24 | 19.3 | 24.42 | 14.84 | 20 | 85.36 | 68.9 |
32 | 0 | 0 | 74.86 | 161.92 | 82.18 | 19.74 | 80.94 | 22.82 | 20.38 | 18.75 | 55.5 | 25.22 |
33 | 0 | 0 | 159.66 | 388.18 | 84.3 | 19.02 | 167.76 | 21.44 | 20.28 | 275.19 | 266.18 | 27.32 |
34 | 0 | 0 | 69.68 | 123.62 | 86.56 | 17.76 | 133.76 | 24.38 | 20.14 | 252.96 | 162.38 | 25.9 |
35 | 0 | 0 | 68.36 | 129.34 | 22.46 | 49.6 | 162.44 | 24.62 | 15.48 | 63.58 | 166.58 | 26.06 |
36 | 0 | 0 | 73.56 | 157.76 | 20.74 | 54.42 | 94.98 | 111.78 | 22.8 | 77.27 | 133.56 | 70.32 |
37 | 0 | 0 | 74.64 | 129.98 | 83.54 | 87.22 | 172.52 | 20.22 | 19.46 | 81.04 | 133 | 121.32 |
38 | 0 | 0 | 84.96 | 14.76 | 15.08 | 22.54 | 134.14 | 22.78 | 19.64 | 49.5 | 81.76 | 69.6 |
39 | 0 | 0 | 376.54 | 84.9 | 13.92 | 19.96 | 82.94 | 24.68 | 57.28 | 18.42 | 133.98 | 139.83 |
40 | 0 | 0 | 126.76 | 74.76 | 64.24 | 85.24 | 23.16 | 83.28 | 23.18 | 15.52 | 48.16 | 88.9 |
41 | 0 | 0 | 128.22 | 76.88 | 19.96 | 212.46 | 22.98 | 22.28 | 21.62 | 120.14 | 84.25 | 417 |
42 | 0 | 0 | 125.46 | 124.6 | 21.84 | 84.7 | 84.62 | 22.22 | 19.46 | 213.97 | 20.62 | 164.75 |
43 | 0 | 0 | 444.62 | 133.6 | 96.9 | 83.58 | 24.04 | 162.64 | 17.1 | 367.19 | 20.78 | 253.8 |
44 | 0 | NaN | 154.68 | 154.96 | 113.88 | 19.94 | 21.82 | 13.08 | 14.3 | 104.2 | 21.38 | 77.54 |
45 | 0 | NaN | 11.68 | 348.8 | 85.06 | 17.58 | 22.58 | 170.36 | 14.44 | 78.65 | 24.8 | 157.76 |
46 | 0 | NaN | 77.12 | NaN | 93.24 | NaN | 63.56 | 84.68 | NaN | 80.67 | NaN | 309.58 |
_get_data_oneyear()
¶Tujuan: Memperoleh data dari sheet tunggal dan disajikan dalam bentuk `array` 1D.
_get_data_oneyear(SINGLE_DATASET, year=1998, fmt='uma.debit')
<ipython-input-2-23e254b88fe0>:27: DeprecationWarning: _get_data_oneyear is deprecated, use _get_data_for_year instead warnings.warn(
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90.12, 97.9, 88.9, 90.3, 210.06, 82.9, 274.42, 216.36, 86.84, 88.68, 80.8, 84.3, 82.3, 120.96, 123.28, 131.06, 74.86, 159.66, 69.68, 68.36, 73.56, 74.64, 84.96, 376.54, 126.76, 128.22, 125.46, 444.62, 154.68, 11.68, 77.12, 74.6, 126.44, 128.08, 126.72, 74.36, 78.08, 120.18, 75.18, 30.38, 92.98, 30.38, 158.7, 348.8, 77.46, 77.32, 128.6, 161.92, 388.18, 123.62, 129.34, 157.76, 129.98, 14.76, 84.9, 74.76, 76.88, 124.6, 133.6, 154.96, 348.8, 160.28, 167.16, 132.3, 202.8, 80.4, 204.54, 88.12, 77.48, 207.28, 206.4, 284.3, 351.76, 83.44, 233.52, 81.78, 81.98, 82.18, 84.3, 86.56, 22.46, 20.74, 83.54, 15.08, 13.92, 64.24, 19.96, 21.84, 96.9, 113.88, 85.06, 93.24, 22.1, 318.52, 105.66, 99.38, 173.96, 84.58, 84.06, 24.82, 23.26, 18.66, 51.1, 20.7, 19.96, 20.36, 83.12, 22.24, 19.74, 19.02, 17.76, 49.6, 54.42, 87.22, 22.54, 19.96, 85.24, 212.46, 84.7, 83.58, 19.94, 17.58, 19.96, 21.46, 21.74, 22.5, 21.9, 24.94, 24.5, 23.62, 24.02, 25.82, 26.26, 20.4, 17.62, 15.34, 18.34, 19.3, 80.94, 167.76, 133.76, 162.44, 94.98, 172.52, 134.14, 82.94, 23.16, 22.98, 84.62, 24.04, 21.82, 22.58, 63.56, 24.82, 25.22, 139.18, 63.68, 173.82, 170.98, 19.46, 19.86, 20.82, 21.42, 23.78, 27.76, 58.14, 116.7, 115.82, 24.42, 22.82, 21.44, 24.38, 24.62, 111.78, 20.22, 22.78, 24.68, 83.28, 22.28, 22.22, 162.64, 13.08, 170.36, 84.68, 490, 11.36, 54.06, 78.68, 14.54, 78.28, 165.48, 168.18, 22, 84.04, 21.74, 58.86, 21.52, 19.84, 23.1, 14.84, 20.38, 20.28, 20.14, 15.48, 22.8, 19.46, 19.64, 57.28, 23.18, 21.62, 19.46, 17.1, 14.3, 14.44, 56.36, 52.38, 19.16, 22.46, 135.92, 89.18, 83.58, 80.6, 20.62, 19.52, 16.56, 82.62, 84.02, 21.74, 80.98, 20, 18.75, 275.19, 252.96, 63.58, 77.27, 81.04, 49.5, 18.42, 15.52, 120.14, 213.97, 367.19, 104.2, 78.65, 80.67, 158.84, 79.96, 58.98, 283.04, 166.48, '20.9.46', 238.98, 81.46, 79.52, 75.92, 
44.92, 255.4, 161.98, 133.68, 52.42, 85.36, 55.5, 266.18, 162.38, 166.58, 133.56, 133, 81.76, 133.98, 48.16, 84.25, 20.62, 20.78, 21.38, 24.8, 54.58, 56.45, 26.7, 84.02, 81.28, 82.28, 50.18, 166.02, 166.42, 84.38, 51.4, 24.62, 24.62, 70, 68.3, 68.9, 25.22, 27.32, 25.9, 26.06, 70.32, 121.32, 69.6, 139.83, 88.9, 417, 164.75, 253.8, 77.54, 157.76, 309.58], dtype=object)
_get_data_oneyear(SINGLE_DATASET, year=1998, fmt='uma.debit').shape
<ipython-input-2-23e254b88fe0>:27: DeprecationWarning: _get_data_oneyear is deprecated, use _get_data_for_year instead warnings.warn(
(365,)
_get_data_allyear()
¶Tujuan: Memperoleh data dari seluruh sheet dan disajikan dalam bentuk `array` 1D.
_get_data_allyear(SINGLE_DATASET, 'uma.debit')
<ipython-input-2-23e254b88fe0>:27: DeprecationWarning: _get_data_allyear is deprecated, use _get_data_all_year instead warnings.warn(
array([0, 0, 0, ..., 20.14, 208.54, 208.14], dtype=object)
_get_data_allyear(SINGLE_DATASET, 'uma.debit').shape
<ipython-input-2-23e254b88fe0>:27: DeprecationWarning: _get_data_allyear is deprecated, use _get_data_all_year instead warnings.warn(
(4018,)
return_as_list=True
¶Tujuan: disajikan dalam bentuk `list` `array` untuk setiap tahunnya.
_get_data_allyear(SINGLE_DATASET, data_format='uma.debit', return_as_list=True)[:2]
<ipython-input-2-23e254b88fe0>:27: DeprecationWarning: _get_data_allyear is deprecated, use _get_data_all_year instead warnings.warn(
[array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90.12, 97.9, 88.9, 90.3, 210.06, 82.9, 274.42, 216.36, 86.84, 88.68, 80.8, 84.3, 82.3, 120.96, 123.28, 131.06, 74.86, 159.66, 69.68, 68.36, 73.56, 74.64, 84.96, 376.54, 126.76, 128.22, 125.46, 444.62, 154.68, 11.68, 77.12, 74.6, 126.44, 128.08, 126.72, 74.36, 78.08, 120.18, 75.18, 30.38, 92.98, 30.38, 158.7, 348.8, 77.46, 77.32, 128.6, 161.92, 388.18, 123.62, 129.34, 157.76, 129.98, 14.76, 84.9, 74.76, 76.88, 124.6, 133.6, 154.96, 348.8, 160.28, 167.16, 132.3, 202.8, 80.4, 204.54, 88.12, 77.48, 207.28, 206.4, 284.3, 351.76, 83.44, 233.52, 81.78, 81.98, 82.18, 84.3, 86.56, 22.46, 20.74, 83.54, 15.08, 13.92, 64.24, 19.96, 21.84, 96.9, 113.88, 85.06, 93.24, 22.1, 318.52, 105.66, 99.38, 173.96, 84.58, 84.06, 24.82, 23.26, 18.66, 51.1, 20.7, 19.96, 20.36, 83.12, 22.24, 19.74, 19.02, 17.76, 49.6, 54.42, 87.22, 22.54, 19.96, 85.24, 212.46, 84.7, 83.58, 19.94, 17.58, 19.96, 21.46, 21.74, 22.5, 21.9, 24.94, 24.5, 23.62, 24.02, 25.82, 26.26, 20.4, 17.62, 15.34, 18.34, 19.3, 80.94, 167.76, 133.76, 162.44, 94.98, 172.52, 134.14, 82.94, 23.16, 22.98, 84.62, 24.04, 21.82, 22.58, 63.56, 24.82, 25.22, 139.18, 63.68, 173.82, 170.98, 19.46, 19.86, 20.82, 21.42, 23.78, 27.76, 58.14, 116.7, 115.82, 24.42, 22.82, 21.44, 24.38, 24.62, 111.78, 20.22, 22.78, 24.68, 83.28, 22.28, 22.22, 162.64, 13.08, 170.36, 84.68, 490, 11.36, 54.06, 78.68, 14.54, 78.28, 165.48, 168.18, 22, 84.04, 21.74, 58.86, 21.52, 19.84, 23.1, 14.84, 20.38, 20.28, 20.14, 15.48, 22.8, 19.46, 19.64, 57.28, 23.18, 21.62, 19.46, 17.1, 14.3, 14.44, 56.36, 52.38, 19.16, 22.46, 135.92, 89.18, 83.58, 80.6, 20.62, 19.52, 16.56, 82.62, 84.02, 21.74, 80.98, 20, 18.75, 275.19, 252.96, 63.58, 77.27, 81.04, 49.5, 18.42, 15.52, 120.14, 213.97, 367.19, 104.2, 78.65, 80.67, 158.84, 79.96, 58.98, 283.04, 166.48, '20.9.46', 238.98, 81.46, 79.52, 75.92, 
44.92, 255.4, 161.98, 133.68, 52.42, 85.36, 55.5, 266.18, 162.38, 166.58, 133.56, 133, 81.76, 133.98, 48.16, 84.25, 20.62, 20.78, 21.38, 24.8, 54.58, 56.45, 26.7, 84.02, 81.28, 82.28, 50.18, 166.02, 166.42, 84.38, 51.4, 24.62, 24.62, 70, 68.3, 68.9, 25.22, 27.32, 25.9, 26.06, 70.32, 121.32, 69.6, 139.83, 88.9, 417, 164.75, 253.8, 77.54, 157.76, 309.58], dtype=object), array([660, 871, 908, 1140, 61, 62, 150.5, 134.22, 132.34, 164.94, 196.1, 156.44, 121.32, 218.44, 137.9, 85.22, 217.18, 138.41, 83.75, 300.35, 250.44, 137.77, 211.38, 302.38, 294.34, 195.72, 272.6, 165.26, 168.53, 80.68, 26.014, 3.06, 68.69, 152.5, 148.75, 242.66, 302.38, 192.35, 90, 282, 186, 251.6, 358.79, 86.44, 134.39, 114, 112, 62, 85.27, 25.26, 20.8, 119.72, 190, 114, 62, 209.58, 169.83, 475, 339, 376, 282, 188, 23.15, 23.15, 17.86, 75.91, 62, 188, 188, 58.14, 21.19, 25.7, 120.1, 0, 183.91, 117.48, 64.69, 14.27, 18.21, 132.88, 62, 62, 3.48, 3.19, 127.59, 24.95, 107.35, 61.5, 26.16, 15.14, 20.07, 15.31, 81.41, 19.35, 13.85, 133.89, 127.81, 164.34, 24.78, 142.46, 28.89, 88.7, 142.06, 27.1, 26.09, 28.87, 249.22, 51.36, 16.26, 26.3, 20.66, 17.78, 21.22, 20.44, 15.44, 20.08, 22.37, 20.88, 81.77, 137.64, 84.83, 53.3, 28.45, 24.04, 91.32, 88.07, 92.33, 88.77, 24.82, 22.72, 25.43, 24.43, 88.26, 139.22, 89.39, 171.22, 25.83, 25.86, 56.21, 23.58, 141.18, 89.92, 86.82, 28.2, 26.77, 25.68, 26.48, 90.61, 108.7, 89.36, 27.61, 15.5, 26.27, 25.64, 25.41, 23.45, 26.94, 91.06, 19.43, 24.12, 22.99, 24.22, 26.94, 24.08, 91.46, 89.08, 52.99, 24.44, 22.08, 21.52, 20.56, 18.72, 15.67, 69.62, 90.66, 46.66, 23.5, 13.8, 10.4, 22.9, 46.22, 21.52, 26.1, 21.12, 20.39, 15.78, 23.38, 138.63, 119.52, 37.43, 25.22, 25.87, 117.36, 87.34, 25.22, 138.24, 23.68, 43.39, 15.02, 28.46, 23.83, 87.52, 21.12, 27.38, 26.08, 22.88, 18.34, 11.16, 28.45, 14.82, 16.64, 13.32, 11.15, 15.1, 13.27, 11.75, 10.99, 10.35, 9.76, 9.62, 9.49, 8.58, 10.55, 92.33, 92.33, 23.21, 23.21, 18.92, 142.46, 27.99, 28.87, 15.82, 22.67, 16.79, 9.79, 9.6, 9.72, 
55.83, 90.05, 143.45, 20.95, 24.48, 24.48, 23.18, 17.92, 12.7, 11.54, 10.79, 11.11, 7.1, 8.64, 9.99, 9.67, 137.01, 24.82, 19.4, 12.08, 13.56, 8.13, 8.03, 8.24, 10.97, 9.27, 9.55, 14.33, 21.25, 10.5, 12, 15.81, 173.13, 273.83, 106, 18.83, 83.98, 21.24, 20.23, 16.27, 59.05, 23.98, 25.83, 19.41, 81.77, 86.9, 84.58, 20.63, 84.77, 80.9, 24.42, 54.39, 188, 114, 62, 63, 148, 70, 62, 190, 171, 188, 146, 31, 0, 0, 0, 75.78, 72.57, 269.49, 153.97, 293.89, 115.73, 38.65, 10.82, 11, 74.76, 77.53, 23.93, 23.53, 21.15, 15.55, 22.1, 22.37, 20.67, 19.66, 21.6, 23.64, 20.88, 21.98, 89.37, 23.98, 25.04, 89.14, 25.93, 86.84, 22.18, 23.78, 24.52, 25.47, 89.77, 75.8, 81.69, 87.93, 87.87, 19.12, 93.88, 122.28, 22.48, 119.88, 383.53, 90.38, 169.25, 209.91, 235.5, 80.27, 22.77, 92.54, 22.77, 22.77, 52.37, 209.7, 172.14, 210.77, 22.37, 57.37, 70.45, 167.25], dtype=object)]
_have_invalid()
¶Menjawab: Apakah `array` 1D ini memiliki data yang tidak bisa diubah ke dalam bentuk `check`?
array = _get_data_allyear(SINGLE_DATASET, data_format='uma.debit')
print(f'shape = {array.shape}; type = {array.dtype}')
<ipython-input-2-23e254b88fe0>:27: DeprecationWarning: _get_data_allyear is deprecated, use _get_data_all_year instead warnings.warn(
shape = (4018,); type = object
_have_invalid(array, check=float)
<ipython-input-2-23e254b88fe0>:27: DeprecationWarning: _have_invalid is deprecated, use have_invalid instead warnings.warn(
True
_get_invalid()
¶Tujuan: Memperoleh nilai invalid beserta indexnya dalam bentuk dictionary
_get_invalid(array, check=float)
<ipython-input-2-23e254b88fe0>:27: DeprecationWarning: _get_invalid is deprecated, use _get_invalid_elements_indices instead warnings.warn(
defaultdict(list, {'20.9.46': [309], 'NaN': [789], 'tad': [2974]})
_check_invalid()
¶Tujuan: Memeriksa apakah `array` memiliki nilai invalid; jika iya, apa saja?
_check_invalid(array, validation_func=float)
defaultdict(list, {'20.9.46': [309], 'NaN': [789], 'tad': [2974]})
pd.DataFrame(_)
20.9.46 | NaN | tad | |
---|---|---|---|
0 | 309 | 789 | 2974 |
read_folder()
¶Tujuan: Membaca seluruh berkas excel di dalam folder yang mengikuti pola `pattern`, dan membaca isi berkas, kemudian menggabungkan seluruh hasil bacaan dalam bentuk `dictionary`.
data, invalid = read_folder(DRIVE_DATASET_PATH, filename_pattern='hujan_*', data_format='uma.hujan', station_name_prefix='h_', check_for_invalid_data=True)
2024-04-13 23:36:47,589 - INFO - Found 8 file(s) 2024-04-13 23:36:47,593 - INFO - :: 0: hujan_gardu_tanjak_1998_2008.xls 2024-04-13 23:36:49,840 - INFO - :: 1: hujan_gunung_tunggal_1998_2008.xls 2024-04-13 23:36:52,125 - INFO - :: 2: hujan_bd_pamarayan_1998_2008.xls 2024-04-13 23:36:53,982 - INFO - :: 3: hujan_pasir_ona_1998_2008.xls 2024-04-13 23:36:55,738 - INFO - :: 4: hujan_bojong_manik_1998_2008.xls 2024-04-13 23:36:58,895 - INFO - :: 5: hujan_sampang_peundeuy_1998_2008.xls 2024-04-13 23:37:00,712 - INFO - :: 6: hujan_ciminyak_cilaki_1998_2008.xls 2024-04-13 23:37:02,528 - INFO - :: 7: hujan_cimarga_1998_2008.xls
pd.DataFrame(invalid).T
- | NaN | |
---|---|---|
h_gardu_tanjak | [0, 1, 3, 4, 5, 8, 11, 12, 17, 19, 20, 22, 45,... | NaN |
h_gunung_tunggal | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 16,... | NaN |
h_bd_pamarayan | [0, 1, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15... | [211, 576] |
h_pasir_ona | [0, 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 14, 17, 18... | NaN |
h_bojong_manik | [0, 1, 3, 4, 5, 6, 8, 9, 11, 14, 16, 17, 19, 2... | [2558] |
h_sampang_peundeuy | [2, 3, 4, 5, 16, 17, 18, 19, 20, 21, 22, 24, 2... | [0, 1] |
h_ciminyak_cilaki | [0, 2, 3, 4, 5, 7, 8, 10, 11, 12, 13, 15, 17, ... | NaN |
h_cimarga | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14,... | NaN |
pd.DataFrame(data, index=pd.date_range('19980101', '20081231'))
h_gardu_tanjak | h_gunung_tunggal | h_bd_pamarayan | h_pasir_ona | h_bojong_manik | h_sampang_peundeuy | h_ciminyak_cilaki | h_cimarga | |
---|---|---|---|---|---|---|---|---|
1998-01-01 | - | - | - | - | - | NaN | - | - |
1998-01-02 | - | - | - | - | - | NaN | 7 | - |
1998-01-03 | 5 | - | 64 | - | 5 | - | - | - |
1998-01-04 | - | - | - | - | - | - | - | - |
1998-01-05 | - | - | - | - | - | - | - | - |
... | ... | ... | ... | ... | ... | ... | ... | ... |
2008-12-27 | 14 | 15.7 | - | 7 | 12 | 21 | - | 36 |
2008-12-28 | - | 19 | - | - | 10 | 15 | - | - |
2008-12-29 | 12 | 21.7 | 2 | 11 | 7 | 50 | - | 32 |
2008-12-30 | 10 | 17.5 | 4 | 21 | 6.05 | 22 | - | 28 |
2008-12-31 | 13 | 29 | 6 | 4 | - | 18 | - | 39 |
4018 rows × 8 columns
- 20240414 - 2.0.0 / 0.5.0 - Refactor hk43 (new function name & documentation)
- 20190926 - 1.0.0 - Initial
Source code in this notebook is licensed under the MIT License. Data in this notebook is licensed under a Creative Commons Attribution 4.0 International license.