import scipy.stats as ss
from sklearn.datasets import make_classification
from sklearn.model_selection import RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from collections.abc import Iterable, Callable
from typing import Union
import pandas as pd
import numpy as np
from abc import ABC, abstractmethod
# Toy classification dataset used to demonstrate the randomized search.
X, y = make_classification()

# Search-space specification: each hyper-parameter maps to a scipy.stats
# distribution name plus its constructor args. The pseudo-name "choice"
# (not in scipy.stats) marks a plain candidate list to sample uniformly.
parameters = {
    "n_estimators": {"distribution": "randint", "args": (10, 100)},
    "min_weight_fraction_leaf": {"distribution": "norm", "args": (0.25, 0.01)},
    "criterion": {"distribution": "choice", "args": ["gini", "entropy"]},
}
def make_distributions(parameters: dict[str, dict[str, Union[str, tuple, Iterable]]]) -> dict[str, Union[Callable, Iterable]]:
    """Turn a parameter spec into RandomizedSearchCV-ready distributions.

    Each spec entry names a scipy.stats distribution and its constructor
    args; the named distribution is frozen with those args. A name that
    scipy.stats does not provide (e.g. "choice") falls back to the raw
    ``args`` value, which RandomizedSearchCV samples from uniformly.

    Parameters
    ----------
    parameters : dict
        Maps parameter name -> {"distribution": str, "args": tuple | list}.

    Returns
    -------
    dict
        Maps parameter name -> frozen scipy distribution or candidate list.
    """
    distributions = {}
    for name, spec in parameters.items():
        factory = getattr(ss, spec["distribution"], None)
        if factory is None:
            # Not a scipy.stats distribution: pass the candidates through as-is.
            distributions[name] = spec["args"]
        else:
            distributions[name] = factory(*spec["args"])
    return distributions
# Build the sampling distributions from the spec and run the randomized search.
# BUG FIX: the original referenced an undefined name `params_dist`; the result
# of make_distributions(parameters) was never bound to anything.
param_distributions = make_distributions(parameters)
cv = RandomizedSearchCV(RandomForestClassifier(), param_distributions, cv=11)
cv.fit(X, y)
# Example fitted estimator repr (REPL transcript kept as a comment — it was
# pasted into the script as code, where it is a syntax error):
# RandomizedSearchCV(cv=11, estimator=RandomForestClassifier(),
#                    param_distributions={'criterion': ['gini', 'entropy'],
#                                         'min_weight_fraction_leaf': <rv_frozen>,
#                                         'n_estimators': <rv_frozen>})
print(cv.best_params_)
# e.g. {'criterion': 'entropy', 'min_weight_fraction_leaf': 0.2325..., 'n_estimators': 79}