This example is based on a question from Stack Overflow.
It doesn't make any real-world sense, but it's an easy introductory example, and more complex issues can be explained using this problem.
The problem is to classify whether numbers from a fixed range are divisible by some fixed number M.
The key idea is not to encode the numbers as plain ints, but to use the digits of their binary representation as features.
import numpy as np
from sklearn.model_selection import train_test_split
# Problem setup: label every integer in range(N) by whether it is divisible
# by M, and describe each integer by the digits of its fixed-width binary
# expansion.
N = 1024
# Number of binary digits needed to write N - 1, the largest value in
# range(N). This is an exact integer computation: for N = 1024 it gives 10,
# the same as int(np.log2(N)), but it does not come out one digit short when
# N is not a power of two.
logN = max(1, (N - 1).bit_length())
M = 10
# One row per integer, one '0'/'1' character per binary digit.
X_bin = [list(np.binary_repr(x, width=logN)) for x in range(N)]
# Numeric 0/1 feature matrix with one column per binary digit. The
# characters are converted to integers here so the estimators receive real
# numbers instead of depending on scikit-learn to coerce string input.
X = np.array(X_bin).astype(int)
# Boolean target: True exactly for the multiples of M.
y = np.arange(N) % M == 0
# Hold out 12.5% of the samples; stratify so both splits keep the same
# proportion of multiples, and fix the seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.125, stratify=y, random_state=0)
from sklearn.svm import SVC
# Baseline: a linear-kernel support vector classifier on the training split.
# fit() returns the fitted estimator, so construction and training are chained.
svc = SVC(C=1, kernel='linear').fit(X_train, y_train)
# Mean accuracy on the held-out split.
# NOTE(review): the score recorded for this cell, 0.8984375, equals 115/128.
# With about one label in ten being True, that is roughly what always
# predicting False would score -- inspect the predictions before reading it
# as evidence that divisibility was learned.
svc.score(X_test, y_test)
0.8984375
from sklearn.tree import DecisionTreeClassifier
# Shallow decision tree (at most two levels of splits). With
# max_features='log2' only a subset of the features is considered at each
# split; no random_state is passed here.
tree = DecisionTreeClassifier(max_features='log2', max_depth=2)
# The target handed to fit() is y_train stacked twice and transposed, i.e.
# two identical label columns with one row per training sample, so the tree
# is trained as a two-output model.
tree.fit(X_train, np.vstack([y_train, y_train]).T)
# Scoring against the 1-D y_test then fails: the recorded traceback shows
# accuracy_score rejecting the pair with
# "ValueError: Can't handle mix of binary and multilabel-indicator".
tree.score(X_test, y_test)
--------------------------------------------------------------------------- ValueError Traceback (most recent call last) <ipython-input-10-b84c593cddc7> in <module>() 4 tree.fit(X_train, np.vstack([y_train, y_train]).T) 5 ----> 6 tree.score(X_test, y_test) /opt/anaconda3/lib/python3.5/site-packages/sklearn/base.py in score(self, X, y, sample_weight) 347 """ 348 from .metrics import accuracy_score --> 349 return accuracy_score(y, self.predict(X), sample_weight=sample_weight) 350 351 /opt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py in accuracy_score(y_true, y_pred, normalize, sample_weight) 170 171 # Compute accuracy for each possible representation --> 172 y_type, y_true, y_pred = _check_targets(y_true, y_pred) 173 if y_type.startswith('multilabel'): 174 differing_labels = count_nonzero(y_true - y_pred, axis=1) /opt/anaconda3/lib/python3.5/site-packages/sklearn/metrics/classification.py in _check_targets(y_true, y_pred) 80 if len(y_type) > 1: 81 raise ValueError("Can't handle mix of {0} and {1}" ---> 82 "".format(type_true, type_pred)) 83 84 # We can't have more than one value on y_type => The set is no more needed ValueError: Can't handle mix of binary and multilabel-indicator
# Shape of the stacked labels before transposing: one row per copy of
# y_train, one column per training sample.
np.vstack((y_train, y_train)).shape
(2, 896)