import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics.pairwise import cosine_similarity as skcosine_similarity
def _l2_normalize_rows(A):
    """Return the rows of 2-D array-like *A* scaled to unit Euclidean length."""
    A = np.asarray(A)
    norms = np.sqrt(np.sum(np.square(A), axis=1))[:, np.newaxis]
    # An all-zero row has norm 0; dividing by it would fill the row with NaN.
    # Treating that norm as 1 leaves the row as zeros instead.
    norms[norms == 0] = 1
    return A / norms


def cosine_similarity(X, Y=None):
    """Compute pairwise cosine similarity between the rows of X and Y.

    Parameters
    ----------
    X : array-like of shape (n_samples_X, n_features)
        First set of row vectors.
    Y : array-like of shape (n_samples_Y, n_features), optional
        Second set of row vectors. When omitted, X is compared with itself.

    Returns
    -------
    numpy.ndarray of shape (n_samples_X, n_samples_Y)
        Entry ``[i, j]`` is the cosine of the angle between ``X[i]`` and
        ``Y[j]``. An all-zero row has no direction, so its similarity to
        every other row is reported as 0 rather than NaN.
    """
    X_normalized = _l2_normalize_rows(X)
    if Y is None:
        # Reuse the normalized X instead of normalizing the same data twice.
        Y_normalized = X_normalized
    else:
        Y_normalized = _l2_normalize_rows(Y)
    # For unit-length rows the dot product equals the cosine of the angle.
    return np.dot(X_normalized, Y_normalized.T)
# Sanity checks: the local cosine_similarity must agree with scikit-learn's
# implementation on the iris feature matrix (the labels are discarded).
X, _ = load_iris(return_X_y=True)

# Case 1: single-argument form, every row compared against every row of X.
ans1, ans2 = cosine_similarity(X), skcosine_similarity(X)
assert np.allclose(ans1, ans2)

# Case 2: two-argument form, the first 100 rows against the remaining rows.
ans1, ans2 = (
    cosine_similarity(X[:100], X[100:]),
    skcosine_similarity(X[:100], X[100:]),
)
assert np.allclose(ans1, ans2)