import numpy as np
# Toy sample: nine small readings plus one obvious outlier (486).
data = np.array([2, 3, 4, 2, 3, 2, 2, 2, 3, 486])
# Mean and population standard deviation (NumPy's default, ddof=0) of the
# full sample, outlier included.
m, s = data.mean(), data.std()
m, s
# Output: (50.9, 145.03478893010464)
def anomalyDetector(data, test_point):
    """Flag ``test_point`` as anomalous with a naive three-sigma rule.

    The mean and population standard deviation are computed from *all* of
    ``data``. Extreme values already present in ``data`` therefore inflate
    both statistics and can hide themselves from detection.

    Args:
        data: Array-like of numeric samples.
        test_point: Value (or array of values) to test.

    Returns:
        A boolean (or boolean array) that is True where ``test_point`` lies
        more than three standard deviations away from the mean of ``data``.
    """
    # Coerce so plain lists/tuples work as well as ndarrays.
    data = np.asarray(data)
    m, s = data.mean(), data.std()
    return np.abs(test_point - m) > 3 * s
# The naive detector misses the outlier because 486 itself inflates the
# mean and standard deviation it is compared against.
anomalyDetector(data, test_point=486)
# Output: False
# Approximate upper threshold (mean + 3 * std) for the contaminated sample.
50.9 + 3 * 145
# Output: 485.9
import pandas as pd
df = pd.DataFrame(data)
df
# Output:
#      0
# 0    2
# 1    3
# 2    4
# 3    2
# 4    3
# 5    2
# 6    2
# 7    2
# 8    3
# 9  486
df.describe()
# Output (pandas reports the sample std, ddof=1, hence 152.88 rather than
# NumPy's population value of 145.03):
#                 0
# count   10.000000
# mean    50.900000
# std    152.880091
# min      2.000000
# 25%      2.000000
# 50%      2.500000
# 75%      3.000000
# max    486.000000
# Select the single column first so quantile() returns a scalar; calling
# float() on a one-element Series is deprecated in recent pandas.
qmin, qmax = float(df[0].quantile(.05)), float(df[0].quantile(.95))
qmin, qmax
# Output: (2.0, 269.0999999999995)
# Keep only the points that lie within the 5th-95th percentile band.
data[(data >= qmin) & (data <= qmax)]
# Output: array([2, 3, 4, 2, 3, 2, 2, 2, 3])
def anomalyDetector(data, test_point):
    """Flag ``test_point`` with a three-sigma rule on trimmed statistics.

    Samples outside the 5th-95th percentile band of ``data`` are discarded
    before the mean and population standard deviation are computed, so a few
    extreme values no longer inflate the threshold.

    Args:
        data: Array-like of numeric samples (expected to be one-dimensional).
        test_point: Value (or array of values) to test.

    Returns:
        A boolean (or boolean array) that is True where ``test_point`` lies
        more than three trimmed standard deviations from the trimmed mean.
    """
    data = np.asarray(data)
    # np.quantile uses linear interpolation by default, like pandas, and
    # avoids the deprecated float(<one-element Series>) conversion.
    qmin, qmax = np.quantile(data, [0.05, 0.95])
    # Drop everything below the 5th or above the 95th percentile.
    trimmed = data[(data >= qmin) & (data <= qmax)]
    m, s = trimmed.mean(), trimmed.std()
    return np.abs(test_point - m) > 3 * s
# The input array is not modified by the detector; the outlier is still there.
data
# Output: array([  2,   3,   4,   2,   3,   2,   2,   2,   3, 486])
# With trimmed statistics the outlier is now detected.
anomalyDetector(data, test_point=486)
# Output: True
df
# Output:
#      0
# 0    2
# 1    3
# 2    4
# 3    2
# 4    3
# 5    2
# 6    2
# 7    2
# 8    3
# 9  486
def anomalyDetector(data, test_point=None):
    """Detect anomalies with a three-sigma rule on trimmed statistics.

    Samples outside the 5th-95th percentile band of ``data`` are discarded
    before the mean and population standard deviation are computed.

    Args:
        data: Array-like of numeric samples (expected to be one-dimensional).
        test_point: Optional value (or array of values) to test. When omitted
            (``None``), every element of ``data`` is tested instead.

    Returns:
        If ``test_point`` is given, a boolean (or boolean array) that is True
        where ``test_point`` is more than three trimmed standard deviations
        from the trimmed mean. Otherwise a flat boolean array with one entry
        per element of ``data``, usable as a mask (``data[mask]``).
    """
    values = np.asarray(data)
    # np.quantile uses linear interpolation by default, like pandas, and
    # avoids the deprecated float(<one-element Series>) conversion.
    qmin, qmax = np.quantile(values, [0.05, 0.95])
    # Drop everything below the 5th or above the 95th percentile.
    trimmed = values[(values >= qmin) & (values <= qmax)]
    m, s = trimmed.mean(), trimmed.std()
    # Compare against None explicitly: a test point of 0 is a legitimate
    # value and must not fall through to the "test everything" branch.
    if test_point is not None:
        return np.abs(test_point - m) > 3 * s
    # Test the original (untrimmed) samples against the trimmed statistics.
    return (np.abs(values - m) > 3 * s).reshape(-1)
# Without a test point the detector returns a boolean mask over the data.
idx = anomalyDetector(data)
idx
# Output: array([False, False, False, False, False, False, False, False, False, True])
data[idx]
# Output: array([486])
# The trimmed sample has a small spread, so even 5 exceeds three standard
# deviations from the trimmed mean.
anomalyDetector(data, test_point=5)
# Output: True