import json
import math, torch
from annoy import AnnoyIndex
import face_recognition
import numpy as np
from PIL import Image
import cv2
import imutils
# Artifact paths for the celeb-detector model bundle.
celeb_mapping_filepath = "celeb_detector/models/celeb_mapping_117_18012022.json"
celeb_index_annpath = "celeb_detector/models/celeb_index_117_18012022.ann"
vggface_modelpath = "celeb_detector/models/vggface_resnet50.pt"

# Load the VGGFace ResNet-50 encoder once at import time.
# eval() freezes dropout/batch-norm -- required for deterministic inference.
encoder_model = torch.load(vggface_modelpath)
encoder_model.eval()

# Approximate-nearest-neighbour index over 2048-dim face embeddings
# using angular distance.
ann_index = AnnoyIndex(2048, 'angular')
_ = ann_index.load(celeb_index_annpath)

# The JSON maps celeb name -> list of Annoy item ids; invert it to
# item id -> celeb name for O(1) lookup per neighbour.
with open(celeb_mapping_filepath) as json_file:
    celeb_mapping_1_temp = json.load(json_file)
celeb_mapping_1 = {
    each_id: str(name)
    for name, id_list in celeb_mapping_1_temp.items()
    for each_id in id_list
}
def get_celeb_name_from_id(result_list, dist_threshold=0.9):
    """Tally how often each celebrity appears among ANN neighbours.

    Parameters
    ----------
    result_list : tuple(list, list)
        ``(ids, distances)`` as returned by
        ``AnnoyIndex.get_nns_by_vector(..., include_distances=True)``.
    dist_threshold : float
        Neighbours at or beyond this angular distance are ignored.

    Returns
    -------
    dict
        Mapping of celeb name -> vote count among accepted neighbours.
    """
    id_list, dist_list = result_list[0], result_list[1]
    counts = {}
    for each_id, each_dist in zip(id_list, dist_list):
        if each_dist >= dist_threshold:
            continue
        name = celeb_mapping_1.get(each_id)
        # Skip ids missing from the mapping: counting them under a None
        # key could later surface as celeb_name=None (and crash .upper())
        # in get_celeb_prediction.
        if name is None:
            continue
        counts[name] = counts.get(name, 0) + 1
    return counts
def face_distance_to_conf(face_distance, face_match_threshold=0.34):
    """Convert an embedding distance to a confidence score in [0, 1].

    Distances above the threshold fall off linearly toward 0; distances
    at or below it map into [0.5, 1.0] with a nonlinear boost so that
    near-perfect matches score close to 1.0.

    Parameters
    ----------
    face_distance : float
        Angular distance between the query and the matched embedding.
    face_match_threshold : float
        Distance at which confidence equals exactly 0.5.

    Returns
    -------
    float
        Confidence score.
    """
    if face_distance > face_match_threshold:
        # Linear falloff over the remaining distance range.
        # (renamed from `range`, which shadowed the builtin)
        span = 1.0 - face_match_threshold
        return (1.0 - face_distance) / (span * 2.0)
    # Within the match threshold the linear value lies in [0.5, 1.0] ...
    span = face_match_threshold
    linear_val = 1.0 - (face_distance / (span * 2.0))
    # ... and is then boosted nonlinearly toward 1.0 for small distances.
    return linear_val + ((1.0 - linear_val) * math.pow((linear_val - 0.5) * 2, 0.2))
def get_encoding(img):
    """Detect faces in an image and encode each one with the VGGFace model.

    Parameters
    ----------
    img : numpy.ndarray
        Image array as loaded by cv2, shape (H, W, 3).

    Returns
    -------
    tuple
        ``(encodings, bboxes)`` where ``encodings`` holds one model output
        per detected face and ``bboxes`` holds matching ``(x, y, w, h)``
        tuples in the coordinates of the resized image, or
        ``(None, None)`` when no face is detected.
    """
    # Normalise working size; all bounding boxes below are relative to
    # this resized image, not the caller's original.
    img = imutils.resize(img, height=1080)
    results = face_recognition.face_locations(img)
    if not results:
        return None, None
    encodings = []
    bbox = []
    # face_recognition returns (top, right, bottom, left) per face.
    for top, right, bottom, left in results:
        # Clamp negative coords; slicing already caps the high side.
        x1, y1 = max(left, 0), max(top, 0)
        x2, y2 = right, bottom
        face = img[y1:y2, x1:x2]
        # VGGFace expects a 224x224 float32 input with a batch dimension.
        face_array = np.asarray(
            Image.fromarray(face).resize((224, 224)), dtype='float32'
        )
        samples = np.expand_dims(face_array, axis=0)
        # Inference only: no_grad avoids building autograd graphs and
        # the associated memory overhead.
        with torch.no_grad():
            encoding = encoder_model(torch.Tensor(samples))
        encodings.append(encoding)
        bbox.append((x1, y1, x2 - x1, y2 - y1))
    return encodings, bbox
def get_celeb_prediction(img):
    """Detect faces, identify celebrities, and annotate the image in place.

    Parameters
    ----------
    img : numpy.ndarray
        Image array as loaded by cv2. Rectangles and names are drawn
        directly onto this array.

    Returns
    -------
    list[dict]
        One dict per detected face with keys ``bbox``, ``celeb_name``
        ("unknown" when the neighbour vote is inconclusive), and
        ``confidence``. Empty list when no face is found.

    NOTE(review): get_encoding computes bboxes on a copy resized to
    height 1080, but drawing happens on the caller's original image --
    coordinates only line up when the input is already 1080 tall; verify.
    """
    encs, bbox = get_encoding(img)
    # get_encoding returns (None, None) when no face is detected; the
    # previous version crashed here with TypeError on enumerate(None).
    if encs is None:
        return []
    data = []
    dist_threshold = 0.9
    for index, enc in enumerate(encs):
        cv2.rectangle(img, bbox[index], (255,0,0), 2)
        temp_data = {"bbox": bbox[index]}
        # Query the 10 nearest neighbours with distances included.
        results = ann_index.get_nns_by_vector(enc[0], 10, search_k=-1, include_distances=True)
        celeb_count_dict = get_celeb_name_from_id(results, dist_threshold)
        # Distance to the single nearest neighbour drives the confidence.
        distance = results[1][0]
        # Accept only when one celeb wins more than 3 of the 10 votes.
        if celeb_count_dict and max(celeb_count_dict.values()) > 3:
            celeb_name = max(celeb_count_dict, key=celeb_count_dict.get)
            cv2.putText(img, celeb_name.upper(), (bbox[index][0]-5, bbox[index][1] - 5), cv2.FONT_HERSHEY_DUPLEX, 1, (0,0,255), 1)
            temp_data["celeb_name"] = celeb_name
            temp_data["confidence"] = round(face_distance_to_conf(distance), 2)
        else:
            temp_data["celeb_name"] = "unknown"
            temp_data["confidence"] = 0.0
        data.append(temp_data)
    return data
if __name__ == "__main__":
    # Demo entry point. Previously this ran unconditionally at import
    # time, and the trailing bare expression plus pasted notebook output
    # did nothing in a script.
    image_path = "/Users/shobhit2.gupta/Downloads/test_images/alia6.jpg"
    output = get_celeb_prediction(cv2.imread(image_path))
    print(output)
    # Example output:
    # [{'bbox': (110, 233, 554, 555), 'celeb_name': 'Alia Bhatt', 'confidence': 0.23}]