import json
import math, torch
from annoy import AnnoyIndex
import face_recognition
import numpy as np
from PIL import Image
import cv2
import imutils
# Artifact paths for the celeb-detector model bundle.
celeb_mapping_filepath = "celeb_detector/models/celeb_mapping_117_18012022.json"
celeb_index_annpath = "celeb_detector/models/celeb_index_117_18012022.ann"
vggface_modelpath = "celeb_detector/models/vggface_resnet50.pt"

# Load the VGGFace ResNet-50 encoder once at import time.
# eval() freezes dropout/batch-norm -- required for deterministic inference.
encoder_model = torch.load(vggface_modelpath)
encoder_model.eval()

# Approximate-nearest-neighbour index over 2048-dim face embeddings
# using angular distance.
ann_index = AnnoyIndex(2048, 'angular')
_ = ann_index.load(celeb_index_annpath)

# The JSON maps celeb name -> list of Annoy item ids; invert it to
# item id -> celeb name for O(1) lookup per neighbour.
with open(celeb_mapping_filepath) as json_file:
    celeb_mapping_1_temp = json.load(json_file)
celeb_mapping_1 = {
    each_id: str(name)
    for name, id_list in celeb_mapping_1_temp.items()
    for each_id in id_list
}
def get_celeb_name_from_id(result_list, dist_threshold=0.9):
    """Tally how often each celebrity appears among ANN neighbours.

    Parameters
    ----------
    result_list : tuple(list, list)
        ``(ids, distances)`` as returned by
        ``AnnoyIndex.get_nns_by_vector(..., include_distances=True)``.
    dist_threshold : float
        Neighbours at or beyond this angular distance are ignored.

    Returns
    -------
    dict
        Mapping of celeb name -> vote count among accepted neighbours.
    """
    id_list, dist_list = result_list[0], result_list[1]
    counts = {}
    for each_id, each_dist in zip(id_list, dist_list):
        if each_dist >= dist_threshold:
            continue
        name = celeb_mapping_1.get(each_id)
        # Skip ids missing from the mapping: counting them under a None
        # key could later surface as celeb_name=None (and crash .upper())
        # in get_celeb_prediction.
        if name is None:
            continue
        counts[name] = counts.get(name, 0) + 1
    return counts
def face_distance_to_conf(face_distance, face_match_threshold=0.34):
    """Convert an embedding distance to a confidence score in [0, 1].

    Distances above the threshold fall off linearly toward 0; distances
    at or below it map into [0.5, 1.0] with a nonlinear boost so that
    near-perfect matches score close to 1.0.

    Parameters
    ----------
    face_distance : float
        Angular distance between the query and the matched embedding.
    face_match_threshold : float
        Distance at which confidence equals exactly 0.5.

    Returns
    -------
    float
        Confidence score.
    """
    if face_distance > face_match_threshold:
        # Linear falloff over the remaining distance range.
        # (renamed from `range`, which shadowed the builtin)
        span = 1.0 - face_match_threshold
        return (1.0 - face_distance) / (span * 2.0)
    # Within the match threshold the linear value lies in [0.5, 1.0] ...
    span = face_match_threshold
    linear_val = 1.0 - (face_distance / (span * 2.0))
    # ... and is then boosted nonlinearly toward 1.0 for small distances.
    return linear_val + ((1.0 - linear_val) * math.pow((linear_val - 0.5) * 2, 0.2))
def get_encoding(img):
    """Detect faces in an image and encode each one with the VGGFace model.

    Parameters
    ----------
    img : numpy.ndarray
        Image array as loaded by cv2, shape (H, W, 3).

    Returns
    -------
    tuple
        ``(encodings, bboxes)`` where ``encodings`` holds one model output
        per detected face and ``bboxes`` holds matching ``(x, y, w, h)``
        tuples in the coordinates of the resized image, or
        ``(None, None)`` when no face is detected.
    """
    # Normalise working size; all bounding boxes below are relative to
    # this resized image, not the caller's original.
    img = imutils.resize(img, height=1080)
    results = face_recognition.face_locations(img)
    if not results:
        return None, None
    encodings = []
    bbox = []
    # face_recognition returns (top, right, bottom, left) per face.
    for top, right, bottom, left in results:
        # Clamp negative coords; slicing already caps the high side.
        x1, y1 = max(left, 0), max(top, 0)
        x2, y2 = right, bottom
        face = img[y1:y2, x1:x2]
        # VGGFace expects a 224x224 float32 input with a batch dimension.
        face_array = np.asarray(
            Image.fromarray(face).resize((224, 224)), dtype='float32'
        )
        samples = np.expand_dims(face_array, axis=0)
        # Inference only: no_grad avoids building autograd graphs and
        # the associated memory overhead.
        with torch.no_grad():
            encoding = encoder_model(torch.Tensor(samples))
        encodings.append(encoding)
        bbox.append((x1, y1, x2 - x1, y2 - y1))
    return encodings, bbox
def get_celeb_prediction(img):
    """Detect faces, identify celebrities, and annotate the image in place.

    Parameters
    ----------
    img : numpy.ndarray
        Image array as loaded by cv2. Rectangles and names are drawn
        directly onto this array.

    Returns
    -------
    list[dict]
        One dict per detected face with keys ``bbox``, ``celeb_name``
        ("unknown" when the neighbour vote is inconclusive), and
        ``confidence``. Empty list when no face is found.

    NOTE(review): get_encoding computes bboxes on a copy resized to
    height 1080, but drawing happens on the caller's original image --
    coordinates only line up when the input is already 1080 tall; verify.
    """
    encs, bbox = get_encoding(img)
    # get_encoding returns (None, None) when no face is detected; the
    # previous version crashed here with TypeError on enumerate(None).
    if encs is None:
        return []
    data = []
    dist_threshold = 0.9
    for index, enc in enumerate(encs):
        cv2.rectangle(img, bbox[index], (255,0,0), 2)
        temp_data = {"bbox": bbox[index]}
        # Query the 10 nearest neighbours with distances included.
        results = ann_index.get_nns_by_vector(enc[0], 10, search_k=-1, include_distances=True)
        celeb_count_dict = get_celeb_name_from_id(results, dist_threshold)
        # Distance to the single nearest neighbour drives the confidence.
        distance = results[1][0]
        # Accept only when one celeb wins more than 3 of the 10 votes.
        if celeb_count_dict and max(celeb_count_dict.values()) > 3:
            celeb_name = max(celeb_count_dict, key=celeb_count_dict.get)
            cv2.putText(img, celeb_name.upper(), (bbox[index][0]-5, bbox[index][1] - 5), cv2.FONT_HERSHEY_DUPLEX, 1, (0,0,255), 1)
            temp_data["celeb_name"] = celeb_name
            temp_data["confidence"] = round(face_distance_to_conf(distance), 2)
        else:
            temp_data["celeb_name"] = "unknown"
            temp_data["confidence"] = 0.0
        data.append(temp_data)
    return data
if __name__ == "__main__":
    # Demo entry point. Previously this ran unconditionally at import
    # time, and the trailing bare expression plus pasted notebook output
    # did nothing in a script.
    image_path = "/Users/shobhit2.gupta/Downloads/test_images/alia6.jpg"
    output = get_celeb_prediction(cv2.imread(image_path))
    print(output)
    # Example output:
    # [{'bbox': (110, 233, 554, 555), 'celeb_name': 'Alia Bhatt', 'confidence': 0.23}]