#!/usr/bin/env python
# coding: utf-8
# Start by creating a new `conda` environment:
#
# ```bash
# $ conda create -n pyannote python=3.6 anaconda
# $ source activate pyannote
# ```
#
# Then, install `pyannote-video` and its dependencies:
#
# ```bash
# $ pip install pyannote-video
# ```
#
# Finally, download sample video and `dlib` models:
#
# ```bash
# $ git clone https://github.com/pyannote/pyannote-data.git
# $ git clone https://github.com/davisking/dlib-models.git
# $ bunzip2 dlib-models/dlib_face_recognition_resnet_model_v1.dat.bz2
# $ bunzip2 dlib-models/shape_predictor_68_face_landmarks.dat.bz2
# ```
#
# To execute this notebook locally:
# ```bash
# $ git clone https://github.com/pyannote/pyannote-video.git
# $ jupyter notebook --notebook-dir="pyannote-video/doc"
# ```
#
# In[4]:
# Pull numpy/matplotlib into the notebook namespace with inline plotting.
# NOTE(review): %pylab is deprecated in modern IPython — %matplotlib inline
# plus explicit imports is the recommended replacement; confirm before changing.
get_ipython().run_line_magic('pylab', 'inline')
# # Shot segmentation
# In[5]:
# Print the command-line usage of the video structure tool.
get_ipython().system('pyannote-structure.py --help')
# In[7]:
# Detect shot boundaries in the sample video and save them as a JSON timeline
# (consumed by the face tracking step below).
get_ipython().system('pyannote-structure.py shot --verbose ../../pyannote-data/TheBigBangTheory.mkv ../../pyannote-data/TheBigBangTheory.shots.json')
# Detected shot boundaries can be visualized using `pyannote.core` notebook support:
# In[8]:
# Load the shot boundaries written by the previous cell; leaving the value as
# the last expression lets pyannote.core render it inline in the notebook.
from pyannote.core.json import load_from
shots_path = '../../pyannote-data/TheBigBangTheory.shots.json'
shots = load_from(shots_path)
shots
# # Face processing
# In[9]:
# Print the command-line usage of the face processing tool.
get_ipython().system('pyannote-face.py --help')
# ### Face tracking
# In[10]:
# Track faces shot by shot, sampling one frame every 0.5 second; results are
# written to a plain-text track file.
get_ipython().system('pyannote-face.py track --verbose --every=0.5 ../../pyannote-data/TheBigBangTheory.mkv ../../pyannote-data/TheBigBangTheory.shots.json ../../pyannote-data/TheBigBangTheory.track.txt')
# Face tracks can be visualized using `demo` mode:
# In[12]:
# Render the face tracks as overlays on a copy of the video.
get_ipython().system('pyannote-face.py demo ../../pyannote-data/TheBigBangTheory.mkv ../../pyannote-data/TheBigBangTheory.track.txt ../../pyannote-data/TheBigBangTheory.track.mp4')
# In[14]:
import io
import base64
from IPython.display import HTML

# Embed the tracking demo video inline as a base64 data URI.
# Open read-only ('rb') instead of 'r+b' — write access is not needed — and
# use a context manager so the file handle is not leaked.
with io.open('../../pyannote-data/TheBigBangTheory.track.mp4', 'rb') as f:
    video = f.read()
encoded = base64.b64encode(video)
# BUG FIX: the HTML template was lost in the notebook-to-script conversion
# (it had degenerated to the empty string '''''', so the cell displayed
# nothing). Restore the <video> tag wrapping the base64-encoded payload.
HTML(data='''<video alt="face tracks" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4" />
</video>'''.format(encoded.decode('ascii')))
# ### Facial landmarks and face embedding
# In[15]:
# For every face track, extract facial landmarks (dlib 68-point shape
# predictor) and face embeddings (dlib ResNet face recognition model),
# writing one output file for each.
get_ipython().system('pyannote-face.py extract --verbose ../../pyannote-data/TheBigBangTheory.mkv ../../pyannote-data/TheBigBangTheory.track.txt ../../dlib-models/shape_predictor_68_face_landmarks.dat ../../dlib-models/dlib_face_recognition_resnet_model_v1.dat ../../pyannote-data/TheBigBangTheory.landmarks.txt ../../pyannote-data/TheBigBangTheory.embedding.txt')
# ### Face clustering
# Once embeddings are extracted, let's apply face track hierarchical agglomerative clustering.
# The distance between two clusters is defined as the average euclidean distance between all embeddings.
# In[16]:
from pyannote.video.face.clustering import FaceClustering
# Hierarchical agglomerative clustering of face tracks; per the text above,
# inter-cluster distance is the average euclidean distance between embeddings,
# and merging stops at the given threshold.
clustering = FaceClustering(threshold=0.6)
# In[17]:
# Load per-track embeddings: face_tracks describes the tracks, embeddings
# holds their feature vectors.
face_tracks, embeddings = clustering.model.preprocess('../../pyannote-data/TheBigBangTheory.embedding.txt')
face_tracks.get_timeline()
# In[18]:
# Cluster the face tracks using their embeddings; `result` is reused by the
# labeling cells below.
result = clustering(face_tracks, features=embeddings)
# In[19]:
# Restrict the inline visualization to the first 30 seconds, then give the
# numeric cluster ids human-readable character names.
from pyannote.core import notebook, Segment
notebook.reset()
notebook.crop = Segment(0, 30)
cluster_names = {9: 'Leonard', 6: 'Sheldon', 14: 'Receptionist', 5: 'False_alarm'}
result = result.rename_labels(mapping=cluster_names)
result
# In[21]:
# Dump one "track_id cluster" line per face track; the demo command below
# reads this file to label the overlays.
with open('../../pyannote-data/TheBigBangTheory.labels.txt', 'w') as fp:
    fp.writelines(
        f'{track_id} {cluster}\n'
        for _, track_id, cluster in result.itertracks(yield_label=True)
    )
# In[23]:
# Re-render the overlay video, this time labeling each face track with the
# cluster names written above.
get_ipython().system('pyannote-face.py demo ../../pyannote-data/TheBigBangTheory.mkv ../../pyannote-data/TheBigBangTheory.track.txt --label=../../pyannote-data/TheBigBangTheory.labels.txt ../../pyannote-data/TheBigBangTheory.final.mp4')
# In[25]:
import io
import base64
from IPython.display import HTML

# Embed the final labeled video inline as a base64 data URI.
# Open read-only ('rb') instead of 'r+b' — write access is not needed — and
# use a context manager so the file handle is not leaked.
with io.open('../../pyannote-data/TheBigBangTheory.final.mp4', 'rb') as f:
    video = f.read()
encoded = base64.b64encode(video)
# BUG FIX: the HTML template was lost in the notebook-to-script conversion
# (it had degenerated to the empty string '''''', so the cell displayed
# nothing). Restore the <video> tag wrapping the base64-encoded payload.
HTML(data='''<video alt="labeled face tracks" controls>
    <source src="data:video/mp4;base64,{0}" type="video/mp4" />
</video>'''.format(encoded.decode('ascii')))