#!/usr/bin/env python
# coding: utf-8
# # Handwriting OCR API example
# ### This Jupyter notebook shows you how to get started with the Cognitive Service Computer Vision Handwriting OCR API in Python, and how to visualize your results.
# To use this notebook, you will need to get keys to the Computer Vision API. Visit https://www.microsoft.com/cognitive-services/en-us/computer-vision-api, and then click the “Try for free” button. On the “Sign in” page, use your Microsoft account to sign in and you will be able to subscribe to the Computer Vision API and get free keys (Code of Conduct and TOS). After completing the sign-up process, paste your key into the variables section below. (Either the primary or the secondary key works.)
# In[1]:
import time
import requests
import cv2
import operator
import numpy as np
# Import library to display results
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
get_ipython().run_line_magic('matplotlib', 'inline')
# Display images within Jupyter
# In[2]:
# Variables
# Endpoint that processRequest() sends its POST request to.
_url = 'YOUR_ENDPOINT' # Here, paste your full endpoint from the Azure portal
# Subscription key; the cells below send it in the 'Ocp-Apim-Subscription-Key' header.
_key = None # Here, paste your primary key
# Retry limit the helper functions compare against when the service answers HTTP 429.
_maxNumRetries = 10
# # Helper functions
# In[3]:
def processRequest( json, data, headers, params ):
    """
    Helper function to submit an image to the Computer Vision API

    Sends a POST request to the module-level _url. While the service answers
    HTTP 429 the request is repeated after a one second pause, for at most
    _maxNumRetries retries.

    Parameters:
    json: Used when processing images from its URL. See API Documentation
    data: Used when processing image read from disk. See API Documentation
    headers: Used to pass the key information and the data type request
    params: Query-string parameters forwarded with the request

    Returns:
    The value of the 'Operation-Location' response header when the service
    answers HTTP 202; None for any other outcome (the error is printed).
    """
    retries = 0

    while True:
        response = requests.request( 'post', _url, json = json, data = data, headers = headers, params = params )

        if response.status_code == 202:
            # Accepted: this header is what the caller polls for the result.
            return response.headers['Operation-Location']

        # Every other status is reported together with its body. Fall back to
        # the raw text when the body is not JSON so that reporting an error
        # cannot itself raise and hide the real status code.
        try:
            message = response.json()
        except ValueError:
            message = response.text

        if response.status_code != 429:
            print( "Error code: %d" % ( response.status_code ) )
            print( "Message: %s" % ( message, ) )
            return None

        # HTTP 429: report it, then retry unless the budget is used up.
        print( "Message: %s" % ( message, ) )
        if retries >= _maxNumRetries:
            print( 'Error: failed after retrying!' )
            return None

        time.sleep(1)
        retries += 1
# In[4]:
def getOCRTextResult( operationLocation, headers ):
    """
    Helper function to get text result from operation location

    Sends a GET request to operationLocation. While the service answers
    HTTP 429 the request is repeated after a one second pause, for at most
    _maxNumRetries retries.

    Parameters:
    operationLocation: operationLocation to get text result, See API Documentation
    headers: Used to pass the key information

    Returns:
    The decoded JSON body when the service answers HTTP 200; None for any
    other outcome (the error is printed). Callers must check for None before
    reading keys from the result.
    """
    retries = 0

    while True:
        response = requests.request('get', operationLocation, json=None, data=None, headers=headers, params=None)

        if response.status_code == 200:
            return response.json()

        # Every other status is reported together with its body. Fall back to
        # the raw text when the body is not JSON so that reporting an error
        # cannot itself raise and hide the real status code.
        try:
            message = response.json()
        except ValueError:
            message = response.text

        if response.status_code != 429:
            print("Error code: %d" % (response.status_code))
            print("Message: %s" % (message,))
            return None

        # HTTP 429: report it, then retry unless the budget is used up.
        print("Message: %s" % (message,))
        if retries >= _maxNumRetries:
            print('Error: failed after retrying!')
            return None

        time.sleep(1)
        retries += 1
# In[5]:
def showResultOnImage( result, img ):
    """
    Draw every recognised word of an OCR result on top of the image

    Parameters:
    result: Parsed OCR response. Words are read from
            result['recognitionResult']['lines'][...]['words']; each word
            supplies an 8-number 'boundingBox' and its 'text'.
    img: Image array with the colour channels on the last axis. Channels
         (2, 1, 0) are selected before the array is handed to imshow, i.e.
         the first three channels are displayed in reversed order.
    """
    reordered = img[:, :, (2, 1, 0)]
    _, axes = plt.subplots(figsize=(12, 12))
    axes.imshow(reordered, aspect='equal')

    for text_line in result['recognitionResult']['lines']:
        for word in text_line['words']:
            box = word['boundingBox']
            # The box is a flat list of four (x, y) corner pairs.
            top_left, top_right, bottom_right, bottom_left = (
                (box[k], box[k + 1]) for k in (0, 2, 4, 6)
            )
            label = word['text']

            # One closed outline, written as four edges (start and end point each).
            outline = (
                top_left, top_right,
                top_right, bottom_right,
                bottom_right, bottom_left,
                bottom_left, top_left,
            )
            xs = [corner[0] for corner in outline]
            ys = [corner[1] for corner in outline]
            axes.add_line(Line2D(xs, ys, linewidth=3.5, color='red'))

            # Put the recognised text just above the top-left corner.
            axes.text(
                top_left[0], top_left[1] - 2, '{:s}'.format(label),
                bbox=dict(facecolor='blue', alpha=0.5),
                fontsize=14, color='white',
            )

    plt.axis('off')
    plt.tight_layout()
    plt.draw()
    plt.show()
# # Analysis of an image stored on disk
# In[6]:
# Load raw image file into memory
pathToFileInDisk = r'D:\test.jpg'
with open(pathToFileInDisk, 'rb') as f:
    data = f.read()

# Computer Vision parameters
params = {'mode': 'Handwritten'}

# The raw image bytes travel in the request body, so no JSON payload is sent.
headers = {
    'Ocp-Apim-Subscription-Key': _key,
    'Content-Type': 'application/octet-stream',
}
json = None

operationLocation = processRequest(json, data, headers, params)

result = None
if operationLocation is not None:
    # The polling request has no body; only the key header is needed.
    headers = {'Ocp-Apim-Subscription-Key': _key}
    while True:
        time.sleep(1)
        result = getOCRTextResult(operationLocation, headers)
        # getOCRTextResult returns None on an error response; stop polling
        # in that case instead of subscripting None.
        if result is None or result['status'] in ('Succeeded', 'Failed'):
            break

# Decode the image bytes read from disk and draw the recognised words on them
if result is not None and result['status'] == 'Succeeded':
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    data8uint = np.frombuffer(data, np.uint8)
    # cv2.imdecode yields BGR and showResultOnImage reverses the channel order
    # itself, so the image is passed on without a BGR->RGB conversion here
    # (converting twice would swap red and blue on screen).
    img = cv2.imdecode(data8uint, cv2.IMREAD_COLOR)
    showResultOnImage(result, img)
# # Analysis of an image retrieved via URL
# In[7]:
# URL direction to image
urlImage = 'https://portalstoragewuprod2.azureedge.net/vision/HandWritingOCR/2.jpg'

# Computer Vision parameters
params = {'mode': 'Handwritten'}

# The image is referenced by URL in a JSON payload, so no raw bytes are sent.
headers = {
    'Ocp-Apim-Subscription-Key': _key,
    'Content-Type': 'application/json',
}
json = {'url': urlImage}
data = None

result = None
operationLocation = processRequest(json, data, headers, params)

if operationLocation is not None:
    # The polling request has no body; only the key header is needed.
    headers = {'Ocp-Apim-Subscription-Key': _key}
    while True:
        time.sleep(1)
        result = getOCRTextResult(operationLocation, headers)
        # getOCRTextResult returns None on an error response; stop polling
        # in that case instead of subscripting None.
        if result is None or result['status'] in ('Succeeded', 'Failed'):
            break

if result is not None and result['status'] == 'Succeeded':
    # Load the original image, fetched from the URL
    arr = np.asarray(bytearray(requests.get(urlImage).content), dtype=np.uint8)
    # cv2.imdecode yields BGR and showResultOnImage reverses the channel order
    # itself, so the image is passed on without a BGR->RGB conversion here
    # (converting twice would swap red and blue on screen).
    img = cv2.imdecode(arr, -1)
    showResultOnImage(result, img)