#!/usr/bin/env python
# coding: utf-8

# # Handwriting OCR API example

# ### This Jupyter notebook shows you how to get started with the Cognitive Service Computer Vision Handwriting OCR API in Python, and how to visualize your results.

# To use this notebook, you will need to get keys to Computer Vision API. Visit https://www.microsoft.com/cognitive-services/en-us/computer-vision-api, and then click the “Try for free” button. On the “Sign in” page, use your Microsoft account to sign in and you will be able to subscribe to Computer Vision API and get free keys (Code of Conduct and TOS). After completing the sign-up process, paste your key into the variables section below. (Either the primary or the secondary key works.)

# In[1]:


import time
import requests
import cv2
import operator
import numpy as np

# Import library to display results
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
get_ipython().run_line_magic('matplotlib', 'inline')  # Display images within Jupyter


# In[2]:


# Variables

_url = 'YOUR_ENDPOINT'  # Here, paste your full endpoint from the Azure portal
_key = None  # Here, paste your primary key
_maxNumRetries = 10


# # Helper functions

# In[3]:


def _responseMessage(response):
    """Return the decoded JSON body of `response`, or its raw text if it is not JSON."""
    try:
        return response.json()
    except ValueError:
        # Error responses are not guaranteed to carry a JSON body; fall back
        # to the raw text so the HTTP error itself is still reported.
        return response.text


def _requestWithRetry(method, url, successCode, json=None, data=None, headers=None, params=None):
    """
    Send one HTTP request, retrying while the service answers 429.

    Parameters:
    method: HTTP verb passed to requests.request ('post' or 'get')
    url: Address to send the request to
    successCode: Status code that counts as success for this call
    json, data, headers, params: Forwarded unchanged to requests.request

    Returns the response object when its status equals successCode, otherwise
    None (after printing the error code and message).
    """
    retries = 0
    while True:
        response = requests.request(method, url, json=json, data=data, headers=headers, params=params)

        if response.status_code == 429:
            print("Message: %s" % (_responseMessage(response),))
            # Strict '<' so that at most _maxNumRetries retries are attempted.
            if retries < _maxNumRetries:
                time.sleep(1)
                retries += 1
                continue
            print('Error: failed after retrying!')
            return None

        if response.status_code == successCode:
            return response

        print("Error code: %d" % (response.status_code))
        print("Message: %s" % (_responseMessage(response),))
        return None


def processRequest( json, data, headers, params ):
    """
    Helper function to submit an image to the endpoint configured in _url

    Parameters:
    json: Used when processing images from its URL. See API Documentation
    data: Used when processing image read from disk. See API Documentation
    headers: Used to pass the key information and the data type request
    params: Query-string parameters sent with the request

    Returns the value of the 'Operation-Location' response header when the
    service answers 202, otherwise None.
    """
    response = _requestWithRetry('post', _url, 202, json=json, data=data, headers=headers, params=params)
    if response is None:
        return None
    # .get() keeps the "None means failure" contract if the header is absent.
    return response.headers.get('Operation-Location')


# In[4]:


def getOCRTextResult( operationLocation, headers ):
    """
    Helper function to get text result from operation location

    Parameters:
    operationLocation: operationLocation to get text result, See API Documentation
    headers: Used to pass the key information

    Returns the decoded JSON body when the service answers 200, otherwise None.
    """
    response = _requestWithRetry('get', operationLocation, 200, headers=headers)
    if response is None:
        return None
    return response.json()


# In[5]:


def showResultOnImage( result, img ):
    """
    Display the obtained results onto the input image

    Parameters:
    result: Recognition result; result['recognitionResult']['lines'] is read,
            and for every entry of each line's 'words' its 'boundingBox'
            (eight numbers: x, y of the top-left, top-right, bottom-right and
            bottom-left corners) and its 'text'
    img: Image array in RGB channel order, which is what the callers in this
         file pass after converting with cv2.COLOR_BGR2RGB
    """
    # The image is shown as-is: the callers have already converted it to RGB,
    # so reversing the channels here would display swapped red/blue colours.
    _fig, ax = plt.subplots(figsize=(12, 12))
    ax.imshow(img, aspect='equal')

    for line in result['recognitionResult']['lines']:
        for word in line['words']:
            box = word['boundingBox']
            corners = [(box[k], box[k + 1]) for k in range(0, 8, 2)]
            # Repeat the first corner at the end so the outline is closed.
            outline = corners + corners[:1]
            xs = [point[0] for point in outline]
            ys = [point[1] for point in outline]
            ax.add_line(Line2D(xs, ys, linewidth=3.5, color='red'))

            topLeftX, topLeftY = corners[0]
            # Place the label slightly above the top-left corner of the box.
            ax.text(topLeftX, topLeftY - 2, '{:s}'.format(word['text']),
                    bbox=dict(facecolor='blue', alpha=0.5), fontsize=14, color='white')

    plt.axis('off')
    plt.tight_layout()
    plt.draw()
    plt.show()


# # Analysis of an image stored on disk

# In[6]:

# Load raw image file into memory
pathToFileInDisk = r'D:\test.jpg'
with open(pathToFileInDisk, 'rb') as f:
    data = f.read()

# Computer Vision parameters
params = {'mode': 'Handwritten'}

headers = dict()
headers['Ocp-Apim-Subscription-Key'] = _key
headers['Content-Type'] = 'application/octet-stream'

json = None

operationLocation = processRequest(json, data, headers, params)

result = None
if operationLocation is not None:
    # Polling only needs the subscription key, not the upload content type.
    headers = {}
    headers['Ocp-Apim-Subscription-Key'] = _key
    while True:
        time.sleep(1)
        result = getOCRTextResult(operationLocation, headers)
        # getOCRTextResult returns None when the request failed; stop polling
        # in that case instead of indexing into None.
        if result is None or result['status'] in ('Succeeded', 'Failed'):
            break

# Decode the image bytes that were read from disk
if result is not None and result['status'] == 'Succeeded':
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    data8uint = np.frombuffer(data, np.uint8)  # View the bytes as an unsigned int array
    img = cv2.cvtColor(cv2.imdecode(data8uint, cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)

    showResultOnImage(result, img)


# # Analysis of an image retrieved via URL

# In[7]:


# URL direction to image
urlImage = 'https://portalstoragewuprod2.azureedge.net/vision/HandWritingOCR/2.jpg'

# Computer Vision parameters
params = {'mode': 'Handwritten'}

headers = dict()
headers['Ocp-Apim-Subscription-Key'] = _key
headers['Content-Type'] = 'application/json'

json = {'url': urlImage}
data = None

result = None
operationLocation = processRequest(json, data, headers, params)
if operationLocation is not None:
    # Polling only needs the subscription key, not the request content type.
    headers = {}
    headers['Ocp-Apim-Subscription-Key'] = _key
    while True:
        time.sleep(1)
        result = getOCRTextResult(operationLocation, headers)
        # getOCRTextResult returns None when the request failed; stop polling
        # in that case instead of indexing into None.
        if result is None or result['status'] in ('Succeeded', 'Failed'):
            break

if result is not None and result['status'] == 'Succeeded':
    # Load the original image, fetched from the URL
    arr = np.asarray(bytearray(requests.get(urlImage).content), dtype=np.uint8)
    img = cv2.cvtColor(cv2.imdecode(arr, -1), cv2.COLOR_BGR2RGB)
    showResultOnImage(result, img)