#!/usr/bin/python
"""Print the bounding box and text of each OCR entry found in an image.

Usage: pass the path of an image file as the only argument.

The image is handed to ``pytesseract.image_to_data`` and one line is printed
per entry that carries text, in the form ``left top width height text``.
"""
import sys


def iter_text_boxes(data):
    """Yield ``(left, top, width, height, text)`` for every entry with text.

    Args:
        data: Mapping with parallel sequences under the keys ``'left'``,
            ``'top'``, ``'width'``, ``'height'`` and ``'text'`` (the layout
            requested from ``image_to_data`` with ``Output.DICT``).

    Yields:
        One tuple per entry, in input order, with the values passed through
        unchanged. Entries whose text is empty or whitespace-only are
        skipped, since they would print as rows with no visible text.
    """
    columns = zip(
        data['left'],
        data['top'],
        data['width'],
        data['height'],
        data['text'],
    )
    for left, top, width, height, text in columns:
        if not text.strip():
            continue
        yield left, top, width, height, text


def main(argv=None):
    """Run OCR on the image named in ``argv[1]`` and print its text boxes.

    Args:
        argv: Argument list including the program name; defaults to
            ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when the image path is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        program = argv[0] if argv else "script"
        sys.stderr.write("usage: {} IMAGE\n".format(program))
        return 2

    # Imported here rather than at module level so that a usage error is
    # reported, and iter_text_boxes() stays importable, even on a machine
    # without pytesseract/Tesseract installed.
    import pytesseract
    from pytesseract import Output

    data = pytesseract.image_to_data(argv[1], output_type=Output.DICT)
    for left, top, width, height, text in iter_text_boxes(data):
        print(left, top, width, height, text)
    return 0


if __name__ == "__main__":
    sys.exit(main())