screenshot-service/Docker/annotate_header.py

38 lines
1.1 KiB
Python

# test with:
# docker image build -t headless-fox Docker && docker run -v $PWD/static:/opt/static -ti headless-fox python annotate_header.py screenshot.png 1 "content-type" "etag"
import os
import sys
import pytesseract
import uuid
from pytesseract import Output
from PIL import Image, ImageDraw
# Script flow: crop a fixed region out of a screenshot, OCR it with Tesseract,
# and mark every recognised word that contains one of the requested needles
# (case-insensitive) by drawing a short horizontal line on the full image.
#
# CLI: annotate_header.py <image> <index> <needle> [<needle> ...]

# Echo the raw command line so the invocation is visible in the output.
print(sys.argv)

image = sys.argv[1]
index = sys.argv[2]  # positional argument kept for CLI compatibility; unused below
needles = sys.argv[3:]

# Top-left corner of the OCR region, in full-image pixel coordinates.
# NOTE(review): presumably the response-headers pane of the captured browser
# window -- confirm against the screenshot layout before changing these.
x_offset = 2054
y_offset = 313

with Image.open(image) as im:
    # Crop box is (left, upper, right, lower) in full-image coordinates.
    cropped = im.crop((x_offset, y_offset, 2875, 1558))

    # Tesseract is handed a file path, so the crop is written to a uniquely
    # named scratch file first.
    cropped_filename = "/opt/static/" + uuid.uuid4().hex + ".png"
    try:
        cropped.save(cropped_filename)
        d = pytesseract.image_to_data(cropped_filename, output_type=Output.DICT)
    finally:
        # Always clean up the scratch file, even when saving or OCR fails;
        # the existence check avoids masking the original error if the
        # file was never created.
        if os.path.exists(cropped_filename):
            os.remove(cropped_filename)

    draw = ImageDraw.Draw(im)
    print(needles)

    # Lower-case each needle once instead of once per OCR box.
    lowered_needles = [(needle, needle.lower()) for needle in needles]

    # The OCR result is column-oriented: entry i of each list describes box i.
    for x, y, text in zip(d['left'], d['top'], d['text']):
        lowered_text = text.lower()
        for needle, lowered_needle in lowered_needles:
            if lowered_needle in lowered_text:
                print("needle", needle, text, x, y)
                # OCR coordinates are relative to the crop, so add y_offset
                # to get back to full-image coordinates. The mark is a fixed
                # 200 px line starting at the crop's left edge, at the top
                # of the matched box.
                draw.line(
                    (x_offset, y + y_offset, x_offset + 200, y + y_offset),
                    fill=128,
                )

    im.save("/opt/static/output.png", "PNG")