Checkpoint - begin work on faster, more integrated image processing

2022-05-29 22:00:28 +02:00 · 2022-05-29 22:00:28 +02:00 · 4eb946fe88
commit 4eb946fe88
parent 8fd169bc6c
4 changed files with 41 additions and 5 deletions
--- a/Docker/Dockerfile
+++ b/Docker/Dockerfile
@@ -37,15 +37,14 @@ RUN pip install -U --no-cache-dir pytesseract
 RUN apk del .dev-deps
 RUN rm -f /usr/local/lib/*.a
 RUN rm -rf /tmp/* /var/cache/apk/*
-RUN apk add jq
-RUN apk add sed
-RUN apk add nodejs
+RUN apk add jq sed nodejs
 COPY ./mozilla /root/.mozilla
 RUN echo https://dl-cdn.alpinelinux.org/alpine/edge/testing >> /etc/apk/repositories
 RUN apk update
-RUN apk add wmctrl
-RUN apk add git make gcc musl-dev libx11-dev
+RUN apk add wmctrl git make gcc musl-dev libx11-dev zlib zlib-dev jpeg-dev
 RUN git clone https://github.com/muquit/grabc && cd grabc && make && make install
+RUN apk add clang
+RUN python3 -m pip install --upgrade Pillow
 COPY . /opt

 WORKDIR /opt
--- a/Docker/annotate_header.py
+++ b/Docker/annotate_header.py
@@ -0,0 +1,37 @@
+# Mark OCR matches for the given search strings on a screenshot; test with:
+
+# docker image build -t headless-fox Docker && docker run -v $PWD/static:/opt/static -ti headless-fox python annotate_header.py screenshot.png 1 "content-type" "etag"
+
+import os
+import sys
+import pytesseract
+import uuid
+from pytesseract import Output
+from PIL import Image, ImageDraw
+
+print(sys.argv)  # debug: echo the raw command line
+
+image = sys.argv[1]  # path of the screenshot to annotate
+index = sys.argv[2]  # NOTE(review): read but never used below - confirm intent
+needles = sys.argv[3:]  # every remaining argument is a substring to search for
+
+# print(d)
+
+with Image.open(image) as im:
+  x_offset = 2054  # left edge of the crop box, in screenshot pixels
+  y_offset = 313  # upper edge of the crop box, in screenshot pixels
+  cropped = im.crop((x_offset, y_offset, 2875, 1558))  # box is (left, upper, right, lower)
+  cropped_filename = "/opt/static/" + uuid.uuid4().hex + ".png"  # random name for the temporary crop file
+  cropped.save(cropped_filename)
+  d = pytesseract.image_to_data(cropped_filename, output_type=Output.DICT)  # OCR the crop; d holds parallel per-box lists
+  os.remove(cropped_filename);  # the crop file is not used after the OCR call
+  draw = ImageDraw.Draw(im)  # annotations go onto the full screenshot, not the crop
+  n_boxes = len(d['level'])  # number of boxes in the OCR result
+  print(needles)
+  for i in range(n_boxes):
+      (x, y, w, h, text) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i], d['text'][i])
+      for needle in needles:
+        if needle.lower() in text.lower():  # case-insensitive substring match
+          print("needle", needle, text, x, y)
+          draw.line((x_offset, y + y_offset, x_offset + 200, y + y_offset), fill=128)  # 200 px horizontal line from the crop's left edge; y comes from the OCR of the crop, so y_offset shifts it to screenshot coordinates
+  im.save("/opt/static/output.png", "PNG")
--- a/Docker/cropped.png
+++ b/Docker/cropped.png
--- a/Docker/screenshot.png
+++ b/Docker/screenshot.png