Checkpoint - begin work on faster, more integrated image processing
This commit is contained in:
parent
8fd169bc6c
commit
4eb946fe88
|
@ -37,15 +37,14 @@ RUN pip install -U --no-cache-dir pytesseract
|
||||||
RUN apk del .dev-deps
|
RUN apk del .dev-deps
|
||||||
RUN rm -f /usr/local/lib/*.a
|
RUN rm -f /usr/local/lib/*.a
|
||||||
RUN rm -rf /tmp/* /var/cache/apk/*
|
RUN rm -rf /tmp/* /var/cache/apk/*
|
||||||
RUN apk add jq
|
RUN apk add jq sed nodejs
|
||||||
RUN apk add sed
|
|
||||||
RUN apk add nodejs
|
|
||||||
COPY ./mozilla /root/.mozilla
|
COPY ./mozilla /root/.mozilla
|
||||||
RUN echo https://dl-cdn.alpinelinux.org/alpine/edge/testing >> /etc/apk/repositories
|
RUN echo https://dl-cdn.alpinelinux.org/alpine/edge/testing >> /etc/apk/repositories
|
||||||
RUN apk update
|
RUN apk update
|
||||||
RUN apk add wmctrl
|
RUN apk add wmctrl git make gcc musl-dev libx11-dev zlib zlib-dev jpeg-dev
|
||||||
RUN apk add git make gcc musl-dev libx11-dev
|
|
||||||
RUN git clone https://github.com/muquit/grabc && cd grabc && make && make install
|
RUN git clone https://github.com/muquit/grabc && cd grabc && make && make install
|
||||||
|
RUN apk add clang
|
||||||
|
RUN python3 -m pip install --upgrade Pillow
|
||||||
COPY . /opt
|
COPY . /opt
|
||||||
|
|
||||||
WORKDIR /opt
|
WORKDIR /opt
|
||||||
|
|
37
Docker/annotate_header.py
Normal file
37
Docker/annotate_header.py
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
# test with:
|
||||||
|
|
||||||
|
# docker image build -t headless-fox Docker && docker run -v $PWD/static:/opt/static -ti headless-fox python annotate_header.py screenshot.png 1 "content-type" "etag"
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import pytesseract
|
||||||
|
import uuid
|
||||||
|
from pytesseract import Output
|
||||||
|
from PIL import Image, ImageDraw
|
||||||
|
|
||||||
|
# Region of the screenshot that is cropped out and OCR'd, as the
# (left, upper, right, lower) pixel box handed to Image.crop().
CROP_BOX = (2054, 313, 2875, 1558)

# Directory the temporary crop is written to, and the annotated result path.
STATIC_DIR = "/opt/static/"
OUTPUT_PATH = "/opt/static/output.png"

# Horizontal length (pixels) and fill value of the marker drawn per match.
MARKER_LENGTH = 200
MARKER_FILL = 128


def find_needle_hits(ocr_data, needles):
    """Return every (needle, text, left, top) where an OCR entry contains a needle.

    ocr_data is the dict produced by pytesseract.image_to_data(...,
    output_type=Output.DICT); only its parallel 'left', 'top' and 'text'
    lists are read. Matching is a case-insensitive substring test. Hits are
    ordered by OCR entry first, then by needle, and one entry yields one hit
    per needle it contains.
    """
    # Lower-case each needle once instead of once per OCR entry.
    lowered = [(needle, needle.lower()) for needle in needles]
    hits = []
    entries = zip(ocr_data['left'], ocr_data['top'], ocr_data['text'])
    for left, top, text in entries:
        haystack = text.lower()
        for needle, needle_lower in lowered:
            if needle_lower in haystack:
                hits.append((needle, text, left, top))
    return hits


def ocr_region(im, box):
    """Crop box out of im, run tesseract on the crop and return its data dict.

    The crop is written to a uniquely named PNG under STATIC_DIR because
    tesseract is given a file path; the file is removed again even when
    saving or OCR raises, so failed runs do not leave stray images behind.
    """
    cropped_filename = STATIC_DIR + uuid.uuid4().hex + ".png"
    try:
        im.crop(box).save(cropped_filename)
        return pytesseract.image_to_data(cropped_filename, output_type=Output.DICT)
    finally:
        # The file may not exist if save() failed before creating it.
        if os.path.exists(cropped_filename):
            os.remove(cropped_filename)


def main(argv):
    """Annotate the screenshot named in argv and write it to OUTPUT_PATH.

    argv layout: [program, image, index, needle, ...]. The index argument is
    required positionally but is not used by this script yet. Returns the
    process exit status: 0 on success, 2 when arguments are missing.
    """
    print(argv)

    if len(argv) < 3:
        print("usage: annotate_header.py IMAGE INDEX [NEEDLE ...]", file=sys.stderr)
        return 2

    image = argv[1]
    needles = argv[3:]
    x_offset, y_offset = CROP_BOX[0], CROP_BOX[1]

    with Image.open(image) as im:
        d = ocr_region(im, CROP_BOX)
        draw = ImageDraw.Draw(im)
        print(needles)
        for needle, text, x, y in find_needle_hits(d, needles):
            print("needle", needle, text, x, y)
            # OCR coordinates are relative to the crop, so the crop origin is
            # added back. The marker always starts at the crop's left edge;
            # the word's own x position is only reported, not used.
            draw.line(
                (x_offset, y + y_offset, x_offset + MARKER_LENGTH, y + y_offset),
                fill=MARKER_FILL,
            )
        im.save(OUTPUT_PATH, "PNG")
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
|
BIN
Docker/cropped.png
Normal file
BIN
Docker/cropped.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 142 KiB |
BIN
Docker/screenshot.png
Normal file
BIN
Docker/screenshot.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.4 MiB |
Loading…
Reference in New Issue
Block a user