Faster image annotation

2022-06-15 20:27:15 +02:00 · 2022-06-15 20:27:15 +02:00 · 767ee58a0b
commit 767ee58a0b
parent 4eb946fe88
7 changed files with 83 additions and 34 deletions
--- a/Docker/Dockerfile
+++ b/Docker/Dockerfile
@ -44,6 +44,7 @@ RUN apk update
 RUN apk add wmctrl git make gcc musl-dev libx11-dev zlib zlib-dev jpeg-dev
 RUN git clone https://github.com/muquit/grabc && cd grabc && make && make install
 RUN apk add clang
+RUN apk add freetype-dev
 RUN python3 -m pip install --upgrade Pillow
 COPY . /opt

--- a/Docker/annotate_header.py
+++ b/Docker/annotate_header.py
@ -1,37 +1,81 @@
 # test with:

-# docker image build -t headless-fox Docker && docker run -v $PWD/static:/opt/static -ti headless-fox python annotate_header.py screenshot.png 1 "content-type" "etag"
+# docker image build -t headless-fox Docker && docker run -e BASE_URL=http://localhost:3000 -v $PWD/static:/opt/static -ti headless-fox time python annotate_header.py screenshot.png "content-type" "Tutaj jest content-type" "etag" "Tutaj jest etag z długim opisem co ma wiele słów i wychodzi poza network inspector"

 import os
 import sys
 import pytesseract
 import uuid
 from pytesseract import Output
-from PIL import Image, ImageDraw
+from PIL import Image, ImageDraw, ImageFont

-print(sys.argv)
+# Command line: annotate_header.py <screenshot> [<needle> <comment>]...
+# <screenshot> is given relative to /opt/static/.
+output_file_relative = sys.argv[
+    1
+]  # this is also the existing source screenshot to annotate. It will be updated in-place
+
+output_file = "/opt/static/" + output_file_relative
+# Flat list of alternating needle/comment arguments; it is consumed two items
+# at a time when the OCR results are scanned below.
+needles = sys.argv[2:]
+
+# Prefix of the URL reported in the JSON line printed at the end of the script.
+# os.getenv returns None when BASE_URL is not set.
+base_url = os.getenv("BASE_URL")
+
+# Generator that splits a list into consecutive chunks.
+def partition(lst, size):
+    """Yield successive slices of ``lst`` holding ``size`` items each.
+
+    The final slice is shorter when ``len(lst)`` is not a multiple of ``size``.
+    """
+    for i in range(0, len(lst), size):
+        yield lst[i : i + size]

-image = sys.argv[1]
-index = sys.argv[2]
-needles = sys.argv[3:]

 # print(d)

-with Image.open(image) as im:
-  x_offset = 2054
-  y_offset = 313
-  cropped = im.crop((x_offset, y_offset, 2875, 1558))
-  cropped_filename = "/opt/static/" + uuid.uuid4().hex + ".png"
-  cropped.save(cropped_filename)
-  d = pytesseract.image_to_data(cropped_filename, output_type=Output.DICT)
-  os.remove(cropped_filename);
-  draw = ImageDraw.Draw(im)
-  n_boxes = len(d['level'])
-  print(needles)
-  for i in range(n_boxes):
-      (x, y, w, h, text) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i], d['text'][i])
-      for needle in needles:
-        if needle.lower() in text.lower():
-          print("needle", needle, text, x, y)
-          draw.line((x_offset, y + y_offset, x_offset + 200, y + y_offset), fill=128)
-  im.save("/opt/static/output.png", "PNG")
+# Annotate the screenshot in place: OCR a fixed region of it, and for every
+# recognised word that contains one of the needles draw a red arrow pointing
+# at the word plus the needle's comment on a white background to its left.
+with Image.open(output_file) as im:
+    # Pixel rectangle of the screenshot that gets OCR-ed. OCR coordinates are
+    # relative to this crop, so the offsets are added back before drawing.
+    # NOTE(review): presumably the headers panel of the network inspector at
+    # the fixed screen layout used by the shell scripts - confirm.
+    x_offset = 2054
+    y_offset = 313
+    cropped = im.crop((x_offset, y_offset, 2875, 1558))
+    cropped_filename = "/opt/static/" + uuid.uuid4().hex + ".png"
+    cropped.save(cropped_filename)
+    try:
+        d = pytesseract.image_to_data(cropped_filename, output_type=Output.DICT)
+    finally:
+        # Remove the temporary crop even when OCR fails, so no stray files
+        # are left behind in /opt/static.
+        os.remove(cropped_filename)
+    draw = ImageDraw.Draw(im)
+    n_boxes = len(d["level"])
+    print(needles)
+    font = ImageFont.truetype("/usr/share/fonts/noto/NotoSansDisplay-Medium.ttf", 48)
+    # The (needle, comment) pairs do not depend on the OCR box: build them once.
+    needle_pairs = list(partition(needles, 2))
+    radius = 30
+    fill = "red"
+    line_length = 200
+    text_padding = 10
+    for i in range(n_boxes):
+        x, y, h, text = d["left"][i], d["top"][i], d["height"][i], d["text"][i]
+        text_lower = text.lower()
+        # Arrow tip in full-image coordinates, vertically centred on the word
+        # (midline rather than top edge). Computed once per box and kept in
+        # separate names: reassigning x/y inside the needle loop would add
+        # the offsets again for every further needle matching the same word
+        # (e.g. "set-cookie" and "Cookie" both match "Set-Cookie").
+        anchor_x = x + x_offset
+        anchor_y = y + h / 2 + y_offset
+        for [needle, comment] in needle_pairs:
+            if needle.lower() not in text_lower:
+                continue
+            print("needle", needle, text, x, y)
+            draw.regular_polygon(
+                ((anchor_x - radius - 5, anchor_y), radius),
+                n_sides=3,
+                rotation=270,
+                fill=fill,
+            )
+            draw.line(
+                (anchor_x - radius - 5, anchor_y, anchor_x - line_length, anchor_y),
+                fill=fill,
+                width=10,
+            )
+            # ImageDraw.textsize was removed in Pillow 10; measure the text
+            # through its bounding box instead.
+            left, top, right, bottom = draw.textbbox((0, 0), comment, font=font)
+            text_w = right - left
+            text_h = bottom - top
+            # White backdrop so the comment stays readable over the screenshot.
+            draw.rectangle(
+                [
+                    (
+                        anchor_x - line_length - text_w - text_padding,
+                        anchor_y - text_h / 2,
+                    ),
+                    (anchor_x - line_length + text_padding, anchor_y + text_h / 2),
+                ],
+                fill="white",
+            )
+            # anchor="rm": the given point is the right-middle of the text,
+            # so the comment ends just left of the arrow's line.
+            draw.text(
+                (anchor_x - line_length - 10, anchor_y),
+                comment,
+                fill=fill,
+                anchor="rm",
+                font=font,
+            )
+    im.save(output_file, "PNG")
+    # Report the annotated file's URL as a single JSON line on stdout.
+    print('{"new_file": "' + base_url + "/" + output_file_relative + "\"}")
--- a/Docker/script3.sh
+++ b/Docker/script3.sh
@ -53,16 +53,15 @@ while IFS= read -r DOMAIN; do
    fi
    echo "{\"current_action\": \"scanning for requests from $DOMAIN...\"}"
    network_inspector_search "domain:$DOMAIN" # can filter with more granularity: https://developer.mozilla.org/en-US/docs/Tools/Network_Monitor/request_list#filtering_by_properties
-    grab ni_search
-
-    scrot
+    # grab ni_search

    count=0
    while network_inspector_has_more_entries
    do
-      filename="/opt/static/$ID/${index}.png"
-      scrot "$filename"
-      annotate_header "$filename" "$DOMAIN" "$count" \
+      filename="$ID/${index}.png"
+      scrot "/opt/static/$filename"
+      grab "searching $DOMAIN"
+      BASE_URL="$BASE_URL" python annotate_header.py "$filename" \
        "set-cookie" "identyfikator internetowy z cookie" \
        "Cookie" "identyfikator internetowy z cookie" \
        "Referer" "Część mojej historii przeglądania" &
--- a/Docker/utils.sh
+++ b/Docker/utils.sh
@ -55,7 +55,7 @@ grab(){
  NOTE="$grab_no $@"
  #echo $@
  ((grab_no++))
-  scrot --note "-f '/usr/share/fonts/noto/NotoSansMono-Medium.ttf/40' -c 255,0,0,255 -t '$NOTE'" "/opt/static/$(date -Iminutes).png"
+  # scrot --note "-f '/usr/share/fonts/noto/NotoSansMono-Medium.ttf/40' -c 255,0,0,255 -t '$NOTE'" "/opt/static/$(date -Iminutes).png"
  #echo "not grabbing"
 }

@ -174,7 +174,7 @@ network_inspector_search(){
  keycombo Control_L a
  sleep 0.1
  xdotool type "$query" 
-  xdotool key Escape
+  #xdotool key Escape
  sleep 0.3
  seq 28 | xargs -I {} xdotool key Tab
  xdotool key Down
@ -184,6 +184,7 @@ network_inspector_search(){
 }

 network_inspector_has_more_entries(){
+  grab network_inspector_has_more_entries
  [ $(get_pixel_color 1267 1572) = "#f9f9fa" ]
 }

--- a/README.md
+++ b/README.md
@ -21,9 +21,11 @@ BASE_URL=http://localhost:3000 docker run -i -v $PWD/static:/opt/static  headles
 ## Running the server

 ```
-BASE_URL=http://localhost:3000 node .
+npm start
 ```

+Visit http://localhost:3000 to see the test form.
+
 To set up a systemd daemon that starts with the server, use:

 ```
--- a/index.js
+++ b/index.js
@ -163,3 +163,4 @@ router.get("/api/requests/:id", async (ctx) => {

 app.use(router.routes()).use(router.allowedMethods());
 app.listen(3000);
+console.log("server started");
--- a/package.json
+++ b/package.json
@ -4,7 +4,8 @@
  "description": "## Dependencies",
  "main": "index.js",
  "scripts": {
-    "install-daemon": "cp screenshot-service.service /etc/systemd/system/ && systemctl start screenshot-service && systemctl enable screenshot-service"
+    "install-daemon": "cp screenshot-service.service /etc/systemd/system/ && systemctl start screenshot-service && systemctl enable screenshot-service",
+    "start": "docker image build -t headless-fox Docker && BASE_URL=http://localhost:3000 node ."
  },
  "repository": {
    "type": "git",