Add metadata to screenshots

2022-06-17 11:44:05 +02:00 · 2022-06-17 11:44:05 +02:00 · a96a68517b
commit a96a68517b
parent d06426e22f
2 changed files with 10 additions and 5 deletions
--- a/Docker/annotate_header.py
+++ b/Docker/annotate_header.py
@@ -6,6 +6,7 @@ import os
 import sys
 import pytesseract
 import uuid
+import json
 from pytesseract import Output
 from PIL import Image, ImageDraw, ImageFont
@@ -14,7 +15,8 @@ output_file_relative = sys.argv[
 ]  # this is also the existing source screenshot to annotate. It will be updated in-place
 output_file = "/opt/static/" + output_file_relative
-needles = sys.argv[2:]
+domain = sys.argv[2]
+needles = sys.argv[3:]
 base_url = os.getenv("BASE_URL")
@@ -36,8 +38,8 @@ with Image.open(output_file) as im:
    os.remove(cropped_filename)
    draw = ImageDraw.Draw(im)
    n_boxes = len(d["level"])
    print(needles)
    font = ImageFont.truetype("/usr/share/fonts/noto/NotoSansDisplay-Medium.ttf", 48)
+    found_needles = []
    for i in range(n_boxes):
        (x, y, w, h, text) = (
            d["left"][i],
@@ -48,7 +50,7 @@ with Image.open(output_file) as im:
        )
        for [needle, comment] in partition(needles, 2):
            if needle.lower() in text.lower():
-                print("needle", needle, text, x, y)
+                found_needles.append(needle)
                # modify y so it's aligned not with the top of the text, but with the midline
                y = y + h / 2
                radius = 30
@@ -78,4 +80,7 @@ with Image.open(output_file) as im:
                    font=font,
                )
    im.save(output_file, "PNG")
-    print('{"new_file": "' + base_url + "/static/" + output_file_relative + "\"}")
+    print(json.dumps({"new_file":
+                      {"url": base_url + "/static/" + output_file_relative,
+                       "domain": domain,
+                       "found_headers": found_needles}}))
--- a/Docker/run-analysis.sh
+++ b/Docker/run-analysis.sh
@@ -53,7 +53,7 @@ while IFS= read -r DOMAIN; do
      filename="$ID/${index}.png"
      scrot "/opt/static/$filename"
      grab "searching $DOMAIN"
-      BASE_URL="$BASE_URL" python annotate_header.py "$filename" \
+      BASE_URL="$BASE_URL" python annotate_header.py "$filename" "$DOMAIN" \
        "set-cookie" "identyfikator internetowy z cookie" \
        "Cookie" "identyfikator internetowy z cookie" \
        "Referer" "Część mojej historii przeglądania" &