diff --git a/Docker/annotate_header.py b/Docker/annotate_header.py index 663b323..a876569 100644 --- a/Docker/annotate_header.py +++ b/Docker/annotate_header.py @@ -6,6 +6,7 @@ import os import sys import pytesseract import uuid +import json from pytesseract import Output from PIL import Image, ImageDraw, ImageFont @@ -14,7 +15,8 @@ output_file_relative = sys.argv[ ] # this is also the existing source screenshot to annotate. It will be updated in-place output_file = "/opt/static/" + output_file_relative -needles = sys.argv[2:] +domain = sys.argv[2] +needles = sys.argv[3:] base_url = os.getenv("BASE_URL") @@ -36,8 +38,8 @@ with Image.open(output_file) as im: os.remove(cropped_filename) draw = ImageDraw.Draw(im) n_boxes = len(d["level"]) - print(needles) font = ImageFont.truetype("/usr/share/fonts/noto/NotoSansDisplay-Medium.ttf", 48) + found_needles = [] for i in range(n_boxes): (x, y, w, h, text) = ( d["left"][i], @@ -48,7 +50,7 @@ with Image.open(output_file) as im: ) for [needle, comment] in partition(needles, 2): if needle.lower() in text.lower(): - print("needle", needle, text, x, y) + found_needles.append(needle) # modify y so it's aligned not with the top of the text, but with the midline y = y + h / 2 radius = 30 @@ -78,4 +80,7 @@ with Image.open(output_file) as im: font=font, ) im.save(output_file, "PNG") - print('{"new_file": "' + base_url + "/static/" + output_file_relative + "\"}") + print(json.dumps({"new_file": + {"url": base_url + "/static/" + output_file_relative, + "domain": domain, + "found_headers": found_needles}})) diff --git a/Docker/run-analysis.sh b/Docker/run-analysis.sh index c6c3729..da23eb8 100755 --- a/Docker/run-analysis.sh +++ b/Docker/run-analysis.sh @@ -53,7 +53,7 @@ while IFS= read -r DOMAIN; do filename="$ID/${index}.png" scrot "/opt/static/$filename" grab "searching $DOMAIN" - BASE_URL="$BASE_URL" python annotate_header.py "$filename" \ + BASE_URL="$BASE_URL" python annotate_header.py "$filename" "$DOMAIN" \ "set-cookie" "identyfikator internetowy z cookie" \ "Cookie" "identyfikator internetowy z cookie" \ "Referer" "Część mojej historii przeglądania" &