From 767ee58a0b867ded1f404f331026fbb6cc81f777 Mon Sep 17 00:00:00 2001 From: Kuba Orlik Date: Wed, 15 Jun 2022 20:27:15 +0200 Subject: [PATCH] Faster image annotation --- Docker/Dockerfile | 1 + Docker/annotate_header.py | 92 +++++++++++++++++++++++++++++---------- Docker/script3.sh | 11 +++-- Docker/utils.sh | 5 ++- README.md | 4 +- index.js | 1 + package.json | 3 +- 7 files changed, 83 insertions(+), 34 deletions(-) diff --git a/Docker/Dockerfile b/Docker/Dockerfile index 5d51b2f..96c4e5d 100644 --- a/Docker/Dockerfile +++ b/Docker/Dockerfile @@ -44,6 +44,7 @@ RUN apk update RUN apk add wmctrl git make gcc musl-dev libx11-dev zlib zlib-dev jpeg-dev RUN git clone https://github.com/muquit/grabc && cd grabc && make && make install RUN apk add clang +RUN apk add freetype-dev RUN python3 -m pip install --upgrade Pillow COPY . /opt diff --git a/Docker/annotate_header.py b/Docker/annotate_header.py index f1ad1c8..7715b09 100644 --- a/Docker/annotate_header.py +++ b/Docker/annotate_header.py @@ -1,37 +1,81 @@ # test with: -# docker image build -t headless-fox Docker && docker run -v $PWD/static:/opt/static -ti headless-fox python annotate_header.py screenshot.png 1 "content-type" "etag" +# docker image build -t headless-fox Docker && docker run -v $PWD/static:/opt/static -e BASE_URL=http://localhost:3000 -ti headless-fox time python annotate_header.py screenshot.png "content-type" "Tutaj jest content-type" "etag" "Tutaj jest etag z długim opisem co ma wiele słów i wychodzi poza network inspector" import os import sys import pytesseract import uuid from pytesseract import Output -from PIL import Image, ImageDraw +from PIL import Image, ImageDraw, ImageFont -print(sys.argv) +output_file_relative = sys.argv[ + 1 +] # this is also the existing source screenshot to annotate. 
It will be updated in-place + +output_file = "/opt/static/" + output_file_relative +needles = sys.argv[2:] + +base_url = os.getenv("BASE_URL") + +# generator: yields consecutive slices of lst, each at most `size` items long +def partition(lst, size): + for i in range(0, len(lst), size): + yield lst[i : i + size] -image = sys.argv[1] -index = sys.argv[2] -needles = sys.argv[3:] # print(d) -with Image.open(image) as im: - x_offset = 2054 - y_offset = 313 - cropped = im.crop((x_offset, y_offset, 2875, 1558)) - cropped_filename = "/opt/static/" + uuid.uuid4().hex + ".png" - cropped.save(cropped_filename) - d = pytesseract.image_to_data(cropped_filename, output_type=Output.DICT) - os.remove(cropped_filename); - draw = ImageDraw.Draw(im) - n_boxes = len(d['level']) - print(needles) - for i in range(n_boxes): - (x, y, w, h, text) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i], d['text'][i]) - for needle in needles: - if needle.lower() in text.lower(): - print("needle", needle, text, x, y) - draw.line((x_offset, y + y_offset, x_offset + 200, y + y_offset), fill=128) - im.save("/opt/static/output.png", "PNG") +with Image.open(output_file) as im: + x_offset = 2054 + y_offset = 313 + cropped = im.crop((x_offset, y_offset, 2875, 1558)) + cropped_filename = "/opt/static/" + uuid.uuid4().hex + ".png" + cropped.save(cropped_filename) + d = pytesseract.image_to_data(cropped_filename, output_type=Output.DICT) + os.remove(cropped_filename) + draw = ImageDraw.Draw(im) + n_boxes = len(d["level"]) + print(needles) + font = ImageFont.truetype("/usr/share/fonts/noto/NotoSansDisplay-Medium.ttf", 48) + for i in range(n_boxes): + (x, y, w, h, text) = ( + d["left"][i], + d["top"][i], + d["width"][i], + d["height"][i], + d["text"][i], + ) + for [needle, comment] in partition(needles, 2): + if needle.lower() in text.lower(): + print("needle", needle, text, x, y) + # modify y so it's aligned not with the top of the text, but with the midline + y = y + h / 2 + radius = 30 + # offset both y and x (NOTE(review): x and y are reassigned inside the needle loop, so a second needle matching the same box is offset twice) + y = y + y_offset + x = x + x_offset + fill = "red" 
+ line_length = 200 + draw.regular_polygon( + ((x - radius - 5, y), radius), n_sides=3, rotation=270, fill=fill + ) + draw.line((x - radius - 5, y, x - line_length, y), fill=fill, width=10) + text_w, text_h = draw.textsize(comment, font) + text_padding = 10 + draw.rectangle( + [ + (x - line_length - text_w - text_padding, y - text_h / 2), + (x - line_length + text_padding, y + text_h / 2), + ], + fill="white", + ) + draw.text( + (x - line_length - 10, y), + comment, + fill=fill, + anchor="rm", + font=font, + ) + im.save(output_file, "PNG") + print('{"new_file": "' + base_url + "/" + output_file_relative + "\"}") diff --git a/Docker/script3.sh b/Docker/script3.sh index 4890bd7..7715007 100755 --- a/Docker/script3.sh +++ b/Docker/script3.sh @@ -53,16 +53,15 @@ while IFS= read -r DOMAIN; do fi echo "{\"current_action\": \"scanning for requests from $DOMAIN...\"}" network_inspector_search "domain:$DOMAIN" # can filter with more granularity: https://developer.mozilla.org/en-US/docs/Tools/Network_Monitor/request_list#filtering_by_properties - grab ni_search - - scrot + # grab ni_search count=0 while network_inspector_has_more_entries do - filename="/opt/static/$ID/${index}.png" - scrot "$filename" - annotate_header "$filename" "$DOMAIN" "$count" \ + filename="$ID/${index}.png" + scrot "/opt/static/$filename" + grab "searching $DOMAIN" + BASE_URL="$BASE_URL" python annotate_header.py "$filename" \ "set-cookie" "identyfikator internetowy z cookie" \ "Cookie" "identyfikator internetowy z cookie" \ "Referer" "Część mojej historii przeglądania" & diff --git a/Docker/utils.sh b/Docker/utils.sh index 548249f..ad00eaa 100644 --- a/Docker/utils.sh +++ b/Docker/utils.sh @@ -55,7 +55,7 @@ grab(){ NOTE="$grab_no $@" #echo $@ ((grab_no++)) - scrot --note "-f '/usr/share/fonts/noto/NotoSansMono-Medium.ttf/40' -c 255,0,0,255 -t '$NOTE'" "/opt/static/$(date -Iminutes).png" + # scrot --note "-f '/usr/share/fonts/noto/NotoSansMono-Medium.ttf/40' -c 255,0,0,255 -t '$NOTE'" 
"/opt/static/$(date -Iminutes).png" #echo "not grabbing" } @@ -174,7 +174,7 @@ network_inspector_search(){ keycombo Control_L a sleep 0.1 xdotool type "$query" - xdotool key Escape + #xdotool key Escape sleep 0.3 seq 28 | xargs -I {} xdotool key Tab xdotool key Down @@ -184,6 +184,7 @@ network_inspector_search(){ } network_inspector_has_more_entries(){ + grab network_inspector_has_more_entries [ $(get_pixel_color 1267 1572) = "#f9f9fa" ] } diff --git a/README.md b/README.md index 8e7457d..003d443 100644 --- a/README.md +++ b/README.md @@ -21,9 +21,11 @@ BASE_URL=http://localhost:3000 docker run -i -v $PWD/static:/opt/static headles ## Running the server ``` -BASE_URL=http://localhost:3000 node . +npm start ``` +Visit http://localhost:3000 to see the test form. + To set up a systemd daemon that starts with the server, use: ``` diff --git a/index.js b/index.js index 639244f..48b28bd 100644 --- a/index.js +++ b/index.js @@ -163,3 +163,4 @@ router.get("/api/requests/:id", async (ctx) => { app.use(router.routes()).use(router.allowedMethods()); app.listen(3000); +console.log("server started"); diff --git a/package.json b/package.json index f80be10..e01785e 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,8 @@ "description": "## Dependencies", "main": "index.js", "scripts": { - "install-daemon": "cp screenshot-service.service /etc/systemd/system/ && systemctl start screenshot-service && systemctl enable screenshot-service" + "install-daemon": "cp screenshot-service.service /etc/systemd/system/ && systemctl start screenshot-service && systemctl enable screenshot-service", + "start": "docker image build -t headless-fox Docker && BASE_URL=http://localhost:3000 node ." }, "repository": { "type": "git",