Faster image annotation

This commit is contained in:
Kuba Orlik 2022-06-15 20:27:15 +02:00
parent 4eb946fe88
commit 767ee58a0b
7 changed files with 83 additions and 34 deletions

View File

@ -44,6 +44,7 @@ RUN apk update
RUN apk add wmctrl git make gcc musl-dev libx11-dev zlib zlib-dev jpeg-dev RUN apk add wmctrl git make gcc musl-dev libx11-dev zlib zlib-dev jpeg-dev
RUN git clone https://github.com/muquit/grabc && cd grabc && make && make install RUN git clone https://github.com/muquit/grabc && cd grabc && make && make install
RUN apk add clang RUN apk add clang
RUN apk add freetype-dev
RUN python3 -m pip install --upgrade Pillow RUN python3 -m pip install --upgrade Pillow
COPY . /opt COPY . /opt

View File

@ -1,37 +1,81 @@
# test with: # test with:
# docker image build -t headless-fox Docker && docker run -v $PWD/static:/opt/static -ti headless-fox python annotate_header.py screenshot.png 1 "content-type" "etag" # docker image build -t headless-fox Docker && docker run -v $PWD/static:/opt/static -ti headless-fox time python annotate_header.py screenshot.png "/opt/static/output.png" "content-type" "Tutaj jest content-type" "etag" "Tutaj jest etag z długim opisem co ma wiele słów i wychodzi poza network inspector"
import json
import os
import sys
import uuid

import pytesseract
from PIL import Image, ImageDraw, ImageFont
from pytesseract import Output
# Directory (mounted into the container) where screenshots are read and written.
STATIC_DIR = "/opt/static/"
# Font used to render the annotation comments.
FONT_PATH = "/usr/share/fonts/noto/NotoSansDisplay-Medium.ttf"
FONT_SIZE = 48
# Top-left corner of the screenshot region that is cropped out and sent to OCR.
X_OFFSET = 2054
Y_OFFSET = 313
# (left, top, right, bottom) of that region, in full-screenshot pixels.
CROP_BOX = (X_OFFSET, Y_OFFSET, 2875, 1558)


def partition(lst, size):
    """Yield consecutive slices of ``lst`` holding at most ``size`` items each.

    The last slice is shorter when ``len(lst)`` is not a multiple of ``size``.
    """
    for i in range(0, len(lst), size):
        yield lst[i : i + size]


def _needle_pairs(args):
    """Group a flat ``[needle, comment, needle, comment, ...]`` list into pairs.

    A trailing needle that has no comment is dropped instead of raising an
    unpacking error in the middle of the annotation loop.
    """
    return [tuple(pair) for pair in partition(args, 2) if len(pair) == 2]


def _ocr_region(im):
    """Run tesseract on the ``CROP_BOX`` region of ``im`` and return its data dict.

    The crop is written to a uniquely named temporary PNG, which is removed
    again even when the OCR call raises.
    """
    cropped_filename = STATIC_DIR + uuid.uuid4().hex + ".png"
    im.crop(CROP_BOX).save(cropped_filename)
    try:
        return pytesseract.image_to_data(cropped_filename, output_type=Output.DICT)
    finally:
        os.remove(cropped_filename)


def _draw_annotation(draw, font, x, y, comment):
    """Draw a red arrow whose tip sits left of ``(x, y)`` plus a labelled box.

    ``x`` and ``y`` are full-screenshot coordinates of the left edge and the
    vertical midline of the matched word.
    """
    radius = 30
    fill = "red"
    line_length = 200
    text_padding = 10
    draw.regular_polygon(
        ((x - radius - 5, y), radius), n_sides=3, rotation=270, fill=fill
    )
    draw.line((x - radius - 5, y, x - line_length, y), fill=fill, width=10)
    # ImageDraw.textsize was removed in Pillow 10; textbbox is its replacement.
    left, top, right, bottom = draw.textbbox((0, 0), comment, font=font)
    text_w = right - left
    text_h = bottom - top
    # White backdrop so the comment stays readable on top of the page content.
    draw.rectangle(
        [
            (x - line_length - text_w - text_padding, y - text_h / 2),
            (x - line_length + text_padding, y + text_h / 2),
        ],
        fill="white",
    )
    draw.text(
        (x - line_length - 10, y),
        comment,
        fill=fill,
        anchor="rm",
        font=font,
    )


def main(argv):
    """Annotate a screenshot in place and print the resulting URL as JSON.

    ``argv[1]`` is the screenshot path relative to ``STATIC_DIR``; it is both
    the source image and the output file. The remaining arguments are
    alternating ``needle`` / ``comment`` values: every OCR word containing a
    needle (case-insensitively) gets an arrow and the matching comment.
    """
    if len(argv) < 2:
        sys.exit(
            "usage: annotate_header.py <screenshot relative to /opt/static/> "
            "[<needle> <comment>]..."
        )
    output_file_relative = argv[1]
    output_file = STATIC_DIR + output_file_relative
    needles = argv[2:]
    # Fall back to an empty prefix so a missing BASE_URL yields a relative URL
    # instead of a TypeError after the image has already been written.
    base_url = os.getenv("BASE_URL", "")
    pairs = _needle_pairs(needles)

    with Image.open(output_file) as im:
        d = _ocr_region(im)
        draw = ImageDraw.Draw(im)
        print(needles)
        font = ImageFont.truetype(FONT_PATH, FONT_SIZE)
        for left, top, height, text in zip(
            d["left"], d["top"], d["height"], d["text"]
        ):
            haystack = text.lower()
            # Compute the anchor once per word. The OCR coordinates are
            # relative to the crop, so shift them back into screenshot space
            # and align with the midline of the text rather than its top.
            # Doing this outside the needle loop keeps several needles that
            # match the same word from stacking the offsets.
            anchor_x = left + X_OFFSET
            anchor_y = top + height / 2 + Y_OFFSET
            for needle, comment in pairs:
                if needle.lower() in haystack:
                    print("needle", needle, text, left, top)
                    _draw_annotation(draw, font, anchor_x, anchor_y, comment)
        im.save(output_file, "PNG")

    # json.dumps escapes quotes and backslashes that string concatenation
    # would have emitted verbatim.
    print(json.dumps({"new_file": base_url + "/" + output_file_relative}))


if __name__ == "__main__":
    main(sys.argv)

View File

@ -53,16 +53,15 @@ while IFS= read -r DOMAIN; do
fi fi
echo "{\"current_action\": \"scanning for requests from $DOMAIN...\"}" echo "{\"current_action\": \"scanning for requests from $DOMAIN...\"}"
network_inspector_search "domain:$DOMAIN" # can filter with more granularity: https://developer.mozilla.org/en-US/docs/Tools/Network_Monitor/request_list#filtering_by_properties network_inspector_search "domain:$DOMAIN" # can filter with more granularity: https://developer.mozilla.org/en-US/docs/Tools/Network_Monitor/request_list#filtering_by_properties
grab ni_search # grab ni_search
scrot
count=0 count=0
while network_inspector_has_more_entries while network_inspector_has_more_entries
do do
filename="/opt/static/$ID/${index}.png" filename="$ID/${index}.png"
scrot "$filename" scrot "/opt/static/$filename"
annotate_header "$filename" "$DOMAIN" "$count" \ grab "searching $DOMAIN"
BASE_URL="$BASE_URL" python annotate_header.py "$filename" \
"set-cookie" "identyfikator internetowy z cookie" \ "set-cookie" "identyfikator internetowy z cookie" \
"Cookie" "identyfikator internetowy z cookie" \ "Cookie" "identyfikator internetowy z cookie" \
"Referer" "Część mojej historii przeglądania" & "Referer" "Część mojej historii przeglądania" &

View File

@ -55,7 +55,7 @@ grab(){
NOTE="$grab_no $@" NOTE="$grab_no $@"
#echo $@ #echo $@
((grab_no++)) ((grab_no++))
scrot --note "-f '/usr/share/fonts/noto/NotoSansMono-Medium.ttf/40' -c 255,0,0,255 -t '$NOTE'" "/opt/static/$(date -Iminutes).png" # scrot --note "-f '/usr/share/fonts/noto/NotoSansMono-Medium.ttf/40' -c 255,0,0,255 -t '$NOTE'" "/opt/static/$(date -Iminutes).png"
#echo "not grabbing" #echo "not grabbing"
} }
@ -174,7 +174,7 @@ network_inspector_search(){
keycombo Control_L a keycombo Control_L a
sleep 0.1 sleep 0.1
xdotool type "$query" xdotool type "$query"
xdotool key Escape #xdotool key Escape
sleep 0.3 sleep 0.3
seq 28 | xargs -I {} xdotool key Tab seq 28 | xargs -I {} xdotool key Tab
xdotool key Down xdotool key Down
@ -184,6 +184,7 @@ network_inspector_search(){
} }
network_inspector_has_more_entries(){ network_inspector_has_more_entries(){
grab network_inspector_has_more_entries
[ $(get_pixel_color 1267 1572) = "#f9f9fa" ] [ $(get_pixel_color 1267 1572) = "#f9f9fa" ]
} }

View File

@ -21,9 +21,11 @@ BASE_URL=http://localhost:3000 docker run -i -v $PWD/static:/opt/static headles
## Running the server ## Running the server
``` ```
BASE_URL=http://localhost:3000 node . npm start
``` ```
Visit http://localhost:3000 to see the test form.
To set up a systemd daemon that starts with the server, use: To set up a systemd daemon that starts with the server, use:
``` ```

View File

@ -163,3 +163,4 @@ router.get("/api/requests/:id", async (ctx) => {
app.use(router.routes()).use(router.allowedMethods()); app.use(router.routes()).use(router.allowedMethods());
app.listen(3000); app.listen(3000);
console.log("server started");

View File

@ -4,7 +4,8 @@
"description": "## Dependencies", "description": "## Dependencies",
"main": "index.js", "main": "index.js",
"scripts": { "scripts": {
"install-daemon": "cp screenshot-service.service /etc/systemd/system/ && systemctl start screenshot-service && systemctl enable screenshot-service" "install-daemon": "cp screenshot-service.service /etc/systemd/system/ && systemctl start screenshot-service && systemctl enable screenshot-service",
"start": "docker image build -t headless-fox Docker && BASE_URL=http://localhost:3000 node ."
}, },
"repository": { "repository": {
"type": "git", "type": "git",