Faster image annotation

This commit is contained in:
Kuba Orlik 2022-06-15 20:27:15 +02:00
parent 4eb946fe88
commit 767ee58a0b
7 changed files with 83 additions and 34 deletions

View File

@ -44,6 +44,7 @@ RUN apk update
RUN apk add wmctrl git make gcc musl-dev libx11-dev zlib zlib-dev jpeg-dev
RUN git clone https://github.com/muquit/grabc && cd grabc && make && make install
RUN apk add clang
RUN apk add freetype-dev
RUN python3 -m pip install --upgrade Pillow
COPY . /opt

View File

@ -1,37 +1,81 @@
# test with:
# docker image build -t headless-fox Docker && docker run -v $PWD/static:/opt/static -ti headless-fox python annotate_header.py screenshot.png 1 "content-type" "etag"
# docker image build -t headless-fox Docker && docker run -e BASE_URL=http://localhost:3000 -v $PWD/static:/opt/static -ti headless-fox time python annotate_header.py screenshot.png "content-type" "Tutaj jest content-type" "etag" "Tutaj jest etag z długim opisem co ma wiele słów i wychodzi poza network inspector"
import os
import sys
import pytesseract
import uuid
from pytesseract import Output
from PIL import Image, ImageDraw
from PIL import Image, ImageDraw, ImageFont
# Echo the raw command line as a debugging aid.
print(sys.argv)

# First argument: path of the screenshot relative to /opt/static. The same
# file is both the input and the output -- it is annotated in place.
output_file_relative = sys.argv[1]
output_file = f"/opt/static/{output_file_relative}"

# Remaining arguments: flat list that is consumed two at a time further
# down (needle, comment, needle, comment, ...).
needles = sys.argv[2:]

# Prefix used when reporting the resulting file; None when BASE_URL is unset.
base_url = os.environ.get("BASE_URL")
# generator
def partition(lst, size):
    """Yield consecutive slices of ``lst`` holding at most ``size`` items.

    The last slice is shorter when ``len(lst)`` is not a multiple of
    ``size``; an empty ``lst`` yields nothing.

    Args:
        lst: Sequence that supports ``len()`` and slicing.
        size: Maximum length of each slice; must be at least 1.

    Raises:
        ValueError: If ``size`` is smaller than 1. Because this is a
            generator, the error surfaces on the first iteration step,
            not at call time.
    """
    # range() rejects a zero step with an unrelated message and produces
    # nothing at all for a negative one, which would silently drop every item.
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size!r}")
    for start in range(0, len(lst), size):
        yield lst[start : start + size]
# Annotate the screenshot in place: OCR a fixed region, and for every OCR word
# that contains one of the needles draw a marker, a line and the comment text
# to the left of that word. Finally report the annotated file as JSON.
import json  # local import: only needed for the final status line

# Needles arrive as a flat list (needle, comment, needle, comment, ...).
# Reject an odd count up front instead of failing on tuple unpacking halfway
# through the OCR results.
if len(needles) % 2 != 0:
    sys.exit(
        "annotate_header: expected needle/comment pairs, "
        "got an odd number of arguments"
    )
needle_pairs = list(partition(needles, 2))

with Image.open(output_file) as im:
    # Only this fixed rectangle of the screenshot is OCR'd. The offsets are
    # kept so OCR coordinates can be mapped back onto the full image.
    x_offset = 2054
    y_offset = 313
    cropped = im.crop((x_offset, y_offset, 2875, 1558))

    # The crop is handed to tesseract through a uniquely named scratch file;
    # remove it even when saving or OCR fails so failed runs leave nothing
    # behind.
    cropped_filename = "/opt/static/" + uuid.uuid4().hex + ".png"
    try:
        cropped.save(cropped_filename)
        d = pytesseract.image_to_data(cropped_filename, output_type=Output.DICT)
    finally:
        if os.path.exists(cropped_filename):
            os.remove(cropped_filename)

    draw = ImageDraw.Draw(im)
    print(needles)
    font = ImageFont.truetype("/usr/share/fonts/noto/NotoSansDisplay-Medium.ttf", 48)

    # Drawing parameters shared by every annotation.
    fill = "red"
    radius = 30
    line_length = 200
    text_padding = 10

    ocr_boxes = zip(d["left"], d["top"], d["width"], d["height"], d["text"])
    for x, y, _w, h, text in ocr_boxes:
        haystack = text.lower()
        for needle, comment in needle_pairs:
            if needle.lower() not in haystack:
                continue
            print("needle", needle, text, x, y)

            # Map the box to full-image coordinates, vertically centred on
            # the word. Separate names are used on purpose: x and y must stay
            # untouched so that a second needle matching the same box (e.g.
            # "set-cookie" and "Cookie") does not get the offsets applied
            # twice.
            anchor_x = x + x_offset
            anchor_y = y + h / 2 + y_offset

            # Triangle marker next to the word, plus a line leading away
            # from it to the left.
            draw.regular_polygon(
                ((anchor_x - radius - 5, anchor_y), radius),
                n_sides=3,
                rotation=270,
                fill=fill,
            )
            draw.line(
                (anchor_x - radius - 5, anchor_y, anchor_x - line_length, anchor_y),
                fill=fill,
                width=10,
            )

            # White background sized from the bounding box of the text as it
            # will actually be drawn. textbbox replaces ImageDraw.textsize,
            # which newer Pillow releases no longer provide.
            text_anchor = (anchor_x - line_length - 10, anchor_y)
            left, top, right, bottom = draw.textbbox(
                text_anchor, comment, font=font, anchor="rm"
            )
            draw.rectangle(
                [
                    (left - text_padding, top - text_padding),
                    (right + text_padding, bottom + text_padding),
                ],
                fill="white",
            )
            draw.text(
                text_anchor,
                comment,
                fill=fill,
                anchor="rm",
                font=font,
            )
    im.save(output_file, "PNG")

# Status line for the caller; json.dumps takes care of escaping the path.
print(
    json.dumps(
        {"new_file": base_url + "/" + output_file_relative}, ensure_ascii=False
    )
)

View File

@ -53,16 +53,15 @@ while IFS= read -r DOMAIN; do
fi
echo "{\"current_action\": \"scanning for requests from $DOMAIN...\"}"
network_inspector_search "domain:$DOMAIN" # can filter with more granularity: https://developer.mozilla.org/en-US/docs/Tools/Network_Monitor/request_list#filtering_by_properties
grab ni_search
scrot
# grab ni_search
count=0
while network_inspector_has_more_entries
do
filename="/opt/static/$ID/${index}.png"
scrot "$filename"
annotate_header "$filename" "$DOMAIN" "$count" \
filename="$ID/${index}.png"
scrot "/opt/static/$filename"
grab "searching $DOMAIN"
BASE_URL="$BASE_URL" python annotate_header.py "$filename" \
"set-cookie" "identyfikator internetowy z cookie" \
"Cookie" "identyfikator internetowy z cookie" \
"Referer" "Część mojej historii przeglądania" &

View File

@ -55,7 +55,7 @@ grab(){
NOTE="$grab_no $@"
#echo $@
((grab_no++))
scrot --note "-f '/usr/share/fonts/noto/NotoSansMono-Medium.ttf/40' -c 255,0,0,255 -t '$NOTE'" "/opt/static/$(date -Iminutes).png"
# scrot --note "-f '/usr/share/fonts/noto/NotoSansMono-Medium.ttf/40' -c 255,0,0,255 -t '$NOTE'" "/opt/static/$(date -Iminutes).png"
#echo "not grabbing"
}
@ -174,7 +174,7 @@ network_inspector_search(){
keycombo Control_L a
sleep 0.1
xdotool type "$query"
xdotool key Escape
#xdotool key Escape
sleep 0.3
seq 28 | xargs -I {} xdotool key Tab
xdotool key Down
@ -184,6 +184,7 @@ network_inspector_search(){
}
# Succeeds (exit status 0) when the pixel at screen position 1267,1572 has the
# colour #f9f9fa, which the caller uses as its "keep going" condition.
# NOTE(review): presumably that pixel belongs to the network inspector's
# request list -- confirm against the window layout.
network_inspector_has_more_entries(){
	grab network_inspector_has_more_entries
	# Quote the substitution: if get_pixel_color prints nothing or several
	# words, the unquoted form makes `[` fail with a syntax error instead of
	# simply comparing unequal.
	[ "$(get_pixel_color 1267 1572)" = "#f9f9fa" ]
}

View File

@ -21,9 +21,11 @@ BASE_URL=http://localhost:3000 docker run -i -v $PWD/static:/opt/static headles
## Running the server
```
BASE_URL=http://localhost:3000 node .
npm start
```
Visit localhost:3000 to see the test form
To set up a systemd daemon that starts with the server, use:
```

View File

@ -163,3 +163,4 @@ router.get("/api/requests/:id", async (ctx) => {
app.use(router.routes()).use(router.allowedMethods());
app.listen(3000);
console.log("server started");

View File

@ -4,7 +4,8 @@
"description": "## Dependencies",
"main": "index.js",
"scripts": {
"install-daemon": "cp screenshot-service.service /etc/systemd/system/ && systemctl start screenshot-service && systemctl enable screenshot-service"
"install-daemon": "cp screenshot-service.service /etc/systemd/system/ && systemctl start screenshot-service && systemctl enable screenshot-service",
"start": "docker image build -t headless-fox Docker && BASE_URL=http://localhost:3000 node ."
},
"repository": {
"type": "git",