Faster image annotation
This commit is contained in:
parent
4eb946fe88
commit
767ee58a0b
@ -44,6 +44,7 @@ RUN apk update
|
||||
RUN apk add wmctrl git make gcc musl-dev libx11-dev zlib zlib-dev jpeg-dev
|
||||
RUN git clone https://github.com/muquit/grabc && cd grabc && make && make install
|
||||
RUN apk add clang
|
||||
RUN apk add freetype-dev
|
||||
RUN python3 -m pip install --upgrade Pillow
|
||||
COPY . /opt
|
||||
|
||||
|
@ -1,37 +1,81 @@
|
||||
# test with:
|
||||
|
||||
# docker image build -t headless-fox Docker && docker run -v $PWD/static:/opt/static -ti headless-fox python annotate_header.py screenshot.png 1 "content-type" "etag"
|
||||
# docker image build -t headless-fox Docker && docker run -v $PWD/static:/opt/static -ti headless-fox time python annotate_header.py screenshot.png "/opt/static/output.png" "content-type" "Tutaj jest content-type" "etag" "Tutaj jest etag z długim opisem co ma wiele słów i wychodzi poza network inspector"
|
||||
|
||||
import os
|
||||
import sys
|
||||
import pytesseract
|
||||
import uuid
|
||||
from pytesseract import Output
|
||||
from PIL import Image, ImageDraw
|
||||
from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
print(sys.argv)
|
||||
output_file_relative = sys.argv[
|
||||
1
|
||||
] # this is also the existing source screenshot to annotate. It will be updated in-place
|
||||
|
||||
output_file = "/opt/static/" + output_file_relative
|
||||
needles = sys.argv[2:]
|
||||
|
||||
base_url = os.getenv("BASE_URL")
|
||||
|
||||
# generator
|
||||
def partition(lst, size):
    """Yield consecutive slices of ``lst`` holding at most ``size`` items each.

    The last slice is shorter when ``len(lst)`` is not a multiple of ``size``.
    An empty ``lst`` yields nothing.

    Because this is a generator, the ``ValueError`` below is raised when
    iteration starts, not when ``partition`` is called.

    Raises:
        ValueError: if ``size`` is zero or negative. A negative step would
            otherwise make ``range`` empty and silently drop every item.
    """
    if size <= 0:
        raise ValueError("size must be a positive integer")
    for start in range(0, len(lst), size):
        yield lst[start : start + size]
|
||||
|
||||
image = sys.argv[1]
|
||||
index = sys.argv[2]
|
||||
needles = sys.argv[3:]
|
||||
|
||||
# print(d)
|
||||
|
||||
with Image.open(image) as im:
|
||||
x_offset = 2054
|
||||
y_offset = 313
|
||||
cropped = im.crop((x_offset, y_offset, 2875, 1558))
|
||||
cropped_filename = "/opt/static/" + uuid.uuid4().hex + ".png"
|
||||
cropped.save(cropped_filename)
|
||||
d = pytesseract.image_to_data(cropped_filename, output_type=Output.DICT)
|
||||
os.remove(cropped_filename);
|
||||
draw = ImageDraw.Draw(im)
|
||||
n_boxes = len(d['level'])
|
||||
print(needles)
|
||||
for i in range(n_boxes):
|
||||
(x, y, w, h, text) = (d['left'][i], d['top'][i], d['width'][i], d['height'][i], d['text'][i])
|
||||
for needle in needles:
|
||||
if needle.lower() in text.lower():
|
||||
print("needle", needle, text, x, y)
|
||||
draw.line((x_offset, y + y_offset, x_offset + 200, y + y_offset), fill=128)
|
||||
im.save("/opt/static/output.png", "PNG")
|
||||
import json

# Needles arrive as flat "needle comment needle comment ..." arguments.
# Reject an odd count up front instead of failing on tuple unpacking
# half-way through drawing.
if len(needles) % 2 != 0:
    sys.exit(
        "annotate_header.py: expected needle/comment pairs, "
        "got an odd number of arguments"
    )

# Annotate the screenshot in place: OCR a fixed region of it, and for every
# recognised word that contains a needle draw a red arrow pointing at the word
# with the needle's comment to the left of it.
with Image.open(output_file) as im:
    # Top-left corner of the OCR'd region inside the full screenshot.
    # NOTE(review): presumably the headers panel of the network inspector in a
    # fixed-size screenshot -- confirm against the screen layout.
    x_offset = 2054
    y_offset = 313
    cropped = im.crop((x_offset, y_offset, 2875, 1558))
    # pytesseract accepts a PIL image directly, so no temporary PNG has to be
    # written to (and cleaned up from) /opt/static.
    d = pytesseract.image_to_data(cropped, output_type=Output.DICT)
    draw = ImageDraw.Draw(im)
    print(needles)
    font = ImageFont.truetype("/usr/share/fonts/noto/NotoSansDisplay-Medium.ttf", 48)

    radius = 30
    fill = "red"
    line_length = 200
    text_padding = 10

    for left, top, height, text in zip(d["left"], d["top"], d["height"], d["text"]):
        haystack = text.lower()
        # Arrow target in full-screenshot coordinates: left edge of the word,
        # vertically centred on it. Computed once per OCR box and never
        # mutated, so several needles matching the same word all point at the
        # same spot.
        target_x = left + x_offset
        target_y = top + height / 2 + y_offset

        for needle, comment in partition(needles, 2):
            if needle.lower() not in haystack:
                continue
            print("needle", needle, text, left, top)

            # Arrow head: a triangle just left of the word.
            draw.regular_polygon(
                ((target_x - radius - 5, target_y), radius),
                n_sides=3,
                rotation=270,
                fill=fill,
            )
            # Arrow shaft, extending further to the left.
            draw.line(
                (target_x - radius - 5, target_y, target_x - line_length, target_y),
                fill=fill,
                width=10,
            )

            # The comment is right-aligned against the end of the shaft.
            text_anchor = (target_x - line_length - text_padding, target_y)
            # textbbox replaces ImageDraw.textsize (removed in Pillow 10) and
            # returns the exact box the text will occupy for this anchor.
            box_left, box_top, box_right, box_bottom = draw.textbbox(
                text_anchor, comment, font=font, anchor="rm"
            )
            # White background so the comment stays readable over the page.
            draw.rectangle(
                [
                    (box_left - text_padding, box_top - text_padding),
                    (box_right + text_padding, box_bottom + text_padding),
                ],
                fill="white",
            )
            draw.text(
                text_anchor,
                comment,
                fill=fill,
                anchor="rm",
                font=font,
            )

    im.save(output_file, "PNG")

# Report the public location of the annotated file as one JSON object.
# json.dumps escapes the value properly; an unset BASE_URL degrades to a
# root-relative path instead of raising TypeError.
print(json.dumps({"new_file": (base_url or "") + "/" + output_file_relative}))
|
||||
|
@ -53,16 +53,15 @@ while IFS= read -r DOMAIN; do
|
||||
fi
|
||||
echo "{\"current_action\": \"scanning for requests from $DOMAIN...\"}"
|
||||
network_inspector_search "domain:$DOMAIN" # can filter with more granularity: https://developer.mozilla.org/en-US/docs/Tools/Network_Monitor/request_list#filtering_by_properties
|
||||
grab ni_search
|
||||
|
||||
scrot
|
||||
# grab ni_search
|
||||
|
||||
count=0
|
||||
while network_inspector_has_more_entries
|
||||
do
|
||||
filename="/opt/static/$ID/${index}.png"
|
||||
scrot "$filename"
|
||||
annotate_header "$filename" "$DOMAIN" "$count" \
|
||||
filename="$ID/${index}.png"
|
||||
scrot "/opt/static/$filename"
|
||||
grab "searching $DOMAIN"
|
||||
BASE_URL="$BASE_URL" python annotate_header.py "$filename" \
|
||||
"set-cookie" "identyfikator internetowy z cookie" \
|
||||
"Cookie" "identyfikator internetowy z cookie" \
|
||||
"Referer" "Część mojej historii przeglądania" &
|
||||
|
@ -55,7 +55,7 @@ grab(){
|
||||
NOTE="$grab_no $@"
|
||||
#echo $@
|
||||
((grab_no++))
|
||||
scrot --note "-f '/usr/share/fonts/noto/NotoSansMono-Medium.ttf/40' -c 255,0,0,255 -t '$NOTE'" "/opt/static/$(date -Iminutes).png"
|
||||
# scrot --note "-f '/usr/share/fonts/noto/NotoSansMono-Medium.ttf/40' -c 255,0,0,255 -t '$NOTE'" "/opt/static/$(date -Iminutes).png"
|
||||
#echo "not grabbing"
|
||||
}
|
||||
|
||||
@ -174,7 +174,7 @@ network_inspector_search(){
|
||||
keycombo Control_L a
|
||||
sleep 0.1
|
||||
xdotool type "$query"
|
||||
xdotool key Escape
|
||||
#xdotool key Escape
|
||||
sleep 0.3
|
||||
seq 28 | xargs -I {} xdotool key Tab
|
||||
xdotool key Down
|
||||
@ -184,6 +184,7 @@ network_inspector_search(){
|
||||
}
|
||||
|
||||
# Succeeds (exit status 0) when the pixel at screen position (1267, 1572),
# as reported by get_pixel_color, has the color #f9f9fa.
# NOTE(review): presumably that color marks an unselected/remaining row in the
# network inspector list -- confirm against the Firefox theme in use.
network_inspector_has_more_entries(){
	grab network_inspector_has_more_entries
	# Quote the substitution: if get_pixel_color prints nothing (or several
	# words), an unquoted expansion makes `[` fail with a syntax error instead
	# of a clean "not equal".
	[ "$(get_pixel_color 1267 1572)" = "#f9f9fa" ]
}
|
||||
|
||||
|
@ -21,9 +21,11 @@ BASE_URL=http://localhost:3000 docker run -i -v $PWD/static:/opt/static headles
|
||||
## Running the server
|
||||
|
||||
```
|
||||
BASE_URL=http://localhost:3000 node .
|
||||
npm start
|
||||
```
|
||||
|
||||
Visit localhost:3000 to see the test form
|
||||
|
||||
To set up a systemd daemon that starts with the server, use:
|
||||
|
||||
```
|
||||
|
1
index.js
1
index.js
@ -163,3 +163,4 @@ router.get("/api/requests/:id", async (ctx) => {
|
||||
|
||||
app.use(router.routes()).use(router.allowedMethods());
|
||||
app.listen(3000);
|
||||
console.log("server started");
|
||||
|
@ -4,7 +4,8 @@
|
||||
"description": "## Dependencies",
|
||||
"main": "index.js",
|
||||
"scripts": {
|
||||
"install-daemon": "cp screenshot-service.service /etc/systemd/system/ && systemctl start screenshot-service && systemctl enable screenshot-service"
|
||||
"install-daemon": "cp screenshot-service.service /etc/systemd/system/ && systemctl start screenshot-service && systemctl enable screenshot-service",
|
||||
"start": "docker image build -t headless-fox Docker && BASE_URL=http://localhost:3000 node ."
|
||||
},
|
||||
"repository": {
|
||||
"type": "git",
|
||||
|
Loading…
Reference in New Issue
Block a user