Add header values to output

This commit is contained in:
Kuba Orlik 2022-07-08 10:40:16 +02:00
parent 36fd166c99
commit eccd7dce2b
2 changed files with 40 additions and 13 deletions

View File

@ -39,7 +39,7 @@ with Image.open(output_file) as im:
draw = ImageDraw.Draw(im)
n_boxes = len(d["level"])
font = ImageFont.truetype("/usr/share/fonts/noto/NotoSansDisplay-Medium.ttf", 48)
found_needles = []
found_needles = {}
for i in range(n_boxes):
(x, y, w, h, text) = (
d["left"][i],
@ -48,32 +48,58 @@ with Image.open(output_file) as im:
d["height"][i],
d["text"][i],
)
for [needle, comment] in partition(needles, 2):
if abs(x - 59) > 2:
# it means that this is not a header name
continue
for [needle, comment, min_value_length, value_must_include] in partition(needles, 4):
if needle.lower() in text.lower():
found_needles.append(needle)
header_value_chunks = []
for i in range(n_boxes):
(other_x, other_y, other_w, other_h, other_text) = (
d["left"][i],
d["top"][i],
d["width"][i],
d["height"][i],
d["text"][i],
)
if abs(other_x - x) <= 4:
# it means that this is a header name
continue
if abs(other_y - y) <= 4:
header_value_chunks.append((other_text, other_x))
header_value_chunks.sort(key=lambda y: y[1])
header_value = ""
for chunk in header_value_chunks:
header_value = header_value + chunk[0]
if len(header_value) < int(min_value_length):
continue
if not (value_must_include.lower() in header_value.lower()):
continue
found_needles[needle] = header_value
# modify y so it's aligned not with the top of the text, but with the midline
y = y + h / 2
radius = 30
# offset both y and x
y = y + y_offset
x = x + x_offset
print_y = y + y_offset
print_x = x + x_offset
fill = "red"
line_length = 200
draw.regular_polygon(
((x - radius - 5, y), radius), n_sides=3, rotation=270, fill=fill
((print_x - radius - 5, print_y), radius), n_sides=3, rotation=270, fill=fill
)
draw.line((x - radius - 5, y, x - line_length, y), fill=fill, width=10)
draw.line((print_x - radius - 5, print_y, print_x - line_length, print_y), fill=fill, width=10)
text_w, text_h = draw.textsize(comment, font)
text_padding = 10
draw.rectangle(
[
(x - line_length - text_w - text_padding, y - text_h / 2),
(x - line_length + text_padding, y + text_h / 2),
(print_x - line_length - text_w - text_padding, print_y - text_h / 2),
(print_x - line_length + text_padding, print_y + text_h / 2),
],
fill="white",
)
draw.text(
(x - line_length - 10, y),
(print_x - line_length - 10, print_y),
comment,
fill=fill,
anchor="rm",

View File

@ -25,6 +25,7 @@ then
refresher_pid=$!;
fi
ORIGIN_DOMAIN=$(sed -e 's/[^/]*\/\/\([^@]*@\)\?\([^:/]*\).*/\2/' <<< "$URL")
load_website "$URL"
grab load_website
@ -55,9 +56,9 @@ while IFS= read -r DOMAIN; do
scrot "/opt/static/$filename"
grab "searching $DOMAIN"
BASE_URL="$BASE_URL" python annotate_header.py "$filename" "$DOMAIN" \
"set-cookie" "identyfikator internetowy z cookie" \
"Cookie" "identyfikator internetowy z cookie" \
"Referer" "Część mojej historii przeglądania" &
"set-cookie" "identyfikator internetowy z cookie" 11 ""\
"Cookie" "identyfikator internetowy z cookie" 11 ""\
"Referer" "Część mojej historii przeglądania" 0 "$ORIGIN_DOMAIN" &
pids+=($!)
network_inspector_next_entry
((index++))