Add header values to output
This commit is contained in:
parent
36fd166c99
commit
eccd7dce2b
|
@ -39,7 +39,7 @@ with Image.open(output_file) as im:
|
||||||
draw = ImageDraw.Draw(im)
|
draw = ImageDraw.Draw(im)
|
||||||
n_boxes = len(d["level"])
|
n_boxes = len(d["level"])
|
||||||
font = ImageFont.truetype("/usr/share/fonts/noto/NotoSansDisplay-Medium.ttf", 48)
|
font = ImageFont.truetype("/usr/share/fonts/noto/NotoSansDisplay-Medium.ttf", 48)
|
||||||
found_needles = []
|
found_needles = {}
|
||||||
for i in range(n_boxes):
|
for i in range(n_boxes):
|
||||||
(x, y, w, h, text) = (
|
(x, y, w, h, text) = (
|
||||||
d["left"][i],
|
d["left"][i],
|
||||||
|
@ -48,32 +48,58 @@ with Image.open(output_file) as im:
|
||||||
d["height"][i],
|
d["height"][i],
|
||||||
d["text"][i],
|
d["text"][i],
|
||||||
)
|
)
|
||||||
for [needle, comment] in partition(needles, 2):
|
|
||||||
|
if abs(x - 59) > 2:
|
||||||
|
# a box whose left edge is more than 2 away from x = 59 is not a header name, so skip it
|
||||||
|
continue
|
||||||
|
for [needle, comment, min_value_length, value_must_include] in partition(needles, 4):
|
||||||
if needle.lower() in text.lower():
|
if needle.lower() in text.lower():
|
||||||
found_needles.append(needle)
|
header_value_chunks = []
|
||||||
|
for i in range(n_boxes):
|
||||||
|
(other_x, other_y, other_w, other_h, other_text) = (
|
||||||
|
d["left"][i],
|
||||||
|
d["top"][i],
|
||||||
|
d["width"][i],
|
||||||
|
d["height"][i],
|
||||||
|
d["text"][i],
|
||||||
|
)
|
||||||
|
if abs(other_x - x) <= 4:
|
||||||
|
# a box whose left edge is within 4 of the current header name's is itself a header name, not part of the value
|
||||||
|
continue
|
||||||
|
if abs(other_y - y) <= 4:
|
||||||
|
header_value_chunks.append((other_text, other_x))
|
||||||
|
header_value_chunks.sort(key=lambda y: y[1])
|
||||||
|
header_value = ""
|
||||||
|
for chunk in header_value_chunks:
|
||||||
|
header_value = header_value + chunk[0]
|
||||||
|
if len(header_value) < int(min_value_length):
|
||||||
|
continue
|
||||||
|
if not (value_must_include.lower() in header_value.lower()):
|
||||||
|
continue
|
||||||
|
found_needles[needle] = header_value
|
||||||
# modify y so it's aligned not with the top of the text, but with the midline
|
# modify y so it's aligned not with the top of the text, but with the midline
|
||||||
y = y + h / 2
|
y = y + h / 2
|
||||||
radius = 30
|
radius = 30
|
||||||
# offset both y and x
|
# offset both y and x
|
||||||
y = y + y_offset
|
print_y = y + y_offset
|
||||||
x = x + x_offset
|
print_x = x + x_offset
|
||||||
fill = "red"
|
fill = "red"
|
||||||
line_length = 200
|
line_length = 200
|
||||||
draw.regular_polygon(
|
draw.regular_polygon(
|
||||||
((x - radius - 5, y), radius), n_sides=3, rotation=270, fill=fill
|
((print_x - radius - 5, print_y), radius), n_sides=3, rotation=270, fill=fill
|
||||||
)
|
)
|
||||||
draw.line((x - radius - 5, y, x - line_length, y), fill=fill, width=10)
|
draw.line((print_x - radius - 5, print_y, print_x - line_length, print_y), fill=fill, width=10)
|
||||||
text_w, text_h = draw.textsize(comment, font)
|
text_w, text_h = draw.textsize(comment, font)
|
||||||
text_padding = 10
|
text_padding = 10
|
||||||
draw.rectangle(
|
draw.rectangle(
|
||||||
[
|
[
|
||||||
(x - line_length - text_w - text_padding, y - text_h / 2),
|
(print_x - line_length - text_w - text_padding, print_y - text_h / 2),
|
||||||
(x - line_length + text_padding, y + text_h / 2),
|
(print_x - line_length + text_padding, print_y + text_h / 2),
|
||||||
],
|
],
|
||||||
fill="white",
|
fill="white",
|
||||||
)
|
)
|
||||||
draw.text(
|
draw.text(
|
||||||
(x - line_length - 10, y),
|
(print_x - line_length - 10, print_y),
|
||||||
comment,
|
comment,
|
||||||
fill=fill,
|
fill=fill,
|
||||||
anchor="rm",
|
anchor="rm",
|
||||||
|
|
|
@ -25,6 +25,7 @@ then
|
||||||
refresher_pid=$!;
|
refresher_pid=$!;
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
ORIGIN_DOMAIN=$(sed -e 's/[^/]*\/\/\([^@]*@\)\?\([^:/]*\).*/\2/' <<< "$URL")
|
||||||
|
|
||||||
load_website "$URL"
|
load_website "$URL"
|
||||||
grab load_website
|
grab load_website
|
||||||
|
@ -55,9 +56,9 @@ while IFS= read -r DOMAIN; do
|
||||||
scrot "/opt/static/$filename"
|
scrot "/opt/static/$filename"
|
||||||
grab "searching $DOMAIN"
|
grab "searching $DOMAIN"
|
||||||
BASE_URL="$BASE_URL" python annotate_header.py "$filename" "$DOMAIN" \
|
BASE_URL="$BASE_URL" python annotate_header.py "$filename" "$DOMAIN" \
|
||||||
"set-cookie" "identyfikator internetowy z cookie" \
|
"set-cookie" "identyfikator internetowy z cookie" 11 ""\
|
||||||
"Cookie" "identyfikator internetowy z cookie" \
|
"Cookie" "identyfikator internetowy z cookie" 11 ""\
|
||||||
"Referer" "Część mojej historii przeglądania" &
|
"Referer" "Część mojej historii przeglądania" 0 "$ORIGIN_DOMAIN" &
|
||||||
pids+=($!)
|
pids+=($!)
|
||||||
network_inspector_next_entry
|
network_inspector_next_entry
|
||||||
((index++))
|
((index++))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user