diff --git a/Docker/annotate_header.py b/Docker/annotate_header.py index fc3c2b5..6bcca04 100644 --- a/Docker/annotate_header.py +++ b/Docker/annotate_header.py @@ -39,7 +39,7 @@ with Image.open(output_file) as im: draw = ImageDraw.Draw(im) n_boxes = len(d["level"]) font = ImageFont.truetype("/usr/share/fonts/noto/NotoSansDisplay-Medium.ttf", 48) - found_needles = [] + found_needles = {} for i in range(n_boxes): (x, y, w, h, text) = ( d["left"][i], @@ -48,32 +48,58 @@ with Image.open(output_file) as im: d["height"][i], d["text"][i], ) - for [needle, comment] in partition(needles, 2): + + if abs(x - 59) > 2: + # it means that this is not a header name + continue + for [needle, comment, min_value_length, value_must_include] in partition(needles, 4): if needle.lower() in text.lower(): - found_needles.append(needle) + header_value_chunks = [] + for i in range(n_boxes): + (other_x, other_y, other_w, other_h, other_text) = ( + d["left"][i], + d["top"][i], + d["width"][i], + d["height"][i], + d["text"][i], + ) + if abs(other_x - x) <= 4: + # it means that this is a header name + continue + if abs(other_y - y) <= 4: + header_value_chunks.append((other_text, other_x)) + header_value_chunks.sort(key=lambda y: y[1]) + header_value = "" + for chunk in header_value_chunks: + header_value = header_value + chunk[0] + if len(header_value) < int(min_value_length): + continue + if not (value_must_include.lower() in header_value.lower()): + continue + found_needles[needle] = header_value # modify y so it's aligned not with the top of the text, but with the midline y = y + h / 2 radius = 30 # offset both y and x - y = y + y_offset - x = x + x_offset + print_y = y + y_offset + print_x = x + x_offset fill = "red" line_length = 200 draw.regular_polygon( - ((x - radius - 5, y), radius), n_sides=3, rotation=270, fill=fill + ((print_x - radius - 5, print_y), radius), n_sides=3, rotation=270, fill=fill ) - draw.line((x - radius - 5, y, x - line_length, y), fill=fill, width=10) + draw.line((print_x - radius - 5, print_y, print_x - line_length, print_y), fill=fill, width=10) text_w, text_h = draw.textsize(comment, font) text_padding = 10 draw.rectangle( [ - (x - line_length - text_w - text_padding, y - text_h / 2), - (x - line_length + text_padding, y + text_h / 2), + (print_x - line_length - text_w - text_padding, print_y - text_h / 2), + (print_x - line_length + text_padding, print_y + text_h / 2), ], fill="white", ) draw.text( - (x - line_length - 10, y), + (print_x - line_length - 10, print_y), comment, fill=fill, anchor="rm", diff --git a/Docker/run-analysis.sh b/Docker/run-analysis.sh index f20bd18..3a75b75 100755 --- a/Docker/run-analysis.sh +++ b/Docker/run-analysis.sh @@ -25,6 +25,7 @@ then refresher_pid=$!; fi +ORIGIN_DOMAIN=$(sed -e 's/[^/]*\/\/\([^@]*@\)\?\([^:/]*\).*/\2/' <<< "$URL") load_website "$URL" grab load_website @@ -55,9 +56,9 @@ while IFS= read -r DOMAIN; do scrot "/opt/static/$filename" grab "searching $DOMAIN" BASE_URL="$BASE_URL" python annotate_header.py "$filename" "$DOMAIN" \ - "set-cookie" "identyfikator internetowy z cookie" \ - "Cookie" "identyfikator internetowy z cookie" \ - "Referer" "Część mojej historii przeglądania" & + "set-cookie" "identyfikator internetowy z cookie" 11 ""\ + "Cookie" "identyfikator internetowy z cookie" 11 ""\ + "Referer" "Część mojej historii przeglądania" 0 "$ORIGIN_DOMAIN" & pids+=($!) network_inspector_next_entry ((index++))