From 3f8e106bf5f96a453b7be8f4d83a8282e00f08f7 Mon Sep 17 00:00:00 2001 From: Kuba Orlik Date: Tue, 16 Aug 2022 14:32:33 +0200 Subject: [PATCH 1/6] Update click-all script to handle "accept" buttons hidden in shadow-dom --- Docker/click-accept-all.js | 44 +++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/Docker/click-accept-all.js b/Docker/click-accept-all.js index cc701b6..67753e4 100644 --- a/Docker/click-accept-all.js +++ b/Docker/click-accept-all.js @@ -1,12 +1,34 @@ -regexes = ["allow", "accept", "agree", "akceptuj", /przejdź(?! do główn).*/]; -avoid = ["dostosuj", "don't"]; +console.log("start"); -buttons = Array.from(document.querySelectorAll("*")).filter( +regexes = [ + "allow", + "accept", + "agree", + "akceptuj", + "zgadzam", + "zezwól", + /przejdź(?! do główn).*/, +]; +avoid = ["dostosuj", "don't", "nie zga", "nie zezw", "tylko"]; + +elements = Array.from(document.querySelectorAll("*")); + +// Tik Tok hides the "accept" button within shadowRoot, so it we need to do some digging +elements.forEach((element) => { + if (element.shadowRoot !== null) { + elements.push(...Array.from(element.shadowRoot.querySelectorAll("*"))); + } +}); + +buttons = elements.filter( (e) => - e.textContent.length < 50 && + e.textContent.length < + 70 /* FB has a really long one: Zezwól na korzystanie z niezbędnych i opcjonalnych plików cookie */ && regexes.some((regex) => e.textContent.toLowerCase().match(regex) !== null) ); +console.log("buttons after first filter", buttons); + operations = [ (buttons) => buttons.filter((button) => { @@ -17,16 +39,17 @@ operations = [ !(rect.width == 0 && rect.height == 0) ); }), + (buttons) => + buttons.filter((e) => + avoid.every((word) => !e.textContent.toLowerCase().includes(word)) + ), (buttons) => buttons.filter((e) => !e.textContent.toLowerCase().includes("only")), (buttons) => buttons.filter((e) => e.tagName.toLowerCase() === "button"), (buttons) => buttons.filter((e) => !e.textContent.toLowerCase().includes("do not")), (buttons) => buttons.filter((e) => e.tagName.toLowerCase() === "a"), - (buttons) => - buttons.filter((e) => - avoid.every((word) => !e.textContent.toLowerCase().includes(word)) - ), + (buttons) => buttons.filter( (e) => e.tagName.toLowerCase() === "input" && e.type === "submit" @@ -38,14 +61,15 @@ for (const operation of operations) { break; } const result = operation(buttons); + console.log("RESULT", operation, result); if (result.length) { buttons = result; } } +buttons; + buttons.forEach((button) => button.click()); buttons.forEach((button) => { button.querySelectorAll("input").forEach((child) => child.click()); }); - -buttons; From 558d071fdd1ff4e3991f1d71861c0ae91ee274d5 Mon Sep 17 00:00:00 2001 From: Kuba Orlik Date: Thu, 25 Aug 2022 17:03:19 +0200 Subject: [PATCH 2/6] Fix cookie click script not working --- Docker/click-accept-all.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Docker/click-accept-all.js b/Docker/click-accept-all.js index 67753e4..ee30bf8 100644 --- a/Docker/click-accept-all.js +++ b/Docker/click-accept-all.js @@ -13,7 +13,7 @@ avoid = ["dostosuj", "don't", "nie zga", "nie zezw", "tylko"]; elements = Array.from(document.querySelectorAll("*")); -// Tik Tok hides the "accept" button within shadowRoot, so it we need to do some digging +/* Tik Tok hides the "accept" button within shadowRoot, so it we need to do some digging */ elements.forEach((element) => { if (element.shadowRoot !== null) { elements.push(...Array.from(element.shadowRoot.querySelectorAll("*"))); From 5f6991b08f0247dd3efbb1705e6de5e5af78a8bd Mon Sep 17 00:00:00 2001 From: Kuba Orlik Date: Thu, 25 Aug 2022 17:03:39 +0200 Subject: [PATCH 3/6] Easier way to configure wether or not the previews are scaled down --- Docker/run-analysis.sh | 1 + Docker/utils.sh | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Docker/run-analysis.sh b/Docker/run-analysis.sh index 229f580..6747051 100755 --- a/Docker/run-analysis.sh +++ b/Docker/run-analysis.sh @@ -1,6 +1,7 @@ #!/bin/bash export DISPLAY=:0 +export SCALE_PREVIEW=true # make the previews really small so you only have a small idea of what the server sees INPUT="$1" ID=$2 diff --git a/Docker/utils.sh b/Docker/utils.sh index 95b4367..5afc66f 100644 --- a/Docker/utils.sh +++ b/Docker/utils.sh @@ -75,8 +75,12 @@ grab_screen_to_public(){ rm -f "$tempfile" scrot "$tempfile" vips crop "$tempfile" "$croppedfile" 0 24 2856 1564 - vips resize "$croppedfile" "$scaledfile" 0.1 - mv -f "$scaledfile" "$filepath" + if [ "$SCALE_PREVIEW" = "true" ]; then + vips resize "$croppedfile" "$scaledfile" 0.1 + mv -f "$scaledfile" "$filepath" + else + mv -f "$croppedfile" "$filepath" + fi } keycombo(){ From edcc966ab90e603fe3942628539c5535cb8db17f Mon Sep 17 00:00:00 2001 From: Kuba Orlik Date: Thu, 25 Aug 2022 17:28:29 +0200 Subject: [PATCH 4/6] Convert screenshots to jpg so more of them can fit in an email body --- Docker/annotate_header.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Docker/annotate_header.py b/Docker/annotate_header.py index 37c7b6e..f9ad263 100644 --- a/Docker/annotate_header.py +++ b/Docker/annotate_header.py @@ -113,9 +113,9 @@ with Image.open(output_file) as im: im = im.crop((0, 24, 2880, 1588)) im = im.resize((im.width // 2, im.height // 2)) thumbnail = im.resize((im.width // 5, im.height // 5)) - output_filename = domain.replace(".", "_") + "_" + output_suffix + output_filename = domain.replace(".", "_") + "_" + output_suffix.replace("png", "jpg") thumbnail_filename = domain.replace(".", "_") + "_thumb_" + output_suffix.replace("png", "jpg") - im.save(output_dir + "/" + output_filename, "PNG") + im.save(output_dir + "/" + output_filename, "JPEG") thumbnail.save(output_dir + "/" + thumbnail_filename, "JPEG") print(json.dumps({"new_file": { From f380aa78bd0a8af44eeaa0a78efc59b807abe573 Mon Sep 17 00:00:00 2001 From: Kuba Orlik Date: Wed, 31 Aug 2022 21:51:57 +0200 Subject: [PATCH 5/6] Make the click script handle brealtime.com --- Docker/click-accept-all.js | 1 + 1 file changed, 1 insertion(+) diff --git a/Docker/click-accept-all.js b/Docker/click-accept-all.js index ee30bf8..7b30bb3 100644 --- a/Docker/click-accept-all.js +++ b/Docker/click-accept-all.js @@ -7,6 +7,7 @@ regexes = [ "akceptuj", "zgadzam", "zezwól", + "zgoda", /przejdź(?! do główn).*/, ]; avoid = ["dostosuj", "don't", "nie zga", "nie zezw", "tylko"]; From 5034516abf5d25a4e7e083cf18fafe2c9dc53172 Mon Sep 17 00:00:00 2001 From: Kuba Orlik Date: Fri, 2 Sep 2022 21:42:38 +0200 Subject: [PATCH 6/6] Instead of just waiting 7 seconds for the website to load, scroll down and then up in hopes of triggering more scripts --- Docker/run-analysis.sh | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/Docker/run-analysis.sh b/Docker/run-analysis.sh index 6747051..7908f5b 100755 --- a/Docker/run-analysis.sh +++ b/Docker/run-analysis.sh @@ -47,8 +47,18 @@ done <<< "$DOMAINS" click 1270 217 # the "trash" icon, so requests from plamienie don't appear in the screenshots load_website "$URL" "$URL" -sleep 7 # sometimes the consent popup needs a little time + +for i in 1 2 3 4 5 6 7 +do + xdotool mousemove 28 812 # left side, middle + xdotool click 5 click 5 click 5 click 5 click 5 click 5 # scroll down + sleep 1 +done; + +keycombo Control_L Home + echo "{\"current_action\": \"Strona $ORIGIN_DOMAIN wczytana. Przygotowywanie do analizy...\"}" + grab load_website open_network_inspector grab open_network_inspector