Merge branch 'master' into #29

This commit is contained in:
Kuba Orlik 2022-09-12 19:26:24 +02:00
commit a509085786
4 changed files with 55 additions and 15 deletions

View File

@ -113,9 +113,9 @@ with Image.open(output_file) as im:
im = im.crop((0, 24, 2880, 1588))
im = im.resize((im.width // 2, im.height // 2))
thumbnail = im.resize((im.width // 5, im.height // 5))
output_filename = domain.replace(".", "_") + "_" + output_suffix
output_filename = domain.replace(".", "_") + "_" + output_suffix.replace("png", "jpg")
thumbnail_filename = domain.replace(".", "_") + "_thumb_" + output_suffix.replace("png", "jpg")
im.save(output_dir + "/" + output_filename, "PNG")
im.save(output_dir + "/" + output_filename, "JPEG")
thumbnail.save(output_dir + "/" + thumbnail_filename, "JPEG")
print(json.dumps({"new_file":
{

View File

@ -1,12 +1,35 @@
regexes = ["allow", "accept", "agree", "akceptuj", /przejdź(?! do główn).*/];
avoid = ["dostosuj", "don't"];
console.log("start");
buttons = Array.from(document.querySelectorAll("*")).filter(
regexes = [
"allow",
"accept",
"agree",
"akceptuj",
"zgadzam",
"zezwól",
"zgoda",
/przejdź(?! do główn).*/,
];
avoid = ["dostosuj", "don't", "nie zga", "nie zezw", "tylko"];
elements = Array.from(document.querySelectorAll("*"));
/* Tik Tok hides the "accept" button within a shadowRoot, so we need to do some digging */
elements.forEach((element) => {
if (element.shadowRoot !== null) {
elements.push(...Array.from(element.shadowRoot.querySelectorAll("*")));
}
});
buttons = elements.filter(
(e) =>
e.textContent.length < 50 &&
e.textContent.length <
70 /* FB has a really long one: Zezwól na korzystanie z niezbędnych i opcjonalnych plików cookie */ &&
regexes.some((regex) => e.textContent.toLowerCase().match(regex) !== null)
);
console.log("buttons after first filter", buttons);
operations = [
(buttons) =>
buttons.filter((button) => {
@ -17,16 +40,17 @@ operations = [
!(rect.width == 0 && rect.height == 0)
);
}),
(buttons) =>
buttons.filter((e) =>
avoid.every((word) => !e.textContent.toLowerCase().includes(word))
),
(buttons) =>
buttons.filter((e) => !e.textContent.toLowerCase().includes("only")),
(buttons) => buttons.filter((e) => e.tagName.toLowerCase() === "button"),
(buttons) =>
buttons.filter((e) => !e.textContent.toLowerCase().includes("do not")),
(buttons) => buttons.filter((e) => e.tagName.toLowerCase() === "a"),
(buttons) =>
buttons.filter((e) =>
avoid.every((word) => !e.textContent.toLowerCase().includes(word))
),
(buttons) =>
buttons.filter(
(e) => e.tagName.toLowerCase() === "input" && e.type === "submit"
@ -38,14 +62,15 @@ for (const operation of operations) {
break;
}
const result = operation(buttons);
console.log("RESULT", operation, result);
if (result.length) {
buttons = result;
}
}
buttons;
buttons.forEach((button) => button.click());
buttons.forEach((button) => {
button.querySelectorAll("input").forEach((child) => child.click());
});
buttons;

View File

@ -1,6 +1,7 @@
#!/bin/bash
export DISPLAY=:0
export SCALE_PREVIEW=true # make the previews really small so you only have a small idea of what the server sees
INPUT="$1"
ID=$2
@ -36,8 +37,18 @@ grab bloat_firefox
click 1270 217 # the "trash" icon, so requests from plamienie don't appear in the screenshots
load_website "$URL" "$URL"
sleep 7 # sometimes the consent popup needs a little time
for i in 1 2 3 4 5 6 7
do
xdotool mousemove 28 812 # left side, middle
xdotool click 5 click 5 click 5 click 5 click 5 click 5 # scroll down
sleep 1
done;
keycombo Control_L Home
echo "{\"current_action\": \"Strona $ORIGIN_DOMAIN wczytana. Przygotowywanie do analizy...\"}"
grab load_website
open_network_inspector
grab open_network_inspector

View File

@ -75,8 +75,12 @@ grab_screen_to_public(){
rm -f "$tempfile"
scrot "$tempfile"
vips crop "$tempfile" "$croppedfile" 0 24 2856 1564
vips resize "$croppedfile" "$scaledfile" 0.1
mv -f "$scaledfile" "$filepath"
if [ "$SCALE_PREVIEW" = "true" ]; then
vips resize "$croppedfile" "$scaledfile" 0.1
mv -f "$scaledfile" "$filepath"
else
mv -f "$croppedfile" "$filepath"
fi
}
keycombo(){