Merge branch 'master' of git.internet-czas-dzialac.pl:icd/screenshot-service into #29

This commit is contained in:
Arkadiusz Wieczorek 2022-11-20 11:31:06 +01:00
commit c8dc0ff70d
6 changed files with 61 additions and 18 deletions

View File

@ -1,4 +1,4 @@
FROM docker.io/python:3.11.0a5-alpine3.15 FROM docker.io/python:3.11.0rc1-alpine3.16
# inspired by https://github.com/darktohka/pytesseract-docker/blob/master/Dockerfile # inspired by https://github.com/darktohka/pytesseract-docker/blob/master/Dockerfile
@ -38,7 +38,6 @@ RUN apk del .dev-deps
RUN rm -f /usr/local/lib/*.a RUN rm -f /usr/local/lib/*.a
RUN rm -rf /tmp/* /var/cache/apk/* RUN rm -rf /tmp/* /var/cache/apk/*
RUN apk add jq sed nodejs RUN apk add jq sed nodejs
COPY ./mozilla /root/.mozilla
RUN echo https://dl-cdn.alpinelinux.org/alpine/edge/testing >> /etc/apk/repositories RUN echo https://dl-cdn.alpinelinux.org/alpine/edge/testing >> /etc/apk/repositories
RUN apk update RUN apk update
RUN apk add wmctrl git make gcc musl-dev libx11-dev zlib zlib-dev jpeg-dev RUN apk add wmctrl git make gcc musl-dev libx11-dev zlib zlib-dev jpeg-dev
@ -51,6 +50,8 @@ RUN apk add xclip
RUN apk add curl RUN apk add curl
RUN apk add vips RUN apk add vips
COPY ./mozilla /root/.mozilla
COPY . /opt COPY . /opt
CMD /opt/prepare-firefox.sh CMD /opt/prepare-firefox.sh

View File

@ -113,9 +113,9 @@ with Image.open(output_file) as im:
im = im.crop((0, 24, 2880, 1588)) im = im.crop((0, 24, 2880, 1588))
im = im.resize((im.width // 2, im.height // 2)) im = im.resize((im.width // 2, im.height // 2))
thumbnail = im.resize((im.width // 5, im.height // 5)) thumbnail = im.resize((im.width // 5, im.height // 5))
output_filename = domain.replace(".", "_") + "_" + output_suffix output_filename = domain.replace(".", "_") + "_" + output_suffix.replace("png", "jpg")
thumbnail_filename = domain.replace(".", "_") + "_thumb_" + output_suffix.replace("png", "jpg") thumbnail_filename = domain.replace(".", "_") + "_thumb_" + output_suffix.replace("png", "jpg")
im.save(output_dir + "/" + output_filename, "PNG") im.save(output_dir + "/" + output_filename, "JPEG")
thumbnail.save(output_dir + "/" + thumbnail_filename, "JPEG") thumbnail.save(output_dir + "/" + thumbnail_filename, "JPEG")
print(json.dumps({"new_file": print(json.dumps({"new_file":
{ {

View File

@ -1,12 +1,35 @@
regexes = ["allow", "accept", "agree", "akceptuj", /przejdź(?! do główn).*/]; console.log("start");
avoid = ["dostosuj", "don't"];
buttons = Array.from(document.querySelectorAll("*")).filter( regexes = [
"allow",
"accept",
"agree",
"akceptuj",
"zgadzam",
"zezwól",
"zgoda",
/przejdź(?! do główn).*/,
];
avoid = ["dostosuj", "don't", "nie zga", "nie zezw", "tylko"];
elements = Array.from(document.querySelectorAll("*"));
/* TikTok hides the "accept" button within a shadowRoot, so we need to do some digging */
elements.forEach((element) => {
if (element.shadowRoot !== null) {
elements.push(...Array.from(element.shadowRoot.querySelectorAll("*")));
}
});
buttons = elements.filter(
(e) => (e) =>
e.textContent.length < 50 && e.textContent.length <
70 /* FB has a really long one: Zezwól na korzystanie z niezbędnych i opcjonalnych plików cookie */ &&
regexes.some((regex) => e.textContent.toLowerCase().match(regex) !== null) regexes.some((regex) => e.textContent.toLowerCase().match(regex) !== null)
); );
console.log("buttons after first filter", buttons);
operations = [ operations = [
(buttons) => (buttons) =>
buttons.filter((button) => { buttons.filter((button) => {
@ -17,16 +40,17 @@ operations = [
!(rect.width == 0 && rect.height == 0) !(rect.width == 0 && rect.height == 0)
); );
}), }),
(buttons) =>
buttons.filter((e) =>
avoid.every((word) => !e.textContent.toLowerCase().includes(word))
),
(buttons) => (buttons) =>
buttons.filter((e) => !e.textContent.toLowerCase().includes("only")), buttons.filter((e) => !e.textContent.toLowerCase().includes("only")),
(buttons) => buttons.filter((e) => e.tagName.toLowerCase() === "button"), (buttons) => buttons.filter((e) => e.tagName.toLowerCase() === "button"),
(buttons) => (buttons) =>
buttons.filter((e) => !e.textContent.toLowerCase().includes("do not")), buttons.filter((e) => !e.textContent.toLowerCase().includes("do not")),
(buttons) => buttons.filter((e) => e.tagName.toLowerCase() === "a"), (buttons) => buttons.filter((e) => e.tagName.toLowerCase() === "a"),
(buttons) =>
buttons.filter((e) =>
avoid.every((word) => !e.textContent.toLowerCase().includes(word))
),
(buttons) => (buttons) =>
buttons.filter( buttons.filter(
(e) => e.tagName.toLowerCase() === "input" && e.type === "submit" (e) => e.tagName.toLowerCase() === "input" && e.type === "submit"
@ -38,14 +62,15 @@ for (const operation of operations) {
break; break;
} }
const result = operation(buttons); const result = operation(buttons);
console.log("RESULT", operation, result);
if (result.length) { if (result.length) {
buttons = result; buttons = result;
} }
} }
buttons;
buttons.forEach((button) => button.click()); buttons.forEach((button) => button.click());
buttons.forEach((button) => { buttons.forEach((button) => {
button.querySelectorAll("input").forEach((child) => child.click()); button.querySelectorAll("input").forEach((child) => child.click());
}); });
buttons;

View File

@ -357,3 +357,5 @@ user_pref("trailhead.firstrun.didSeeAboutWelcome", true);
user_pref("browser.disableResetPrompt", true); user_pref("browser.disableResetPrompt", true);
user_pref("devtools.selfxss.count", 100); user_pref("devtools.selfxss.count", 100);
user_pref("intl.accept_languages", "pl,en-us"); user_pref("intl.accept_languages", "pl,en-us");
// user_pref("widget.non-native-theme.scrollbar.style", 4);
user_pref("widget.gtk.overlay-scrollbars.enabled", false); // make the scrollbars visible even if not scrolling

View File

@ -1,6 +1,7 @@
#!/bin/bash #!/bin/bash
export DISPLAY=:0 export DISPLAY=:0
export SCALE_PREVIEW=true # make the previews really small so you only have a small idea of what the server sees
INPUT="$1" INPUT="$1"
ID=$2 ID=$2
@ -36,8 +37,18 @@ grab bloat_firefox
click 1270 217 # the "trash" icon, so requests from plamienie don't appear in the screenshots click 1270 217 # the "trash" icon, so requests from plamienie don't appear in the screenshots
load_website "$URL" "$URL" load_website "$URL" "$URL"
sleep 7 # sometimes the consent popup needs a little time
for i in 1 2 3 4 5 6 7
do
xdotool mousemove 28 812 # left side, middle
xdotool click 5 click 5 click 5 click 5 click 5 click 5 # scroll down
sleep 1
done;
keycombo Control_L Home
echo "{\"current_action\": \"Strona $ORIGIN_DOMAIN wczytana. Przygotowywanie do analizy...\"}" echo "{\"current_action\": \"Strona $ORIGIN_DOMAIN wczytana. Przygotowywanie do analizy...\"}"
grab load_website grab load_website
open_network_inspector open_network_inspector
grab open_network_inspector grab open_network_inspector

View File

@ -75,8 +75,12 @@ grab_screen_to_public(){
rm -f "$tempfile" rm -f "$tempfile"
scrot "$tempfile" scrot "$tempfile"
vips crop "$tempfile" "$croppedfile" 0 24 2856 1564 vips crop "$tempfile" "$croppedfile" 0 24 2856 1564
vips resize "$croppedfile" "$scaledfile" 0.1 if [ "$SCALE_PREVIEW" = "true" ]; then
mv -f "$scaledfile" "$filepath" vips resize "$croppedfile" "$scaledfile" 0.1
mv -f "$scaledfile" "$filepath"
else
mv -f "$croppedfile" "$filepath"
fi
} }
keycombo(){ keycombo(){
@ -228,7 +232,7 @@ network_inspector_next_entry(){
# } # }
network_inspector_headers_need_scrolling (){ network_inspector_headers_need_scrolling (){
color=$(get_pixel_color 2870 1573) color=$(get_pixel_color 2875 1583)
[ "$color" = "#ededf0" ] [ "$color" = "#ededf0" ]
} }