Move the consent-popup "bloating" loop out of run-analysis.sh into a reusable
bloat_firefox function (Docker/bloater.sh).

- prepare-firefox.sh calls bloat_firefox without arguments, which visits every
  domain listed in Docker/bloating-domains.txt.
- run-analysis.sh passes the requested third-party domains; only those that are
  not listed in bloating-domains.txt are visited (filter-requested-domains.js).
- request.d.ts declares getPreviewURL() and getZIP(), the "domains" input on the
  index page gets an explicit width, package-lock.json records the package name.

NOTE(review): in this copy the "///" lines and the "Promise" return types in
request.d.ts appear to have lost angle-bracketed text, and the src/index.ts hunk
shows one context line fewer than its header declares - confirm against the
repository. Indentation is reconstructed, and the "index" blob ids are the ones
recorded for the originally committed file contents.

diff --git a/@types/src/request.d.ts b/@types/src/request.d.ts
index 024a620..0e63332 100644
--- a/@types/src/request.d.ts
+++ b/@types/src/request.d.ts
@@ -1,5 +1,7 @@
 ///
+///
 import { ChildProcessWithoutNullStreams } from "child_process";
+import { Readable } from "stream";
 export declare type Image = {
     url: string;
     domain: string;
@@ -40,7 +42,9 @@ export default class ScreenshotRequest {
         current_action: string;
         preview: string;
     }>;
+    getPreviewURL(): Promise;
     getGoodImages(): Image[];
     setFinished(): void;
     exec(): Promise;
+    getZIP(): Readable;
 }
diff --git a/Docker/array-to-lines.js b/Docker/array-to-lines.js
index b5d4e51..d9cd1be 100644
--- a/Docker/array-to-lines.js
+++ b/Docker/array-to-lines.js
@@ -1,5 +1,5 @@
 const input = process.argv[2];
 const array = JSON.parse(input);
 for (let i in array) {
-    console.log(array[i]);
+  console.log(array[i]);
 }
diff --git a/Docker/bloater.sh b/Docker/bloater.sh
new file mode 100755
index 0000000..9fe7016
--- /dev/null
+++ b/Docker/bloater.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# Visits a list of domains in Firefox and runs click-accept-all.js in the
+# console of each one, taking a screenshot before and after.
+# NOTE(review): is_http_error, load_website, open_console, grab and keycombo are
+# not defined here - presumably they come from utils.sh, which both callers source.
+
+# Default domain list, one per line; the path is relative to the working directory.
+BLOATING_DOMAINS=$(cat bloating-domains.txt)
+
+# bloat_firefox [REQUESTED_DOMAINS]
+#   Without arguments: visit every domain from bloating-domains.txt.
+#   With one argument (newline-separated domains): visit only the domains that
+#   filter-requested-domains.js prints, i.e. those not in bloating-domains.txt.
+bloat_firefox(){
+  if [ "$#" = 0 ]; then
+    echo "Bloating Firefox by bloating defined domain list..."
+    DOMAINS_LIST="$BLOATING_DOMAINS"
+  else
+    echo "Bloating Firefox by requested domain list..."
+    DOMAINS_LIST=$(node filter-requested-domains.js "$1")
+    echo "selected domains"
+    # quoted, so the list is printed verbatim without word splitting or globbing
+    echo "$DOMAINS_LIST"
+  fi
+
+  if [ -n "$DOMAINS_LIST" ]; then
+    while IFS= read -r DOMAIN; do
+      # these domains return a 404 anyways, no need to waste time on them:
+      if is_http_error "$DOMAIN"; then echo "skipping $DOMAIN"; continue; fi
+      load_website "$DOMAIN?hl=pl" "$DOMAIN"
+      sleep 1 # sometimes the consent popup needs a little time
+      open_console
+      grab "$DOMAIN before"
+      # put click-accept-all.js on the clipboard as a single line, paste it, press Return
+      (tr '\n' ' ' < click-accept-all.js) | xclip -sel clip
+      keycombo Control_L v
+      sleep 0.3
+      xdotool key Return
+      sleep 1.5
+      grab "$DOMAIN after"
+    done <<< "$DOMAINS_LIST"
+  else
+    echo "No need to bloat"
+  fi
+}
diff --git a/Docker/bloating-domains.txt b/Docker/bloating-domains.txt
new file mode 100644
index 0000000..17cf387
--- /dev/null
+++ b/Docker/bloating-domains.txt
@@ -0,0 +1,6 @@
+facebook.com
+google.com
+hotjar.com
+maps.google.com
+linkedin.com
+cookielaw.org
diff --git a/Docker/filter-requested-domains.js b/Docker/filter-requested-domains.js
new file mode 100644
index 0000000..75e13a2
--- /dev/null
+++ b/Docker/filter-requested-domains.js
@@ -0,0 +1,23 @@
+// Prints, one per line, the requested domains (argv[2], newline-separated)
+// that are not listed in bloating-domains.txt next to this script.
+const fs = require("fs");
+const pth = require("path");
+
+// Splits newline-separated text into trimmed, non-empty entries.
+const toDomains = (text) =>
+  text
+    .split("\n")
+    .map((line) => line.trim())
+    .filter((line) => line !== "");
+
+const BLOATING_DOMAINS = new Set(
+  toDomains(fs.readFileSync(pth.join(__dirname, "bloating-domains.txt"), "utf8"))
+);
+// A missing argument is treated as an empty request instead of failing on undefined.
+const REQUESTED_DOMAINS = toDomains(process.argv[2] || "");
+
+for (const domain of REQUESTED_DOMAINS) {
+  if (!BLOATING_DOMAINS.has(domain)) {
+    console.log(domain);
+  }
+}
diff --git a/Docker/prepare-firefox.sh b/Docker/prepare-firefox.sh
index 7763a30..a1bdd0e 100755
--- a/Docker/prepare-firefox.sh
+++ b/Docker/prepare-firefox.sh
@@ -3,6 +3,7 @@
 source ./ephemeral-x.sh
 source ./annotate_header.sh
 source ./utils.sh
+source ./bloater.sh
 
 echo "{\"current_action\": \"Uruchamianie serwera X\"}"
 
@@ -11,6 +12,9 @@ start_firefox
 grab start_firefox
 prepare_firefox
 grab prepare_firefox
+bloat_firefox
+grab bloat_firefox
+
 echo "{\"current_action\": \"Oczekiwanie na URL do analizy...\", \"code\": \"ready\"}"
 ./eternal-sleep.sh &
 wait
diff --git a/Docker/run-analysis.sh b/Docker/run-analysis.sh
index 7908f5b..68a060a 100755
--- a/Docker/run-analysis.sh
+++ b/Docker/run-analysis.sh
@@ -10,10 +10,13 @@ unquote(){
     echo $1 | sed 's/"//g'
 }
 
+echo $INPUT
+
 URL=$(unquote $(echo $INPUT | jq .url))
 DOMAINS=`node array-to-lines.js "$(echo $INPUT | jq .third_party_domains)"`
 
 source ./utils.sh
+source ./bloater.sh
 
 PREVIEW="TRUE" # set to "TRUE" in order to enable automatic screenshots kept in preview.png
 
@@ -28,21 +31,8 @@ fi
 
 ORIGIN_DOMAIN=$(sed -e 's/[^/]*\/\/\([^@]*@\)\?\([^:/]*\).*/\2/' <<< "$URL")
 
-
-while IFS= read -r DOMAIN; do
-    # these domains return a 404 anyways, no need to waste time on them:
-    if is_http_error "$DOMAIN"; then echo "skipping $DOMAIN"; continue; fi
-    load_website "$DOMAIN?hl=pl" "$DOMAIN"
-    sleep 1 # sometimes the consent popup needs a little time
-    open_console
-    grab "$DOMAIN before"
-    (tr '\n' ' ' < click-accept-all.js) | xclip -sel clip
-    keycombo Control_L v
-    sleep 0.3
-    xdotool key Return
-    sleep 1.5
-    grab "$DOMAIN after"
-done <<< "$DOMAINS"
+bloat_firefox "$DOMAINS"
+grab bloat_firefox
 
 click 1270 217 # the "trash" icon, so requests from plamienie don't appear in the screenshots
 
diff --git a/package-lock.json b/package-lock.json
index 2dcc549..171ac4f 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -5,6 +5,7 @@
   "requires": true,
   "packages": {
     "": {
+      "name": "screenshot-service",
       "version": "1.0.0",
       "license": "ISC",
       "dependencies": {
diff --git a/src/index.ts b/src/index.ts
index b5237b4..d43eed8 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -58,6 +58,7 @@ router.get("/", async (ctx) => {
           type="text"
           name="domains"
           id="domains"
+          style="width: calc(100vw - 30%)"
           value="doubleclick.net,facebook.com"
         />