diff --git a/@types/src/request.d.ts b/@types/src/request.d.ts index 024a620..0e63332 100644 --- a/@types/src/request.d.ts +++ b/@types/src/request.d.ts @@ -1,5 +1,7 @@ /// +/// import { ChildProcessWithoutNullStreams } from "child_process"; +import { Readable } from "stream"; export declare type Image = { url: string; domain: string; @@ -40,7 +42,9 @@ export default class ScreenshotRequest { current_action: string; preview: string; }>; + getPreviewURL(): Promise; getGoodImages(): Image[]; setFinished(): void; exec(): Promise; + getZIP(): Readable; } diff --git a/Docker/array-to-lines.js b/Docker/array-to-lines.js index b5d4e51..d9cd1be 100644 --- a/Docker/array-to-lines.js +++ b/Docker/array-to-lines.js @@ -1,5 +1,5 @@ const input = process.argv[2]; const array = JSON.parse(input); for (let i in array) { - console.log(array[i]); + console.log(array[i]); } diff --git a/Docker/bloating-domains.txt b/Docker/bloating-domains.txt new file mode 100644 index 0000000..9803b74 --- /dev/null +++ b/Docker/bloating-domains.txt @@ -0,0 +1,15 @@ +facebook.com +google.com +hotjar.com +maps.google.com +linkedin.com +cookielaw.org +googletagmanager.com +googleapis.com +www.google.com +sirdata.com +xandr.com +site.adform.com +adtonos.com/pl/home-pl +adtraction.com/pl +www.cookiebot.com \ No newline at end of file diff --git a/Docker/bloatter.sh b/Docker/bloatter.sh new file mode 100755 index 0000000..be8b65b --- /dev/null +++ b/Docker/bloatter.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +BLOATING_DOMAINS=$(while IFS= read -r line; do echo "$line" +done !BLOATING_DOMAINS.includes(v) +); + +for (let i in array_diff) { + console.log(array_diff[i]); +} diff --git a/Docker/prepare-firefox.sh b/Docker/prepare-firefox.sh index 7763a30..193288d 100755 --- a/Docker/prepare-firefox.sh +++ b/Docker/prepare-firefox.sh @@ -3,6 +3,7 @@ source ./ephemeral-x.sh source ./annotate_header.sh source ./utils.sh +source ./bloatter.sh echo "{\"current_action\": \"Uruchamianie serwera X\"}" @@ -11,6 +12,9 @@ start_firefox grab start_firefox prepare_firefox grab prepare_firefox +bloat_firefox 0 +grab bloat_firefox + echo "{\"current_action\": \"Oczekiwanie na URL do analizy...\", \"code\": \"ready\"}" ./eternal-sleep.sh & wait diff --git a/Docker/run-analysis.sh b/Docker/run-analysis.sh index 229f580..5f61dac 100755 --- a/Docker/run-analysis.sh +++ b/Docker/run-analysis.sh @@ -9,10 +9,14 @@ unquote(){ echo $1 | sed 's/"//g' } +echo $INPUT + URL=$(unquote $(echo $INPUT | jq .url)) DOMAINS=`node array-to-lines.js "$(echo $INPUT | jq .third_party_domains)"` +FILTERED_DOMAINS=`node filter-requested-domains.js "$(echo $INPUT | jq .third_party_domains)"` source ./utils.sh +source ./bloatter.sh PREVIEW="TRUE" # set to "TRUE" in order to enable automatic screenshots kept in preview.png @@ -28,20 +32,28 @@ fi ORIGIN_DOMAIN=$(sed -e 's/[^/]*\/\/\([^@]*@\)\?\([^:/]*\).*/\2/' <<< "$URL") -while IFS= read -r DOMAIN; do - # these domains return a 404 anyways, no need to waste time on them: - if is_http_error "$DOMAIN"; then echo "skipping $DOMAIN"; continue; fi - load_website "$DOMAIN?hl=pl" "$DOMAIN" - sleep 1 # sometimes the consent popup needs a little time - open_console - grab "$DOMAIN before" - (tr '\n' ' ' < click-accept-all.js) | xclip -sel clip - keycombo Control_L v - sleep 0.3 - xdotool key Return - sleep 1.5 - grab "$DOMAIN after" -done <<< "$DOMAINS" +if [ -z "$FILTERED_DOMAINS" ] +then + echo "No need to blot" +else + bloat_firefox 1 + grab bloat_firefox +fi + +# while IFS= read -r DOMAIN; do +# # these domains return a 404 anyways, no need to waste time on them: +# if is_http_error "$DOMAIN"; then echo "skipping $DOMAIN"; continue; fi +# load_website "$DOMAIN?hl=pl" "$DOMAIN" +# sleep 1 # sometimes the consent popup needs a little time +# open_console +# grab "$DOMAIN before" +# (tr '\n' ' ' < click-accept-all.js) | xclip -sel clip +# keycombo Control_L v +# sleep 0.3 +# xdotool key Return +# sleep 1.5 +# grab "$DOMAIN after" +# done <<< "$DOMAINS" click 1270 217 # the "trash" icon, so requests from plamienie don't appear in the screenshots diff --git a/package-lock.json b/package-lock.json index 2dcc549..171ac4f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -5,6 +5,7 @@ "requires": true, "packages": { "": { + "name": "screenshot-service", "version": "1.0.0", "license": "ISC", "dependencies": { diff --git a/src/index.ts b/src/index.ts index b5237b4..d43eed8 100644 --- a/src/index.ts +++ b/src/index.ts @@ -58,6 +58,7 @@ router.get("/", async (ctx) => { type="text" name="domains" id="domains" + style="width: calc(100vw - 30%)" value="doubleclick.net,facebook.com" />