diff --git a/@types/src/request.d.ts b/@types/src/request.d.ts
index 024a620..0e63332 100644
--- a/@types/src/request.d.ts
+++ b/@types/src/request.d.ts
@@ -1,5 +1,7 @@
///
+///
import { ChildProcessWithoutNullStreams } from "child_process";
+import { Readable } from "stream";
export declare type Image = {
url: string;
domain: string;
@@ -40,7 +42,9 @@ export default class ScreenshotRequest {
current_action: string;
preview: string;
}>;
+ getPreviewURL(): Promise;
getGoodImages(): Image[];
setFinished(): void;
exec(): Promise;
+ getZIP(): Readable;
}
diff --git a/Docker/array-to-lines.js b/Docker/array-to-lines.js
index b5d4e51..d9cd1be 100644
--- a/Docker/array-to-lines.js
+++ b/Docker/array-to-lines.js
@@ -1,5 +1,5 @@
const input = process.argv[2];
const array = JSON.parse(input);
for (let i in array) {
- console.log(array[i]);
+ console.log(array[i]);
}
diff --git a/Docker/bloater.sh b/Docker/bloater.sh
new file mode 100755
index 0000000..9fe7016
--- /dev/null
+++ b/Docker/bloater.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+BLOATING_DOMAINS=$(cat bloating-domains.txt)
+
+bloat_firefox(){ # Loads each selected domain in Firefox and pastes click-accept-all.js into its console; optional $1 = newline-separated requested domains.
+ if [ "$#" = 0 ]; then # called with no argument at all: use the list read from bloating-domains.txt when this file was sourced
+ echo "Bloating Firefox by bloating defined domain list..."
+ DOMAINS_LIST=$(printf '%s\n' "${BLOATING_DOMAINS[@]}")
+ else
+ echo "Bloating Firefox by requested domain list..."
+ DOMAINS_LIST=`node filter-requested-domains.js "$1"` # keeps only the requested domains that are not listed in bloating-domains.txt
+ echo "selected domains"
+ echo $DOMAINS_LIST # unquoted, so word splitting prints the whole list on a single line
+ fi
+
+ if [ -n "$DOMAINS_LIST" ]; then # an empty list means there is nothing to visit
+ while IFS= read -r DOMAIN; do
+ # these domains return a 404 anyway, no need to waste time on them:
+ if is_http_error "$DOMAIN"; then echo "skipping $DOMAIN"; continue; fi
+ load_website "$DOMAIN?hl=pl" "$DOMAIN" # helper defined outside this file (presumably utils.sh - confirm)
+ sleep 1 # sometimes the consent popup needs a little time
+ open_console
+ grab "$DOMAIN before"
+ (tr '\n' ' ' < click-accept-all.js) | xclip -sel clip # put the script on the clipboard as one line (newlines become spaces)
+ keycombo Control_L v # paste the clipboard into the opened console
+ sleep 0.3
+ xdotool key Return # submit the pasted script
+ sleep 1.5
+ grab "$DOMAIN after"
+ done <<< "$DOMAINS_LIST" # the loop reads the list one domain per line
+ else
+ echo "No need to bloat"
+ fi
+}
+
+
diff --git a/Docker/bloating-domains.txt b/Docker/bloating-domains.txt
new file mode 100644
index 0000000..17cf387
--- /dev/null
+++ b/Docker/bloating-domains.txt
@@ -0,0 +1,6 @@
+facebook.com
+google.com
+hotjar.com
+maps.google.com
+linkedin.com
+cookielaw.org
diff --git a/Docker/filter-requested-domains.js b/Docker/filter-requested-domains.js
new file mode 100644
index 0000000..75e13a2
--- /dev/null
+++ b/Docker/filter-requested-domains.js
@@ -0,0 +1,15 @@
+const fs = require("fs");
+const pth = require("path");
+const BLOATING_DOMAINS = (
+ fs.readFileSync(pth.join(__dirname, "bloating-domains.txt")) + ""
+).split("\n");
+const input = process.argv[2];
+const REQUESTED_DOMAINS = input.split('\n');
+
+const array_diff = REQUESTED_DOMAINS.filter(
+ (v) => !BLOATING_DOMAINS.includes(v)
+ );
+
+for (let i in array_diff) {
+ console.log(array_diff[i]);
+}
diff --git a/Docker/prepare-firefox.sh b/Docker/prepare-firefox.sh
index 7763a30..a1bdd0e 100755
--- a/Docker/prepare-firefox.sh
+++ b/Docker/prepare-firefox.sh
@@ -3,6 +3,7 @@
source ./ephemeral-x.sh
source ./annotate_header.sh
source ./utils.sh
+source ./bloater.sh
echo "{\"current_action\": \"Uruchamianie serwera X\"}"
@@ -11,6 +12,9 @@ start_firefox
grab start_firefox
prepare_firefox
grab prepare_firefox
+bloat_firefox
+grab bloat_firefox
+
echo "{\"current_action\": \"Oczekiwanie na URL do analizy...\", \"code\": \"ready\"}"
./eternal-sleep.sh &
wait
diff --git a/Docker/run-analysis.sh b/Docker/run-analysis.sh
index 7908f5b..68a060a 100755
--- a/Docker/run-analysis.sh
+++ b/Docker/run-analysis.sh
@@ -10,10 +10,13 @@ unquote(){
echo $1 | sed 's/"//g'
}
+echo $INPUT
+
URL=$(unquote $(echo $INPUT | jq .url))
DOMAINS=`node array-to-lines.js "$(echo $INPUT | jq .third_party_domains)"`
source ./utils.sh
+source ./bloater.sh
PREVIEW="TRUE" # set to "TRUE" in order to enable automatic screenshots kept in preview.png
@@ -28,21 +31,8 @@ fi
ORIGIN_DOMAIN=$(sed -e 's/[^/]*\/\/\([^@]*@\)\?\([^:/]*\).*/\2/' <<< "$URL")
-
-while IFS= read -r DOMAIN; do
- # these domains return a 404 anyways, no need to waste time on them:
- if is_http_error "$DOMAIN"; then echo "skipping $DOMAIN"; continue; fi
- load_website "$DOMAIN?hl=pl" "$DOMAIN"
- sleep 1 # sometimes the consent popup needs a little time
- open_console
- grab "$DOMAIN before"
- (tr '\n' ' ' < click-accept-all.js) | xclip -sel clip
- keycombo Control_L v
- sleep 0.3
- xdotool key Return
- sleep 1.5
- grab "$DOMAIN after"
-done <<< "$DOMAINS"
+bloat_firefox "$DOMAINS"
+grab bloat_firefox
click 1270 217 # the "trash" icon, so requests from plamienie don't appear in the screenshots
diff --git a/package-lock.json b/package-lock.json
index 2dcc549..171ac4f 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -5,6 +5,7 @@
"requires": true,
"packages": {
"": {
+ "name": "screenshot-service",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
diff --git a/src/index.ts b/src/index.ts
index b5237b4..d43eed8 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -58,6 +58,7 @@ router.get("/", async (ctx) => {
type="text"
name="domains"
id="domains"
+ style="width: calc(100vw - 30%)"
value="doubleclick.net,facebook.com"
/>