From e9213004d4e1fb4ac504a3f3d8782d3da3bc479e Mon Sep 17 00:00:00 2001 From: Kuba Orlik Date: Thu, 14 Jul 2022 22:19:34 +0200 Subject: [PATCH] Skip plamienie for domains that return an error upon http request --- Docker/Dockerfile | 1 + Docker/run-analysis.sh | 4 +++- Docker/utils.sh | 11 +++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/Docker/Dockerfile b/Docker/Dockerfile index bc3f723..d4db82b 100644 --- a/Docker/Dockerfile +++ b/Docker/Dockerfile @@ -48,6 +48,7 @@ RUN apk add freetype-dev RUN python3 -m pip install --upgrade Pillow RUN apk add zip RUN apk add xclip +RUN apk add curl COPY . /opt CMD /opt/prepare-firefox.sh diff --git a/Docker/run-analysis.sh b/Docker/run-analysis.sh index ec7937f..9fa7622 100755 --- a/Docker/run-analysis.sh +++ b/Docker/run-analysis.sh @@ -14,7 +14,7 @@ DOMAINS=`node array-to-lines.js "$(echo $INPUT | jq .third_party_domains)"` source ./utils.sh -PREVIEW="TRUE" # set to "TRUE" in order to enable automatic screenshots kept in preview.png +PREVIEW="FALSE" # set to "TRUE" in order to enable automatic screenshots kept in preview.png if [ "$PREVIEW" = "TRUE" ]; then @@ -29,6 +29,8 @@ ORIGIN_DOMAIN=$(sed -e 's/[^/]*\/\/\([^@]*@\)\?\([^:/]*\).*/\2/' <<< "$URL") while IFS= read -r DOMAIN; do + # these domains return a 404 anyways, no need to waste time on them: + if is_http_error "$DOMAIN"; then echo "skipping $DOMAIN"; continue; fi load_website "$DOMAIN?hl=pl" "$DOMAIN" open_console grab "$DOMAIN before" diff --git a/Docker/utils.sh b/Docker/utils.sh index 61d32a4..4041a98 100644 --- a/Docker/utils.sh +++ b/Docker/utils.sh @@ -241,3 +241,14 @@ screenshot_and_annotate(){ "Cookie" "identyfikator internetowy z cookie" 11 ""\ "Referer" "Część mojej historii przeglądania" 0 "$ORIGIN_DOMAIN" } + +get_http_status(){ + _url="$1" + curl -L -s -o /dev/null --head -w "%{http_code}" "$_url" +} + +is_http_error(){ + _url="$1" + status=$(get_http_status "$_url") + [ "${status:0:1}" = "4" ] +}