From e003190c2f347e63f856bfd040002c92498d4014 Mon Sep 17 00:00:00 2001
From: Kuba Orlik
Date: Fri, 15 Jul 2022 11:36:44 +0200
Subject: [PATCH] Generate thumbnails of screenshots

annotate_header.py now saves a 1/5-scale JPEG thumbnail next to every
annotated screenshot and reports it as thumb_url/thumb_filename (the
thumbnail is converted to RGB first, because JPEG cannot store alpha).
The URL-matching regexes on the "/" page also match .jpg files, with the
extension dots escaped so that they only match a literal ".". ZIP
generation moves from index.ts into ScreenshotRequest.getZIP(), and the
preview URL is versioned by the mtime of preview.jpg instead of
Date.now().

---
 .gitignore                |  1 +
 Docker/annotate_header.py | 11 +++++++++--
 Docker/run-analysis.sh    |  2 +-
 src/index.ts              | 18 +++---------------
 src/request.ts            | 46 +++++++++++++++++++++++++++++++++++++++++++++-
 5 files changed, 59 insertions(+), 19 deletions(-)

diff --git a/.gitignore b/.gitignore
index 0865e46..741ab03 100644
--- a/.gitignore
+++ b/.gitignore
@@ -4,3 +4,4 @@
 /.log/
 /yarn-error.log
 /lib/
+@types
diff --git a/Docker/annotate_header.py b/Docker/annotate_header.py
index 11d678a..37c7b6e 100644
--- a/Docker/annotate_header.py
+++ b/Docker/annotate_header.py
@@ -112,10 +112,17 @@ with Image.open(output_file) as im:
     os.remove(output_file)
     im = im.crop((0, 24, 2880, 1588))
     im = im.resize((im.width // 2, im.height // 2))
+    thumbnail = im.resize((im.width // 5, im.height // 5)).convert("RGB")  # JPEG has no alpha channel
     output_filename = domain.replace(".", "_") + "_" + output_suffix
+    thumbnail_filename = domain.replace(".", "_") + "_thumb_" + output_suffix.replace("png", "jpg")
     im.save(output_dir + "/" + output_filename, "PNG")
+    thumbnail.save(output_dir + "/" + thumbnail_filename, "JPEG")
     print(json.dumps({"new_file":
-                      {"url": base_url + "/static/" + os.path.dirname(output_file_relative) + "/" + output_filename,
+                      {
+                       "url": base_url + "/static/" + os.path.dirname(output_file_relative) + "/" + output_filename,
+                       "thumb_url": base_url + "/static/" + os.path.dirname(output_file_relative) + "/" + thumbnail_filename,
                        "domain": domain,
                        "found_headers": found_needles,
-                       "filename": output_filename}}))
+                       "filename": output_filename,
+                       "thumb_filename": thumbnail_filename
+                      }}))
diff --git a/Docker/run-analysis.sh b/Docker/run-analysis.sh
index 66ac252..49da1dc 100755
--- a/Docker/run-analysis.sh
+++ b/Docker/run-analysis.sh
@@ -45,7 +45,7 @@ done <<< "$DOMAINS"
 
 click 1270 217 # the "trash" icon, so requests from plamienie don't appear in the screenshots
 load_website "$URL" "$URL"
-echo "{\"current_action\": \"Strona wczytana\"}"
+echo "{\"current_action\": \"Strona $ORIGIN_DOMAIN wczytana. Przygotowywanie do analizy...\"}"
 grab load_website
 open_network_inspector
 grab open_network_inspector
diff --git a/src/index.ts b/src/index.ts
index f8ddc7f..b5237b4 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -81,11 +81,11 @@ router.get("/", async (ctx) => {
 				do {
 					response = await (await fetch(\`/api/requests/\${id}\`)).json();
 					output.innerHTML = JSON.stringify(response, null, " ").replace(
-						/\\/(static|api)\\/.*(.png|all-screenshots|v=[0-9]+)/g,
+						/\\/(static|api)\\/.*(\\.png|\\.jpg|all-screenshots|v=[0-9]+)/g,
 						'$&'
 					);
 					stdout.innerHTML = response.output.replace(
-						/\\/(static|api)\\/.*(.png|all-screenshots)/g,
+						/\\/(static|api)\\/.*(\\.png|\\.jpg|all-screenshots)/g,
 						'$&'
 					);
 					await sleep(1000);
@@ -181,19 +181,7 @@ router.get("/api/requests/:id/all-screenshots", async (ctx) => {
 	}
 	ctx.attachment(`${request.url.replace(/\W/g, "_")}_all-screenshots.zip`);
 	ctx.response.set("content-type", "application/zip");
-	const process = spawn("docker", [
-		"run",
-		"-v",
-		VOLUME_MOUNT,
-		IMAGE_NAME,
-		"zip",
-		"--junk-paths",
-		"-",
-		...request
-			.getGoodImages()
-			.map((image) => `/opt/static/${request.id}/${image.filename}`),
-	]);
-	ctx.body = process.stdout;
+	ctx.body = request.getZIP();
 });
 
 app.use(router.routes()).use(router.allowedMethods());
diff --git a/src/request.ts b/src/request.ts
index 3a64d43..e0c0650 100644
--- a/src/request.ts
+++ b/src/request.ts
@@ -1,7 +1,9 @@
 import { is, predicates } from "@sealcode/ts-predicates";
 import { ChildProcessWithoutNullStreams, spawn } from "child_process";
+import { Readable } from "stream";
 import { v4 as uuid } from "uuid";
 import containerPool from "./container-pool";
+import { IMAGE_NAME, VOLUME_MOUNT } from "./docker-args";
 import { q, requests } from "./memory";
 
 let queue_order: ScreenshotRequest[] = [];
@@ -90,10 +92,31 @@ export default class ScreenshotRequest {
 				this.status === "finished"
 					? `/api/requests/${this.id}/all-screenshots`
 					: null,
-			preview: `/static/${this.id}/preview.jpg?v=${Date.now()}`,
+			preview: await this.getPreviewURL(),
 		};
 	}
 
+	/**
+	 * Builds the preview image URL with a cache-busting `v` parameter taken
+	 * from the output of `stat -c %Y` on preview.jpg (its modification time),
+	 * so the URL stays the same for as long as the file does. Whatever `stat`
+	 * writes to stderr is logged; if it prints nothing to stdout, `v` is empty.
+	 */
+	async getPreviewURL(): Promise<string> {
+		const process = spawn("stat", [
+			"-c",
+			"%Y",
+			`${__dirname}/../../static/${this.id}/preview.jpg`,
+		]);
+		let result = "";
+		process.stdout.on("data", (data) => (result += data.toString().trim()));
+		process.stderr.on("data", (data) => console.log(data.toString()));
+		const mtime = await new Promise<string>((resolve) => {
+			process.on("close", () => resolve(result));
+		});
+		return `/static/${this.id}/preview.jpg?v=${mtime}`;
+	}
+
 	getGoodImages(): Image[] {
 		/* find the best set of screenshots, that is: a set of screenshots that
 		   contain all the header values that appear in the headers, but with as little
@@ -232,4 +255,25 @@ export default class ScreenshotRequest {
 			});
 		});
 	}
+
+	/**
+	 * Spawns `zip` in a docker container over the files returned by
+	 * getGoodImages() and returns the child's stdout, which receives the
+	 * archive because `zip` is given `-` as its output file.
+	 */
+	getZIP(): Readable {
+		const process = spawn("docker", [
+			"run",
+			"-v",
+			VOLUME_MOUNT,
+			IMAGE_NAME,
+			"zip",
+			"--junk-paths",
+			"-",
+			...this.getGoodImages().map(
+				(image) => `/opt/static/${this.id}/${image.filename}`
+			),
+		]);
+		return process.stdout;
+	}
 }