From a4d6d7d2d2029ef529b64d31bf6e0333161bbed8 Mon Sep 17 00:00:00 2001 From: Kuba Orlik Date: Fri, 8 Jul 2022 16:51:33 +0200 Subject: [PATCH] Only include the good files in the zip --- @types/src/request.d.ts | 5 +++-- Docker/annotate_header.py | 6 ++++-- src/index.ts | 5 +++-- src/request.ts | 34 +++++++++++++++++++++++----------- 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/@types/src/request.d.ts b/@types/src/request.d.ts index 89c705d..47dfb65 100644 --- a/@types/src/request.d.ts +++ b/@types/src/request.d.ts @@ -4,6 +4,7 @@ export declare type Image = { url: string; domain: string; found_headers: Record; + filename: string; }; export default class ScreenshotRequest { url: string; @@ -27,7 +28,7 @@ export default class ScreenshotRequest { id: string; status: string; output: string; - images: Record; + images: Image[]; request_time: number; started_time: number | null; finished_time: number | null; @@ -36,7 +37,7 @@ export default class ScreenshotRequest { elapsed_time_s: number; zip_url: string | null; }>; - getGoodImages(): Record; + getGoodImages(): Image[]; setFinished(): void; exec(): Promise; } diff --git a/Docker/annotate_header.py b/Docker/annotate_header.py index dd9ee66..1b4fec3 100644 --- a/Docker/annotate_header.py +++ b/Docker/annotate_header.py @@ -111,8 +111,10 @@ with Image.open(output_file) as im: exit(0) os.remove(output_file) im = im.resize((im.width // 2, im.height // 2)) - im.save(output_dir + "/" + domain.replace(".", "_") + "_" + output_suffix, "PNG") + output_filename = domain.replace(".", "_") + "_" + output_suffix + im.save(output_dir + "/" + output_filename, "PNG") print(json.dumps({"new_file": {"url": base_url + "/static/" + output_file_relative, "domain": domain, - "found_headers": found_needles}})) + "found_headers": found_needles, + "filename": output_filename}})) diff --git a/src/index.ts b/src/index.ts index b57049d..0164eab 100644 --- a/src/index.ts +++ b/src/index.ts @@ -187,10 +187,11 @@ router.get("/api/requests/:id/all-screenshots", async (ctx) => { VOLUME_MOUNT, IMAGE_NAME, "zip", - "--recurse-paths", "--junk-paths", "-", - `/opt/static/${request.id}`, + ...request + .getGoodImages() + .map((image) => `/opt/static/${request.id}/${image.filename}`), ]); ctx.body = process.stdout; }); diff --git a/src/request.ts b/src/request.ts index 2503a9f..08e7179 100644 --- a/src/request.ts +++ b/src/request.ts @@ -10,6 +10,7 @@ export type Image = { url: string; domain: string; found_headers: Record; + filename: string; }; export default class ScreenshotRequest { @@ -53,7 +54,7 @@ export default class ScreenshotRequest { id: string; status: string; output: string; - images: Record; + images: Image[]; request_time: number; started_time: number | null; finished_time: number | null; @@ -87,8 +88,8 @@ export default class ScreenshotRequest { }; } - getGoodImages(): Record { - const result: Record = {}; + getGoodImages(): Image[] { + const result: Image[] = []; const domains = Array.from( new Set(this.images.map((image) => image.domain)) ); @@ -107,7 +108,20 @@ export default class ScreenshotRequest { ) { return 1; } else { - return 0; + // same amount of headers, see who has longest values + if ( + Object.values(image1.found_headers).join("").length > + Object.values(image2.found_headers).join("").length + ) { + return -1; + } else if ( + Object.values(image1.found_headers).join("").length < + Object.values(image2.found_headers).join("").length + ) { + return 1; + } else { + return 0; + } } }); const all_values = Array.from( @@ -117,25 +131,23 @@ export default class ScreenshotRequest { .reduce((a, b) => a.concat(b)) ) ); - const images_to_show = []; - const shown_values = new Set(); + const shown_values_for_domain = new Set(); for (const image of images) { const values_in_image = Object.values(image.found_headers); let any_new_values = false; for (const value of values_in_image) { - if (!shown_values.has(value)) { - shown_values.add(value); + if (!shown_values_for_domain.has(value)) { + shown_values_for_domain.add(value); any_new_values = true; } } if (any_new_values) { - images_to_show.push(image); + result.push(image); } - if (shown_values.size == all_values.length) { + if (shown_values_for_domain.size == all_values.length) { break; } } - result[domain] = images_to_show; } return result; }