Only include the good files in the zip

This commit is contained in:
Kuba Orlik 2022-07-08 16:51:33 +02:00
parent 89d6134f30
commit a4d6d7d2d2
4 changed files with 33 additions and 17 deletions

View File

@ -4,6 +4,7 @@ export declare type Image = {
url: string; url: string;
domain: string; domain: string;
found_headers: Record<string, string>; found_headers: Record<string, string>;
filename: string;
}; };
export default class ScreenshotRequest { export default class ScreenshotRequest {
url: string; url: string;
@ -27,7 +28,7 @@ export default class ScreenshotRequest {
id: string; id: string;
status: string; status: string;
output: string; output: string;
images: Record<string, Image[]>; images: Image[];
request_time: number; request_time: number;
started_time: number | null; started_time: number | null;
finished_time: number | null; finished_time: number | null;
@ -36,7 +37,7 @@ export default class ScreenshotRequest {
elapsed_time_s: number; elapsed_time_s: number;
zip_url: string | null; zip_url: string | null;
}>; }>;
getGoodImages(): Record<string, Image[]>; getGoodImages(): Image[];
setFinished(): void; setFinished(): void;
exec(): Promise<void>; exec(): Promise<void>;
} }

View File

@ -111,8 +111,10 @@ with Image.open(output_file) as im:
exit(0) exit(0)
os.remove(output_file) os.remove(output_file)
im = im.resize((im.width // 2, im.height // 2)) im = im.resize((im.width // 2, im.height // 2))
im.save(output_dir + "/" + domain.replace(".", "_") + "_" + output_suffix, "PNG") output_filename = domain.replace(".", "_") + "_" + output_suffix
im.save(output_dir + "/" + output_filename, "PNG")
print(json.dumps({"new_file": print(json.dumps({"new_file":
{"url": base_url + "/static/" + output_file_relative, {"url": base_url + "/static/" + output_file_relative,
"domain": domain, "domain": domain,
"found_headers": found_needles}})) "found_headers": found_needles,
"filename": output_filename}}))

View File

@ -187,10 +187,11 @@ router.get("/api/requests/:id/all-screenshots", async (ctx) => {
VOLUME_MOUNT, VOLUME_MOUNT,
IMAGE_NAME, IMAGE_NAME,
"zip", "zip",
"--recurse-paths",
"--junk-paths", "--junk-paths",
"-", "-",
`/opt/static/${request.id}`, ...request
.getGoodImages()
.map((image) => `/opt/static/${request.id}/${image.filename}`),
]); ]);
ctx.body = process.stdout; ctx.body = process.stdout;
}); });

View File

@ -10,6 +10,7 @@ export type Image = {
url: string; url: string;
domain: string; domain: string;
found_headers: Record<string, string>; found_headers: Record<string, string>;
filename: string;
}; };
export default class ScreenshotRequest { export default class ScreenshotRequest {
@ -53,7 +54,7 @@ export default class ScreenshotRequest {
id: string; id: string;
status: string; status: string;
output: string; output: string;
images: Record<string, Image[]>; images: Image[];
request_time: number; request_time: number;
started_time: number | null; started_time: number | null;
finished_time: number | null; finished_time: number | null;
@ -87,8 +88,8 @@ export default class ScreenshotRequest {
}; };
} }
getGoodImages(): Record<string, Image[]> { getGoodImages(): Image[] {
const result: Record<string, Image[]> = {}; const result: Image[] = [];
const domains = Array.from( const domains = Array.from(
new Set(this.images.map((image) => image.domain)) new Set(this.images.map((image) => image.domain))
); );
@ -106,9 +107,22 @@ export default class ScreenshotRequest {
Object.values(image2.found_headers).length Object.values(image2.found_headers).length
) { ) {
return 1; return 1;
} else {
// same number of headers: prefer the image whose header values have the greater combined length
if (
Object.values(image1.found_headers).join("").length >
Object.values(image2.found_headers).join("").length
) {
return -1;
} else if (
Object.values(image1.found_headers).join("").length <
Object.values(image2.found_headers).join("").length
) {
return 1;
} else { } else {
return 0; return 0;
} }
}
}); });
const all_values = Array.from( const all_values = Array.from(
new Set( new Set(
@ -117,25 +131,23 @@ export default class ScreenshotRequest {
.reduce((a, b) => a.concat(b)) .reduce((a, b) => a.concat(b))
) )
); );
const images_to_show = []; const shown_values_for_domain = new Set();
const shown_values = new Set();
for (const image of images) { for (const image of images) {
const values_in_image = Object.values(image.found_headers); const values_in_image = Object.values(image.found_headers);
let any_new_values = false; let any_new_values = false;
for (const value of values_in_image) { for (const value of values_in_image) {
if (!shown_values.has(value)) { if (!shown_values_for_domain.has(value)) {
shown_values.add(value); shown_values_for_domain.add(value);
any_new_values = true; any_new_values = true;
} }
} }
if (any_new_values) { if (any_new_values) {
images_to_show.push(image); result.push(image);
} }
if (shown_values.size == all_values.length) { if (shown_values_for_domain.size == all_values.length) {
break; break;
} }
} }
result[domain] = images_to_show;
} }
return result; return result;
} }