Only include the good files in the zip

This commit is contained in:
Kuba Orlik 2022-07-08 16:51:33 +02:00
parent 89d6134f30
commit a4d6d7d2d2
4 changed files with 33 additions and 17 deletions

View File

@ -4,6 +4,7 @@ export declare type Image = {
url: string;
domain: string;
found_headers: Record<string, string>;
filename: string;
};
export default class ScreenshotRequest {
url: string;
@ -27,7 +28,7 @@ export default class ScreenshotRequest {
id: string;
status: string;
output: string;
images: Record<string, Image[]>;
images: Image[];
request_time: number;
started_time: number | null;
finished_time: number | null;
@ -36,7 +37,7 @@ export default class ScreenshotRequest {
elapsed_time_s: number;
zip_url: string | null;
}>;
getGoodImages(): Record<string, Image[]>;
getGoodImages(): Image[];
setFinished(): void;
exec(): Promise<void>;
}

View File

@ -111,8 +111,10 @@ with Image.open(output_file) as im:
exit(0)
os.remove(output_file)
im = im.resize((im.width // 2, im.height // 2))
im.save(output_dir + "/" + domain.replace(".", "_") + "_" + output_suffix, "PNG")
output_filename = domain.replace(".", "_") + "_" + output_suffix
im.save(output_dir + "/" + output_filename, "PNG")
print(json.dumps({"new_file":
{"url": base_url + "/static/" + output_file_relative,
"domain": domain,
"found_headers": found_needles}}))
"found_headers": found_needles,
"filename": output_filename}}))

View File

@ -187,10 +187,11 @@ router.get("/api/requests/:id/all-screenshots", async (ctx) => {
VOLUME_MOUNT,
IMAGE_NAME,
"zip",
"--recurse-paths",
"--junk-paths",
"-",
`/opt/static/${request.id}`,
...request
.getGoodImages()
.map((image) => `/opt/static/${request.id}/${image.filename}`),
]);
ctx.body = process.stdout;
});

View File

@ -10,6 +10,7 @@ export type Image = {
url: string;
domain: string;
found_headers: Record<string, string>;
filename: string;
};
export default class ScreenshotRequest {
@ -53,7 +54,7 @@ export default class ScreenshotRequest {
id: string;
status: string;
output: string;
images: Record<string, Image[]>;
images: Image[];
request_time: number;
started_time: number | null;
finished_time: number | null;
@ -87,8 +88,8 @@ export default class ScreenshotRequest {
};
}
getGoodImages(): Record<string, Image[]> {
const result: Record<string, Image[]> = {};
getGoodImages(): Image[] {
const result: Image[] = [];
const domains = Array.from(
new Set(this.images.map((image) => image.domain))
);
@ -106,9 +107,22 @@ export default class ScreenshotRequest {
Object.values(image2.found_headers).length
) {
return 1;
} else {
// same amount of headers, see who has longest values
if (
Object.values(image1.found_headers).join("").length >
Object.values(image2.found_headers).join("").length
) {
return -1;
} else if (
Object.values(image1.found_headers).join("").length <
Object.values(image2.found_headers).join("").length
) {
return 1;
} else {
return 0;
}
}
});
const all_values = Array.from(
new Set(
@ -117,25 +131,23 @@ export default class ScreenshotRequest {
.reduce((a, b) => a.concat(b))
)
);
const images_to_show = [];
const shown_values = new Set();
const shown_values_for_domain = new Set();
for (const image of images) {
const values_in_image = Object.values(image.found_headers);
let any_new_values = false;
for (const value of values_in_image) {
if (!shown_values.has(value)) {
shown_values.add(value);
if (!shown_values_for_domain.has(value)) {
shown_values_for_domain.add(value);
any_new_values = true;
}
}
if (any_new_values) {
images_to_show.push(image);
result.push(image);
}
if (shown_values.size == all_values.length) {
if (shown_values_for_domain.size == all_values.length) {
break;
}
}
result[domain] = images_to_show;
}
return result;
}