Allow for zip downloads. Show only interesting screenshots

This commit is contained in:
Kuba Orlik 2022-07-08 16:36:48 +02:00
parent 04670e3236
commit 89d6134f30
7 changed files with 114 additions and 13 deletions

View File

@ -1,2 +1,3 @@
export const DOCKER_ARGS: string[];
export const IMAGE_NAME: "headless-fox";
export const VOLUME_MOUNT: string;

View File

@ -1,12 +1,17 @@
/// <reference types="node" />
import { ChildProcessWithoutNullStreams } from "child_process";
export declare type Image = {
url: string;
domain: string;
found_headers: Record<string, string>;
};
export default class ScreenshotRequest {
url: string;
domains: string[];
id: string;
status: string;
output: string;
images: Record<string, unknown>[];
images: Image[];
request_time: number;
started_time: number | null;
finished_time: number | null;
@ -22,14 +27,16 @@ export default class ScreenshotRequest {
id: string;
status: string;
output: string;
images: Record<string, unknown>[];
images: Record<string, Image[]>;
request_time: number;
started_time: number | null;
finished_time: number | null;
processing_took: number | null;
waiting_took: number | null;
elapsed_time_s: number;
zip_url: string | null;
}>;
getGoodImages(): Record<string, Image[]>;
setFinished(): void;
exec(): Promise<void>;
}

View File

@ -46,6 +46,8 @@ RUN git clone https://github.com/muquit/grabc && cd grabc && make && make instal
RUN apk add clang
RUN apk add freetype-dev
RUN python3 -m pip install --upgrade Pillow
RUN apk add zip
COPY . /opt
CMD /opt/prepare-firefox.sh

View File

@ -12,9 +12,11 @@ from PIL import Image, ImageDraw, ImageFont
output_file_relative = sys.argv[
1
] # this is also the existing source screenshot to annotate. It will be updated in-place
]
output_file = "/opt/static/" + output_file_relative
output_dir = os.path.dirname(output_file)
output_suffix = os.path.basename(output_file)
domain = sys.argv[2]
needles = sys.argv[3:]
@ -107,8 +109,9 @@ with Image.open(output_file) as im:
)
if len(found_needles) == 0:
exit(0)
os.remove(output_file)
im = im.resize((im.width // 2, im.height // 2))
im.save(output_file, "PNG")
im.save(output_dir + "/" + domain.replace(".", "_") + "_" + output_suffix, "PNG")
print(json.dumps({"new_file":
{"url": base_url + "/static/" + output_file_relative,
"domain": domain,

View File

@ -1,12 +1,14 @@
const IMAGE_NAME = "headless-fox";
const VOLUME_MOUNT = `${process.cwd()}/static:/opt/static`;
const DOCKER_ARGS = [
"run",
"-i",
"-v",
`${process.cwd()}/static:/opt/static`,
VOLUME_MOUNT,
IMAGE_NAME,
"./script3.sh",
];
module.exports = { DOCKER_ARGS, IMAGE_NAME };
module.exports = { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT };

View File

@ -7,7 +7,7 @@ import serve from "koa-static";
import qs from "qs";
import { Readable } from "stream";
import { v4 as uuid } from "uuid";
import { DOCKER_ARGS } from "./docker-args";
import { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT } from "./docker-args";
import { requests } from "./memory";
import ScreenshotRequest from "./request";
@ -81,11 +81,11 @@ router.get("/", async (ctx) => {
do {
response = await (await fetch(\`/api/requests/\${id}\`)).json();
output.innerHTML = JSON.stringify(response, null, " ").replace(
/\\/static\\/.*.png/g,
/\\/(static|api)\\/.*(.png|all-screenshots)/g,
'<a href="$&">$&</a>'
);
stdout.innerHTML = response.output.replace(
/\\/static\\/.*.png/g,
/\\/(static|api)\\/.*(.png|all-screenshots)/g,
'<a href="$&">$&</a>'
);
await sleep(1000);
@ -173,6 +173,28 @@ router.get("/api/requests/:id", async (ctx) => {
ctx.body = await request.getJSON();
});
/**
 * Streams a zip archive of every file stored under the request's directory
 * in the shared static volume.
 *
 * Responds with 404 when the request id is unknown or the request has not
 * reached the "finished" status. Otherwise `zip` is run inside a container
 * of the screenshot image and its stdout is piped straight into the response.
 */
router.get("/api/requests/:id/all-screenshots", async (ctx) => {
	const request = requests[ctx.params.id];
	if (!request || request.status !== "finished") {
		ctx.status = 404;
		return;
	}
	ctx.attachment(`${request.url.replace(/\W/g, "_")}_all-screenshots.zip`);
	ctx.response.set("content-type", "application/zip");
	// Named `zip_process` so the Node.js global `process` is not shadowed.
	const zip_process = spawn("docker", [
		"run",
		// Remove the container once zip exits; without this every download
		// leaves a stopped container behind on the host.
		"--rm",
		"-v",
		VOLUME_MOUNT,
		IMAGE_NAME,
		"zip",
		"--recurse-paths",
		"--junk-paths",
		"-", // write the archive to stdout
		`/opt/static/${request.id}`,
	]);
	// A ChildProcess 'error' event without a listener is thrown as an uncaught
	// exception, which would take the whole server down (e.g. docker missing).
	zip_process.on("error", (error) => {
		console.error("Could not run the zip container:", error);
	});
	ctx.body = zip_process.stdout;
});
app.use(router.routes()).use(router.allowedMethods());
const port = 3000;
app.listen(port);

View File

@ -6,11 +6,17 @@ import { q, requests } from "./memory";
let queue_order: ScreenshotRequest[] = [];
/**
 * One screenshot entry collected for a ScreenshotRequest.
 *
 * Entries are taken from the `new_file` objects parsed out of the
 * screenshotting process output and are not validated field by field.
 */
export type Image = {
// Address of the screenshot file.
url: string;
// Domain the screenshot belongs to; getGoodImages groups screenshots by it.
domain: string;
// Values found for this screenshot; getGoodImages uses them to decide which
// screenshots are worth showing.
// NOTE(review): presumably maps a header name to its value — confirm.
found_headers: Record<string, string>;
};
export default class ScreenshotRequest {
public id = uuid();
public status = "waiting";
public output = "";
public images: Record<string, unknown>[] = [];
public images: Image[] = [];
public request_time: number = Date.now();
public started_time: number | null = null;
public finished_time: number | null = null;
@ -47,13 +53,14 @@ export default class ScreenshotRequest {
id: string;
status: string;
output: string;
images: Record<string, unknown>[];
images: Record<string, Image[]>;
request_time: number;
started_time: number | null;
finished_time: number | null;
processing_took: number | null;
waiting_took: number | null;
elapsed_time_s: number;
zip_url: string | null;
}> {
return {
url: this.url,
@ -62,7 +69,7 @@ export default class ScreenshotRequest {
id: this.id,
status: this.status,
output: this.output,
images: this.images,
images: this.getGoodImages(),
request_time: this.request_time,
started_time: this.started_time,
finished_time: this.finished_time,
@ -73,9 +80,66 @@ export default class ScreenshotRequest {
this.request_time) /
1000
),
zip_url:
this.status === "finished"
? `/api/requests/${this.id}/all-screenshots`
: null,
};
}
/**
 * Selects, per domain, the screenshots that are worth showing.
 *
 * Screenshots of a domain are ranked by how many `found_headers` values they
 * carry (most first). Walking that ranking, a screenshot is kept only if it
 * contributes at least one value that no earlier kept-or-skipped screenshot
 * has contributed; the walk stops as soon as every distinct value of the
 * domain has been seen.
 *
 * @returns the selected screenshots keyed by domain
 */
getGoodImages(): Record<string, Image[]> {
	// Bucket the screenshots by domain, preserving first-seen order.
	const by_domain = new Map<string, Image[]>();
	for (const image of this.images) {
		const bucket = by_domain.get(image.domain);
		if (bucket === undefined) {
			by_domain.set(image.domain, [image]);
		} else {
			bucket.push(image);
		}
	}

	const count_values = (image: Image): number =>
		Object.values(image.found_headers).length;

	const selected: Record<string, Image[]> = {};
	by_domain.forEach((bucket, domain) => {
		// Richest screenshots first; ties keep their original relative order.
		bucket.sort((left, right) => count_values(right) - count_values(left));

		// Every distinct value that appears anywhere for this domain.
		const every_value = new Set<string>();
		for (const image of bucket) {
			for (const value of Object.values(image.found_headers)) {
				every_value.add(value);
			}
		}

		const picked: Image[] = [];
		const covered = new Set<string>();
		for (const image of bucket) {
			let adds_something = false;
			for (const value of Object.values(image.found_headers)) {
				if (covered.has(value)) {
					continue;
				}
				covered.add(value);
				adds_something = true;
			}
			if (adds_something) {
				picked.push(image);
			}
			// Nothing left to discover: the remaining screenshots are redundant.
			if (covered.size === every_value.size) {
				break;
			}
		}
		selected[domain] = picked;
	});
	return selected;
}
setFinished(): void {
this.status = "finished";
this.finished_time = Date.now();
@ -122,7 +186,7 @@ export default class ScreenshotRequest {
is(parsed, predicates.object) &&
is(parsed.new_file, predicates.object)
) {
this.images.push(parsed.new_file);
this.images.push(parsed.new_file as Image);
}
} catch (e) {
//noop