Allow for zip downloads. Show only interesting screenshots

This commit is contained in:
Kuba Orlik 2022-07-08 16:36:48 +02:00
parent 04670e3236
commit 89d6134f30
7 changed files with 114 additions and 13 deletions

View File

@ -1,2 +1,3 @@
export const DOCKER_ARGS: string[]; export const DOCKER_ARGS: string[];
export const IMAGE_NAME: "headless-fox"; export const IMAGE_NAME: "headless-fox";
export const VOLUME_MOUNT: string;

View File

@ -1,12 +1,17 @@
/// <reference types="node" /> /// <reference types="node" />
import { ChildProcessWithoutNullStreams } from "child_process"; import { ChildProcessWithoutNullStreams } from "child_process";
export declare type Image = {
url: string;
domain: string;
found_headers: Record<string, string>;
};
export default class ScreenshotRequest { export default class ScreenshotRequest {
url: string; url: string;
domains: string[]; domains: string[];
id: string; id: string;
status: string; status: string;
output: string; output: string;
images: Record<string, unknown>[]; images: Image[];
request_time: number; request_time: number;
started_time: number | null; started_time: number | null;
finished_time: number | null; finished_time: number | null;
@ -22,14 +27,16 @@ export default class ScreenshotRequest {
id: string; id: string;
status: string; status: string;
output: string; output: string;
images: Record<string, unknown>[]; images: Record<string, Image[]>;
request_time: number; request_time: number;
started_time: number | null; started_time: number | null;
finished_time: number | null; finished_time: number | null;
processing_took: number | null; processing_took: number | null;
waiting_took: number | null; waiting_took: number | null;
elapsed_time_s: number; elapsed_time_s: number;
zip_url: string | null;
}>; }>;
getGoodImages(): Record<string, Image[]>;
setFinished(): void; setFinished(): void;
exec(): Promise<void>; exec(): Promise<void>;
} }

View File

@ -46,6 +46,8 @@ RUN git clone https://github.com/muquit/grabc && cd grabc && make && make instal
RUN apk add clang RUN apk add clang
RUN apk add freetype-dev RUN apk add freetype-dev
RUN python3 -m pip install --upgrade Pillow RUN python3 -m pip install --upgrade Pillow
RUN apk add zip
COPY . /opt COPY . /opt
CMD /opt/prepare-firefox.sh CMD /opt/prepare-firefox.sh

View File

@ -12,9 +12,11 @@ from PIL import Image, ImageDraw, ImageFont
output_file_relative = sys.argv[ output_file_relative = sys.argv[
1 1
] # this is also the existing source screenshot to annotate. It will be updated in-place ]
output_file = "/opt/static/" + output_file_relative output_file = "/opt/static/" + output_file_relative
output_dir = os.path.dirname(output_file)
output_suffix = os.path.basename(output_file)
domain = sys.argv[2] domain = sys.argv[2]
needles = sys.argv[3:] needles = sys.argv[3:]
@ -107,8 +109,9 @@ with Image.open(output_file) as im:
) )
if len(found_needles) == 0: if len(found_needles) == 0:
exit(0) exit(0)
os.remove(output_file)
im = im.resize((im.width // 2, im.height // 2)) im = im.resize((im.width // 2, im.height // 2))
im.save(output_file, "PNG") im.save(output_dir + "/" + domain.replace(".", "_") + "_" + output_suffix, "PNG")
print(json.dumps({"new_file": print(json.dumps({"new_file":
{"url": base_url + "/static/" + output_file_relative, {"url": base_url + "/static/" + output_file_relative,
"domain": domain, "domain": domain,

View File

@ -1,12 +1,14 @@
const IMAGE_NAME = "headless-fox"; const IMAGE_NAME = "headless-fox";
const VOLUME_MOUNT = `${process.cwd()}/static:/opt/static`;
const DOCKER_ARGS = [ const DOCKER_ARGS = [
"run", "run",
"-i", "-i",
"-v", "-v",
`${process.cwd()}/static:/opt/static`, VOLUME_MOUNT,
IMAGE_NAME, IMAGE_NAME,
"./script3.sh", "./script3.sh",
]; ];
module.exports = { DOCKER_ARGS, IMAGE_NAME }; module.exports = { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT };

View File

@ -7,7 +7,7 @@ import serve from "koa-static";
import qs from "qs"; import qs from "qs";
import { Readable } from "stream"; import { Readable } from "stream";
import { v4 as uuid } from "uuid"; import { v4 as uuid } from "uuid";
import { DOCKER_ARGS } from "./docker-args"; import { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT } from "./docker-args";
import { requests } from "./memory"; import { requests } from "./memory";
import ScreenshotRequest from "./request"; import ScreenshotRequest from "./request";
@ -81,11 +81,11 @@ router.get("/", async (ctx) => {
do { do {
response = await (await fetch(\`/api/requests/\${id}\`)).json(); response = await (await fetch(\`/api/requests/\${id}\`)).json();
output.innerHTML = JSON.stringify(response, null, " ").replace( output.innerHTML = JSON.stringify(response, null, " ").replace(
/\\/static\\/.*.png/g, /\\/(static|api)\\/.*(.png|all-screenshots)/g,
'<a href="$&">$&</a>' '<a href="$&">$&</a>'
); );
stdout.innerHTML = response.output.replace( stdout.innerHTML = response.output.replace(
/\\/static\\/.*.png/g, /\\/(static|api)\\/.*(.png|all-screenshots)/g,
'<a href="$&">$&</a>' '<a href="$&">$&</a>'
); );
await sleep(1000); await sleep(1000);
@ -173,6 +173,28 @@ router.get("/api/requests/:id", async (ctx) => {
ctx.body = await request.getJSON(); ctx.body = await request.getJSON();
}); });
// Streams a zip archive of the files stored under /opt/static/<request id>
// as a download. Responds with 404 when the id is unknown or the request has
// not reached the "finished" status yet.
router.get("/api/requests/:id/all-screenshots", async (ctx) => {
	const request = requests[ctx.params.id];
	if (!request || request.status !== "finished") {
		ctx.status = 404;
		return;
	}
	// Every non-word character of the URL becomes "_" so the download name
	// is a safe file name.
	ctx.attachment(`${request.url.replace(/\W/g, "_")}_all-screenshots.zip`);
	ctx.response.set("content-type", "application/zip");
	// Named zip_process so Node's global `process` is not shadowed here.
	const zip_process = spawn("docker", [
		"run",
		// Remove the one-off container when zip exits; without this flag
		// every download leaves a stopped container behind.
		"--rm",
		"-v",
		VOLUME_MOUNT,
		IMAGE_NAME,
		"zip",
		"--recurse-paths",
		"--junk-paths",
		// "-" makes zip write the archive to stdout, which is piped to the client.
		"-",
		`/opt/static/${request.id}`,
	]);
	// A child process 'error' event (e.g. the docker binary cannot be spawned)
	// with no listener would be thrown as an uncaught exception.
	zip_process.on("error", (error) => {
		console.error("Could not run the zip container:", error);
	});
	ctx.body = zip_process.stdout;
});
app.use(router.routes()).use(router.allowedMethods()); app.use(router.routes()).use(router.allowedMethods());
const port = 3000; const port = 3000;
app.listen(port); app.listen(port);

View File

@ -6,11 +6,17 @@ import { q, requests } from "./memory";
let queue_order: ScreenshotRequest[] = []; let queue_order: ScreenshotRequest[] = [];
/**
 * Description of a single screenshot. Objects of this shape are taken from the
 * `new_file` field of JSON messages found in the screenshot job's output and
 * collected in `ScreenshotRequest.images`.
 */
export type Image = {
// Location of the screenshot file (presumably a /static/ URL - confirm against the annotation script).
url: string;
// Domain the screenshot belongs to; getGoodImages groups images by this value.
domain: string;
// Values found in this screenshot; getGoodImages compares these values to skip
// screenshots that show nothing new (presumably header name -> value - TODO confirm).
found_headers: Record<string, string>;
};
export default class ScreenshotRequest { export default class ScreenshotRequest {
public id = uuid(); public id = uuid();
public status = "waiting"; public status = "waiting";
public output = ""; public output = "";
public images: Record<string, unknown>[] = []; public images: Image[] = [];
public request_time: number = Date.now(); public request_time: number = Date.now();
public started_time: number | null = null; public started_time: number | null = null;
public finished_time: number | null = null; public finished_time: number | null = null;
@ -47,13 +53,14 @@ export default class ScreenshotRequest {
id: string; id: string;
status: string; status: string;
output: string; output: string;
images: Record<string, unknown>[]; images: Record<string, Image[]>;
request_time: number; request_time: number;
started_time: number | null; started_time: number | null;
finished_time: number | null; finished_time: number | null;
processing_took: number | null; processing_took: number | null;
waiting_took: number | null; waiting_took: number | null;
elapsed_time_s: number; elapsed_time_s: number;
zip_url: string | null;
}> { }> {
return { return {
url: this.url, url: this.url,
@ -62,7 +69,7 @@ export default class ScreenshotRequest {
id: this.id, id: this.id,
status: this.status, status: this.status,
output: this.output, output: this.output,
images: this.images, images: this.getGoodImages(),
request_time: this.request_time, request_time: this.request_time,
started_time: this.started_time, started_time: this.started_time,
finished_time: this.finished_time, finished_time: this.finished_time,
@ -73,9 +80,66 @@ export default class ScreenshotRequest {
this.request_time) / this.request_time) /
1000 1000
), ),
zip_url:
this.status === "finished"
? `/api/requests/${this.id}/all-screenshots`
: null,
}; };
} }
/**
 * Picks, for every domain, the smallest leading run of screenshots that
 * together show every distinct `found_headers` value seen for that domain.
 *
 * Screenshots of a domain are visited from the one with the most
 * `found_headers` values to the one with the fewest; a screenshot is kept
 * only when it contributes at least one value no earlier screenshot showed.
 *
 * @returns a map from domain to the screenshots worth showing for it
 */
getGoodImages(): Record<string, Image[]> {
	const picked_by_domain: Record<string, Image[]> = {};
	const unique_domains = Array.from(
		new Set(this.images.map((entry) => entry.domain))
	);
	for (const domain of unique_domains) {
		// Richest screenshots first, so the fewest possible images are needed.
		const candidates = this.images
			.filter((entry) => entry.domain === domain)
			.sort(
				(left, right) =>
					Object.values(right.found_headers).length -
					Object.values(left.found_headers).length
			);

		// Every distinct value that appears anywhere for this domain.
		const wanted_values = new Set<string>();
		for (const candidate of candidates) {
			for (const value of Object.values(candidate.found_headers)) {
				wanted_values.add(value);
			}
		}

		const picked: Image[] = [];
		const covered_values = new Set<string>();
		for (const candidate of candidates) {
			const covered_before = covered_values.size;
			for (const value of Object.values(candidate.found_headers)) {
				covered_values.add(value);
			}
			// The set grew, so this screenshot shows something new.
			if (covered_values.size > covered_before) {
				picked.push(candidate);
			}
			// Stop as soon as every value is covered.
			if (covered_values.size === wanted_values.size) {
				break;
			}
		}
		picked_by_domain[domain] = picked;
	}
	return picked_by_domain;
}
setFinished(): void { setFinished(): void {
this.status = "finished"; this.status = "finished";
this.finished_time = Date.now(); this.finished_time = Date.now();
@ -122,7 +186,7 @@ export default class ScreenshotRequest {
is(parsed, predicates.object) && is(parsed, predicates.object) &&
is(parsed.new_file, predicates.object) is(parsed.new_file, predicates.object)
) { ) {
this.images.push(parsed.new_file); this.images.push(parsed.new_file as Image);
} }
} catch (e) { } catch (e) {
//noop //noop