diff --git a/@types/src/docker-args.d.ts b/@types/src/docker-args.d.ts index 8af90ce..6e0fd4a 100644 --- a/@types/src/docker-args.d.ts +++ b/@types/src/docker-args.d.ts @@ -1,2 +1,3 @@ export const DOCKER_ARGS: string[]; export const IMAGE_NAME: "headless-fox"; +export const VOLUME_MOUNT: string; diff --git a/@types/src/request.d.ts b/@types/src/request.d.ts index e6c37b1..89c705d 100644 --- a/@types/src/request.d.ts +++ b/@types/src/request.d.ts @@ -1,12 +1,17 @@ /// import { ChildProcessWithoutNullStreams } from "child_process"; +export declare type Image = { + url: string; + domain: string; + found_headers: Record; +}; export default class ScreenshotRequest { url: string; domains: string[]; id: string; status: string; output: string; - images: Record[]; + images: Image[]; request_time: number; started_time: number | null; finished_time: number | null; @@ -22,14 +27,16 @@ export default class ScreenshotRequest { id: string; status: string; output: string; - images: Record[]; + images: Record; request_time: number; started_time: number | null; finished_time: number | null; processing_took: number | null; waiting_took: number | null; elapsed_time_s: number; + zip_url: string | null; }>; + getGoodImages(): Record; setFinished(): void; exec(): Promise; } diff --git a/Docker/Dockerfile b/Docker/Dockerfile index ed77327..035964e 100644 --- a/Docker/Dockerfile +++ b/Docker/Dockerfile @@ -46,6 +46,8 @@ RUN git clone https://github.com/muquit/grabc && cd grabc && make && make instal RUN apk add clang RUN apk add freetype-dev RUN python3 -m pip install --upgrade Pillow +RUN apk add zip + COPY . /opt CMD /opt/prepare-firefox.sh diff --git a/Docker/annotate_header.py b/Docker/annotate_header.py index 75d40c8..dd9ee66 100644 --- a/Docker/annotate_header.py +++ b/Docker/annotate_header.py @@ -12,9 +12,11 @@ from PIL import Image, ImageDraw, ImageFont output_file_relative = sys.argv[ 1 -] # this is also the existing source screenshot to annotate. It will be updated in-place +] output_file = "/opt/static/" + output_file_relative +output_dir = os.path.dirname(output_file) +output_suffix = os.path.basename(output_file) domain = sys.argv[2] needles = sys.argv[3:] @@ -107,8 +109,9 @@ with Image.open(output_file) as im: ) if len(found_needles) == 0: exit(0) + os.remove(output_file) im = im.resize((im.width // 2, im.height // 2)) - im.save(output_file, "PNG") + im.save(output_dir + "/" + domain.replace(".", "_") + "_" + output_suffix, "PNG") print(json.dumps({"new_file": {"url": base_url + "/static/" + output_file_relative, "domain": domain, diff --git a/src/docker-args.js b/src/docker-args.js index 56ef69b..130727c 100644 --- a/src/docker-args.js +++ b/src/docker-args.js @@ -1,12 +1,14 @@ const IMAGE_NAME = "headless-fox"; +const VOLUME_MOUNT = `${process.cwd()}/static:/opt/static`; + const DOCKER_ARGS = [ "run", "-i", "-v", - `${process.cwd()}/static:/opt/static`, + VOLUME_MOUNT, IMAGE_NAME, "./script3.sh", ]; -module.exports = { DOCKER_ARGS, IMAGE_NAME }; +module.exports = { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT }; diff --git a/src/index.ts b/src/index.ts index e14b316..b57049d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -7,7 +7,7 @@ import serve from "koa-static"; import qs from "qs"; import { Readable } from "stream"; import { v4 as uuid } from "uuid"; -import { DOCKER_ARGS } from "./docker-args"; +import { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT } from "./docker-args"; import { requests } from "./memory"; import ScreenshotRequest from "./request"; @@ -81,11 +81,11 @@ router.get("/", async (ctx) => { do { response = await (await fetch(\`/api/requests/\${id}\`)).json(); output.innerHTML = JSON.stringify(response, null, " ").replace( - /\\/static\\/.*.png/g, + /\\/(static|api)\\/.*(.png|all-screenshots)/g, '$&' ); stdout.innerHTML = response.output.replace( - /\\/static\\/.*.png/g, + /\\/(static|api)\\/.*(.png|all-screenshots)/g, '$&' ); await sleep(1000); @@ -173,6 +173,28 @@ router.get("/api/requests/:id", async (ctx) => { ctx.body = await request.getJSON(); }); +router.get("/api/requests/:id/all-screenshots", async (ctx) => { + const request = requests[ctx.params.id]; + if (!request || request.status != "finished") { + ctx.status = 404; + return; + } + ctx.attachment(`${request.url.replace(/\W/g, "_")}_all-screenshots.zip`); + ctx.response.set("content-type", "application/zip"); + const process = spawn("docker", [ + "run", + "-v", + VOLUME_MOUNT, + IMAGE_NAME, + "zip", + "--recurse-paths", + "--junk-paths", + "-", + `/opt/static/${request.id}`, + ]); + ctx.body = process.stdout; +}); + app.use(router.routes()).use(router.allowedMethods()); const port = 3000; app.listen(port); diff --git a/src/request.ts b/src/request.ts index 13819d8..2503a9f 100644 --- a/src/request.ts +++ b/src/request.ts @@ -6,11 +6,17 @@ import { q, requests } from "./memory"; let queue_order: ScreenshotRequest[] = []; +export type Image = { + url: string; + domain: string; + found_headers: Record; +}; + export default class ScreenshotRequest { public id = uuid(); public status = "waiting"; public output = ""; - public images: Record[] = []; + public images: Image[] = []; public request_time: number = Date.now(); public started_time: number | null = null; public finished_time: number | null = null; @@ -47,13 +53,14 @@ export default class ScreenshotRequest { id: string; status: string; output: string; - images: Record[]; + images: Record; request_time: number; started_time: number | null; finished_time: number | null; processing_took: number | null; waiting_took: number | null; elapsed_time_s: number; + zip_url: string | null; }> { return { url: this.url, @@ -62,7 +69,7 @@ export default class ScreenshotRequest { id: this.id, status: this.status, output: this.output, - images: this.images, + images: this.getGoodImages(), request_time: this.request_time, started_time: this.started_time, finished_time: this.finished_time, @@ -73,9 +80,66 @@ export default class ScreenshotRequest { this.request_time) / 1000 ), + zip_url: + this.status === "finished" + ? `/api/requests/${this.id}/all-screenshots` + : null, }; } + getGoodImages(): Record { + const result: Record = {}; + const domains = Array.from( + new Set(this.images.map((image) => image.domain)) + ); + for (const domain of domains) { + const images = this.images + .filter((image) => image.domain === domain) + .sort((image1, image2) => { + if ( + Object.values(image1.found_headers).length > + Object.values(image2.found_headers).length + ) { + return -1; + } else if ( + Object.values(image1.found_headers).length < + Object.values(image2.found_headers).length + ) { + return 1; + } else { + return 0; + } + }); + const all_values = Array.from( + new Set( + images + .map((image) => Object.values(image.found_headers)) + .reduce((a, b) => a.concat(b)) + ) + ); + const images_to_show = []; + const shown_values = new Set(); + for (const image of images) { + const values_in_image = Object.values(image.found_headers); + let any_new_values = false; + for (const value of values_in_image) { + if (!shown_values.has(value)) { + shown_values.add(value); + any_new_values = true; + } + } + if (any_new_values) { + images_to_show.push(image); + } + if (shown_values.size == all_values.length) { + break; + } + } + result[domain] = images_to_show; + } + return result; + } + setFinished(): void { this.status = "finished"; this.finished_time = Date.now(); @@ -122,7 +186,7 @@ export default class ScreenshotRequest { is(parsed, predicates.object) && is(parsed.new_file, predicates.object) ) { - this.images.push(parsed.new_file); + this.images.push(parsed.new_file as Image); } } catch (e) { //noop