diff --git a/@types/src/docker-args.d.ts b/@types/src/docker-args.d.ts
index 8af90ce..6e0fd4a 100644
--- a/@types/src/docker-args.d.ts
+++ b/@types/src/docker-args.d.ts
@@ -1,2 +1,3 @@
export const DOCKER_ARGS: string[];
export const IMAGE_NAME: "headless-fox";
+export const VOLUME_MOUNT: string; // volume spec passed to `docker run -v`; the value is built in src/docker-args.js
diff --git a/@types/src/request.d.ts b/@types/src/request.d.ts
index e6c37b1..89c705d 100644
--- a/@types/src/request.d.ts
+++ b/@types/src/request.d.ts
@@ -1,12 +1,17 @@
///
import { ChildProcessWithoutNullStreams } from "child_process";
+export declare type Image = { // one collected screenshot entry; element type of ScreenshotRequest.images
+ url: string;
+ domain: string;
+ found_headers: Record; // NOTE(review): Record appears without type arguments here — confirm the intended key/value types
+};
export default class ScreenshotRequest {
url: string;
domains: string[];
id: string;
status: string;
output: string;
- images: Record[];
+ images: Image[];
request_time: number;
started_time: number | null;
finished_time: number | null;
@@ -22,14 +27,16 @@ export default class ScreenshotRequest {
id: string;
status: string;
output: string;
- images: Record[];
+ images: Record;
request_time: number;
started_time: number | null;
finished_time: number | null;
processing_took: number | null;
waiting_took: number | null;
elapsed_time_s: number;
+ zip_url: string | null;
}>;
+ getGoodImages(): Record;
setFinished(): void;
exec(): Promise;
}
diff --git a/Docker/Dockerfile b/Docker/Dockerfile
index ed77327..035964e 100644
--- a/Docker/Dockerfile
+++ b/Docker/Dockerfile
@@ -46,6 +46,8 @@ RUN git clone https://github.com/muquit/grabc && cd grabc && make && make instal
RUN apk add clang
RUN apk add freetype-dev
RUN python3 -m pip install --upgrade Pillow
+# zip is run inside this image to build the all-screenshots archive that the
+# API streams back; --no-cache keeps the apk package index out of the layer.
+RUN apk add --no-cache zip
+
COPY . /opt
CMD /opt/prepare-firefox.sh
diff --git a/Docker/annotate_header.py b/Docker/annotate_header.py
index 75d40c8..dd9ee66 100644
--- a/Docker/annotate_header.py
+++ b/Docker/annotate_header.py
@@ -12,9 +12,11 @@ from PIL import Image, ImageDraw, ImageFont
output_file_relative = sys.argv[
1
-] # this is also the existing source screenshot to annotate. It will be updated in-place
+]
output_file = "/opt/static/" + output_file_relative
+output_dir = os.path.dirname(output_file)  # directory part of the absolute screenshot path
+output_suffix = os.path.basename(output_file)  # file-name part; reused below as the tail of the domain-prefixed output name
domain = sys.argv[2]
needles = sys.argv[3:]
@@ -107,8 +109,9 @@ with Image.open(output_file) as im:
)
if len(found_needles) == 0:
exit(0)
+ os.remove(output_file)
im = im.resize((im.width // 2, im.height // 2))
- im.save(output_file, "PNG")
+ im.save(output_dir + "/" + domain.replace(".", "_") + "_" + output_suffix, "PNG")  # NOTE(review): the source file is removed above and the result is saved under a domain-prefixed name, yet the "url" printed below is still built from output_file_relative — confirm that link is not stale
print(json.dumps({"new_file":
{"url": base_url + "/static/" + output_file_relative,
"domain": domain,
diff --git a/src/docker-args.js b/src/docker-args.js
index 56ef69b..130727c 100644
--- a/src/docker-args.js
+++ b/src/docker-args.js
@@ -1,12 +1,14 @@
const IMAGE_NAME = "headless-fox";
+const VOLUME_MOUNT = `${process.cwd()}/static:/opt/static`; // `docker run -v` spec: <cwd>/static on the host is mounted at /opt/static in the container
+
const DOCKER_ARGS = [
"run",
"-i",
"-v",
- `${process.cwd()}/static:/opt/static`,
+ VOLUME_MOUNT,
IMAGE_NAME,
"./script3.sh",
];
-module.exports = { DOCKER_ARGS, IMAGE_NAME };
+module.exports = { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT };
diff --git a/src/index.ts b/src/index.ts
index e14b316..b57049d 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -7,7 +7,7 @@ import serve from "koa-static";
import qs from "qs";
import { Readable } from "stream";
import { v4 as uuid } from "uuid";
-import { DOCKER_ARGS } from "./docker-args";
+import { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT } from "./docker-args";
import { requests } from "./memory";
import ScreenshotRequest from "./request";
@@ -81,11 +81,11 @@ router.get("/", async (ctx) => {
do {
response = await (await fetch(\`/api/requests/\${id}\`)).json();
output.innerHTML = JSON.stringify(response, null, " ").replace(
- /\\/static\\/.*.png/g,
+ /\\/(static|api)\\/.*(.png|all-screenshots)/g,
'$&'
);
stdout.innerHTML = response.output.replace(
- /\\/static\\/.*.png/g,
+ /\\/(static|api)\\/.*(.png|all-screenshots)/g,
'$&'
);
await sleep(1000);
@@ -173,6 +173,28 @@ router.get("/api/requests/:id", async (ctx) => {
ctx.body = await request.getJSON();
});
+// Streams a zip archive of the /opt/static/<request id> directory (as seen
+// inside the container) for a finished request. Responds with 404 when the id
+// is unknown or the request has not finished yet.
+router.get("/api/requests/:id/all-screenshots", async (ctx) => {
+  const request = requests[ctx.params.id];
+  if (!request || request.status !== "finished") {
+    ctx.status = 404;
+    return;
+  }
+  // Every non-word character of the URL becomes "_" to form the file name.
+  ctx.attachment(`${request.url.replace(/\W/g, "_")}_all-screenshots.zip`);
+  ctx.response.set("content-type", "application/zip");
+  // Named zip_process so that Node's global `process` is not shadowed here.
+  const zip_process = spawn("docker", [
+    "run",
+    // Remove the one-off container when zip exits; without this every
+    // download leaves a stopped container behind.
+    "--rm",
+    "-v",
+    VOLUME_MOUNT,
+    IMAGE_NAME,
+    "zip",
+    "--recurse-paths",
+    "--junk-paths",
+    "-", // archive name "-": zip writes the archive to stdout
+    `/opt/static/${request.id}`,
+  ]);
+  // A failed spawn emits "error" on the child; with no listener that would be
+  // thrown as an uncaught exception. Destroying stdout with the error hands
+  // the failure to whatever error handling is attached to the body stream.
+  zip_process.once("error", (err) => zip_process.stdout.destroy(err));
+  ctx.body = zip_process.stdout;
+});
+
app.use(router.routes()).use(router.allowedMethods());
const port = 3000;
app.listen(port);
diff --git a/src/request.ts b/src/request.ts
index 13819d8..2503a9f 100644
--- a/src/request.ts
+++ b/src/request.ts
@@ -6,11 +6,17 @@ import { q, requests } from "./memory";
let queue_order: ScreenshotRequest[] = [];
+export type Image = { // one collected screenshot entry; `new_file` objects parsed from the process output are pushed into `images` as this type
+ url: string;
+ domain: string;
+ found_headers: Record; // NOTE(review): Record appears without type arguments here — confirm the intended key/value types
+};
+
export default class ScreenshotRequest {
public id = uuid();
public status = "waiting";
public output = "";
- public images: Record[] = [];
+ public images: Image[] = [];
public request_time: number = Date.now();
public started_time: number | null = null;
public finished_time: number | null = null;
@@ -47,13 +53,14 @@ export default class ScreenshotRequest {
id: string;
status: string;
output: string;
- images: Record[];
+ images: Record;
request_time: number;
started_time: number | null;
finished_time: number | null;
processing_took: number | null;
waiting_took: number | null;
elapsed_time_s: number;
+ zip_url: string | null;
}> {
return {
url: this.url,
@@ -62,7 +69,7 @@ export default class ScreenshotRequest {
id: this.id,
status: this.status,
output: this.output,
- images: this.images,
+ images: this.getGoodImages(),
request_time: this.request_time,
started_time: this.started_time,
finished_time: this.finished_time,
@@ -73,9 +80,66 @@ export default class ScreenshotRequest {
this.request_time) /
1000
),
+ zip_url:
+ this.status === "finished"
+ ? `/api/requests/${this.id}/all-screenshots`
+ : null,
};
}
+  /**
+   * Groups the collected images by domain and drops the redundant ones.
+   *
+   * Within a domain, images are visited from most to fewest `found_headers`
+   * values. An image is kept only when it contains at least one value that no
+   * previously visited image of that domain contained, so an image whose
+   * values were all seen already (or that has none) is left out.
+   *
+   * `this.images` itself is not reordered or modified.
+   *
+   * @returns a map from domain to the images kept for that domain
+   */
+  getGoodImages(): Record<string, Image[]> {
+    // Bucket the images in one pass; a Map keeps domains in first-seen order.
+    const images_by_domain = new Map<string, Image[]>();
+    for (const image of this.images) {
+      const bucket = images_by_domain.get(image.domain);
+      if (bucket) {
+        bucket.push(image);
+      } else {
+        images_by_domain.set(image.domain, [image]);
+      }
+    }
+
+    const header_count = (image: Image): number =>
+      Object.values(image.found_headers).length;
+
+    const result: Record<string, Image[]> = {};
+    images_by_domain.forEach((images, domain) => {
+      // The buckets are fresh arrays, so sorting them in place is safe.
+      images.sort(
+        (image1, image2) => header_count(image2) - header_count(image1)
+      );
+      const shown_values = new Set<unknown>();
+      // Once every value of the domain has been shown, no later image can
+      // add a new one, so the filter rejects the rest without a separate
+      // "all values covered" check.
+      result[domain] = images.filter((image) => {
+        let any_new_values = false;
+        for (const value of Object.values(image.found_headers)) {
+          if (!shown_values.has(value)) {
+            shown_values.add(value);
+            any_new_values = true;
+          }
+        }
+        return any_new_values;
+      });
+    });
+    return result;
+  }
+
setFinished(): void {
this.status = "finished";
this.finished_time = Date.now();
@@ -122,7 +186,7 @@ export default class ScreenshotRequest {
is(parsed, predicates.object) &&
is(parsed.new_file, predicates.object)
) {
- this.images.push(parsed.new_file);
+ this.images.push(parsed.new_file as Image);
}
} catch (e) {
//noop