Allow for zip downloads. Show only interesting screenshots
This commit is contained in:
parent
04670e3236
commit
89d6134f30
1
@types/src/docker-args.d.ts
vendored
1
@types/src/docker-args.d.ts
vendored
|
@ -1,2 +1,3 @@
|
||||||
export const DOCKER_ARGS: string[];
|
export const DOCKER_ARGS: string[];
|
||||||
export const IMAGE_NAME: "headless-fox";
|
export const IMAGE_NAME: "headless-fox";
|
||||||
|
export const VOLUME_MOUNT: string;
|
||||||
|
|
11
@types/src/request.d.ts
vendored
11
@types/src/request.d.ts
vendored
|
@ -1,12 +1,17 @@
|
||||||
/// <reference types="node" />
|
/// <reference types="node" />
|
||||||
import { ChildProcessWithoutNullStreams } from "child_process";
|
import { ChildProcessWithoutNullStreams } from "child_process";
|
||||||
|
export declare type Image = {
|
||||||
|
url: string;
|
||||||
|
domain: string;
|
||||||
|
found_headers: Record<string, string>;
|
||||||
|
};
|
||||||
export default class ScreenshotRequest {
|
export default class ScreenshotRequest {
|
||||||
url: string;
|
url: string;
|
||||||
domains: string[];
|
domains: string[];
|
||||||
id: string;
|
id: string;
|
||||||
status: string;
|
status: string;
|
||||||
output: string;
|
output: string;
|
||||||
images: Record<string, unknown>[];
|
images: Image[];
|
||||||
request_time: number;
|
request_time: number;
|
||||||
started_time: number | null;
|
started_time: number | null;
|
||||||
finished_time: number | null;
|
finished_time: number | null;
|
||||||
|
@ -22,14 +27,16 @@ export default class ScreenshotRequest {
|
||||||
id: string;
|
id: string;
|
||||||
status: string;
|
status: string;
|
||||||
output: string;
|
output: string;
|
||||||
images: Record<string, unknown>[];
|
images: Record<string, Image[]>;
|
||||||
request_time: number;
|
request_time: number;
|
||||||
started_time: number | null;
|
started_time: number | null;
|
||||||
finished_time: number | null;
|
finished_time: number | null;
|
||||||
processing_took: number | null;
|
processing_took: number | null;
|
||||||
waiting_took: number | null;
|
waiting_took: number | null;
|
||||||
elapsed_time_s: number;
|
elapsed_time_s: number;
|
||||||
|
zip_url: string | null;
|
||||||
}>;
|
}>;
|
||||||
|
getGoodImages(): Record<string, Image[]>;
|
||||||
setFinished(): void;
|
setFinished(): void;
|
||||||
exec(): Promise<void>;
|
exec(): Promise<void>;
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,6 +46,8 @@ RUN git clone https://github.com/muquit/grabc && cd grabc && make && make instal
|
||||||
RUN apk add clang
|
RUN apk add clang
|
||||||
RUN apk add freetype-dev
|
RUN apk add freetype-dev
|
||||||
RUN python3 -m pip install --upgrade Pillow
|
RUN python3 -m pip install --upgrade Pillow
|
||||||
|
RUN apk add zip
|
||||||
|
|
||||||
COPY . /opt
|
COPY . /opt
|
||||||
CMD /opt/prepare-firefox.sh
|
CMD /opt/prepare-firefox.sh
|
||||||
|
|
||||||
|
|
|
@ -12,9 +12,11 @@ from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
output_file_relative = sys.argv[
|
output_file_relative = sys.argv[
|
||||||
1
|
1
|
||||||
] # this is also the existing source screenshot to annotate. It will be updated in-place
|
]
|
||||||
|
|
||||||
output_file = "/opt/static/" + output_file_relative
|
output_file = "/opt/static/" + output_file_relative
|
||||||
|
output_dir = os.path.dirname(output_file)
|
||||||
|
output_suffix = os.path.basename(output_file)
|
||||||
domain = sys.argv[2]
|
domain = sys.argv[2]
|
||||||
needles = sys.argv[3:]
|
needles = sys.argv[3:]
|
||||||
|
|
||||||
|
@ -107,8 +109,9 @@ with Image.open(output_file) as im:
|
||||||
)
|
)
|
||||||
if len(found_needles) == 0:
|
if len(found_needles) == 0:
|
||||||
exit(0)
|
exit(0)
|
||||||
|
os.remove(output_file)
|
||||||
im = im.resize((im.width // 2, im.height // 2))
|
im = im.resize((im.width // 2, im.height // 2))
|
||||||
im.save(output_file, "PNG")
|
im.save(output_dir + "/" + domain.replace(".", "_") + "_" + output_suffix, "PNG")
|
||||||
print(json.dumps({"new_file":
|
print(json.dumps({"new_file":
|
||||||
{"url": base_url + "/static/" + output_file_relative,
|
{"url": base_url + "/static/" + output_file_relative,
|
||||||
"domain": domain,
|
"domain": domain,
|
||||||
|
|
|
@ -1,12 +1,14 @@
|
||||||
const IMAGE_NAME = "headless-fox";
|
const IMAGE_NAME = "headless-fox";
|
||||||
|
|
||||||
|
const VOLUME_MOUNT = `${process.cwd()}/static:/opt/static`;
|
||||||
|
|
||||||
const DOCKER_ARGS = [
|
const DOCKER_ARGS = [
|
||||||
"run",
|
"run",
|
||||||
"-i",
|
"-i",
|
||||||
"-v",
|
"-v",
|
||||||
`${process.cwd()}/static:/opt/static`,
|
VOLUME_MOUNT,
|
||||||
IMAGE_NAME,
|
IMAGE_NAME,
|
||||||
"./script3.sh",
|
"./script3.sh",
|
||||||
];
|
];
|
||||||
|
|
||||||
module.exports = { DOCKER_ARGS, IMAGE_NAME };
|
module.exports = { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT };
|
||||||
|
|
28
src/index.ts
28
src/index.ts
|
@ -7,7 +7,7 @@ import serve from "koa-static";
|
||||||
import qs from "qs";
|
import qs from "qs";
|
||||||
import { Readable } from "stream";
|
import { Readable } from "stream";
|
||||||
import { v4 as uuid } from "uuid";
|
import { v4 as uuid } from "uuid";
|
||||||
import { DOCKER_ARGS } from "./docker-args";
|
import { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT } from "./docker-args";
|
||||||
import { requests } from "./memory";
|
import { requests } from "./memory";
|
||||||
import ScreenshotRequest from "./request";
|
import ScreenshotRequest from "./request";
|
||||||
|
|
||||||
|
@ -81,11 +81,11 @@ router.get("/", async (ctx) => {
|
||||||
do {
|
do {
|
||||||
response = await (await fetch(\`/api/requests/\${id}\`)).json();
|
response = await (await fetch(\`/api/requests/\${id}\`)).json();
|
||||||
output.innerHTML = JSON.stringify(response, null, " ").replace(
|
output.innerHTML = JSON.stringify(response, null, " ").replace(
|
||||||
/\\/static\\/.*.png/g,
|
/\\/(static|api)\\/.*(.png|all-screenshots)/g,
|
||||||
'<a href="$&">$&</a>'
|
'<a href="$&">$&</a>'
|
||||||
);
|
);
|
||||||
stdout.innerHTML = response.output.replace(
|
stdout.innerHTML = response.output.replace(
|
||||||
/\\/static\\/.*.png/g,
|
/\\/(static|api)\\/.*(.png|all-screenshots)/g,
|
||||||
'<a href="$&">$&</a>'
|
'<a href="$&">$&</a>'
|
||||||
);
|
);
|
||||||
await sleep(1000);
|
await sleep(1000);
|
||||||
|
@ -173,6 +173,28 @@ router.get("/api/requests/:id", async (ctx) => {
|
||||||
ctx.body = await request.getJSON();
|
ctx.body = await request.getJSON();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/**
 * GET /api/requests/:id/all-screenshots
 *
 * Streams a zip archive of everything under `/opt/static/<request id>` (the
 * container-side path of VOLUME_MOUNT) as a file download.
 *
 * Responds with 404 when the request id is unknown or the request has not
 * reached the "finished" status yet.
 */
router.get("/api/requests/:id/all-screenshots", async (ctx) => {
  const request = requests[ctx.params.id];
  if (!request || request.status !== "finished") {
    ctx.status = 404;
    return;
  }

  // Download name is derived from the requested URL with every non-word
  // character replaced, so it is always a safe file name.
  ctx.attachment(`${request.url.replace(/\W/g, "_")}_all-screenshots.zip`);
  ctx.response.set("content-type", "application/zip");

  // Named `zipper` rather than `process` so the Node global is not shadowed.
  const zipper = spawn("docker", [
    "run",
    // Remove the container once zip exits; otherwise every download leaves
    // a stopped container behind.
    "--rm",
    "-v",
    VOLUME_MOUNT,
    IMAGE_NAME,
    "zip",
    "--recurse-paths",
    "--junk-paths",
    // "-" as the archive name makes zip write the archive to stdout.
    "-",
    `/opt/static/${request.id}`,
  ]);

  // Without a listener, a failed spawn (e.g. docker not on PATH) emits an
  // unhandled 'error' event and takes the whole server down.
  zipper.on("error", (error: Error) => {
    console.error(`all-screenshots: could not run zip for ${request.id}:`, error);
  });

  // stderr is piped but nobody reads it; drain it so the child cannot block
  // on a full pipe while we are still streaming stdout.
  zipper.stderr.resume();

  ctx.body = zipper.stdout;
});
|
||||||
|
|
||||||
app.use(router.routes()).use(router.allowedMethods());
|
app.use(router.routes()).use(router.allowedMethods());
|
||||||
const port = 3000;
|
const port = 3000;
|
||||||
app.listen(port);
|
app.listen(port);
|
||||||
|
|
|
@ -6,11 +6,17 @@ import { q, requests } from "./memory";
|
||||||
|
|
||||||
let queue_order: ScreenshotRequest[] = [];
|
let queue_order: ScreenshotRequest[] = [];
|
||||||
|
|
||||||
|
/**
 * One screenshot entry as stored in `ScreenshotRequest.images`.
 *
 * Entries come from the `new_file` objects parsed out of the worker output
 * and are cast to this type without per-field validation.
 */
export type Image = {
|
||||||
|
// Link to the screenshot file.
// NOTE(review): presumably the "/static/..." URL printed by the annotation script — confirm.
url: string;
|
||||||
|
// Domain the screenshot is attributed to; getGoodImages() groups by this value.
domain: string;
|
||||||
|
// Header matches found in this screenshot. getGoodImages() only looks at
// the values of this record when deciding which screenshots to keep.
// NOTE(review): meaning of the keys is not visible here — confirm against the script.
found_headers: Record<string, string>;
|
||||||
|
};
|
||||||
|
|
||||||
export default class ScreenshotRequest {
|
export default class ScreenshotRequest {
|
||||||
public id = uuid();
|
public id = uuid();
|
||||||
public status = "waiting";
|
public status = "waiting";
|
||||||
public output = "";
|
public output = "";
|
||||||
public images: Record<string, unknown>[] = [];
|
public images: Image[] = [];
|
||||||
public request_time: number = Date.now();
|
public request_time: number = Date.now();
|
||||||
public started_time: number | null = null;
|
public started_time: number | null = null;
|
||||||
public finished_time: number | null = null;
|
public finished_time: number | null = null;
|
||||||
|
@ -47,13 +53,14 @@ export default class ScreenshotRequest {
|
||||||
id: string;
|
id: string;
|
||||||
status: string;
|
status: string;
|
||||||
output: string;
|
output: string;
|
||||||
images: Record<string, unknown>[];
|
images: Record<string, Image[]>;
|
||||||
request_time: number;
|
request_time: number;
|
||||||
started_time: number | null;
|
started_time: number | null;
|
||||||
finished_time: number | null;
|
finished_time: number | null;
|
||||||
processing_took: number | null;
|
processing_took: number | null;
|
||||||
waiting_took: number | null;
|
waiting_took: number | null;
|
||||||
elapsed_time_s: number;
|
elapsed_time_s: number;
|
||||||
|
zip_url: string | null;
|
||||||
}> {
|
}> {
|
||||||
return {
|
return {
|
||||||
url: this.url,
|
url: this.url,
|
||||||
|
@ -62,7 +69,7 @@ export default class ScreenshotRequest {
|
||||||
id: this.id,
|
id: this.id,
|
||||||
status: this.status,
|
status: this.status,
|
||||||
output: this.output,
|
output: this.output,
|
||||||
images: this.images,
|
images: this.getGoodImages(),
|
||||||
request_time: this.request_time,
|
request_time: this.request_time,
|
||||||
started_time: this.started_time,
|
started_time: this.started_time,
|
||||||
finished_time: this.finished_time,
|
finished_time: this.finished_time,
|
||||||
|
@ -73,9 +80,66 @@ export default class ScreenshotRequest {
|
||||||
this.request_time) /
|
this.request_time) /
|
||||||
1000
|
1000
|
||||||
),
|
),
|
||||||
|
zip_url:
|
||||||
|
this.status === "finished"
|
||||||
|
? `/api/requests/${this.id}/all-screenshots`
|
||||||
|
: null,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Selects the "interesting" screenshots for each domain.
 *
 * For every domain, the screenshots are ranked by how many header values
 * they contain (most first). Walking that ranking, a screenshot is kept only
 * if it shows at least one header value that no previously kept screenshot
 * of the same domain shows; the walk stops as soon as every distinct value
 * of the domain is covered. Screenshots without any header value are never
 * kept.
 *
 * @returns a map from domain to the screenshots kept for it, with domains in
 *          the order they first appear in `this.images`.
 */
getGoodImages(): Record<string, Image[]> {
  // Entries are pushed from parsed JSON without field validation, so treat a
  // missing `found_headers` as "no headers" instead of throwing.
  const header_values = (image: Image): string[] =>
    Object.values(image.found_headers || {});

  // Group in one pass instead of re-filtering the full list per domain.
  // A Map iterates in insertion order, i.e. first-seen domain order.
  const images_by_domain = new Map<string, Image[]>();
  for (const image of this.images) {
    const group = images_by_domain.get(image.domain);
    if (group) {
      group.push(image);
    } else {
      images_by_domain.set(image.domain, [image]);
    }
  }

  const result: Record<string, Image[]> = {};
  images_by_domain.forEach((group, domain) => {
    // Richest screenshots first; the values are computed once per image.
    const ranked = group
      .map((image) => ({ image, values: header_values(image) }))
      .sort((a, b) => b.values.length - a.values.length);

    // Number of distinct values across the domain: the coverage target.
    // The explicit initial value keeps reduce safe on an empty list.
    const distinct_total = new Set(
      ranked.reduce<string[]>((acc, entry) => acc.concat(entry.values), [])
    ).size;

    const shown_values = new Set<string>();
    const images_to_show: Image[] = [];
    for (const { image, values } of ranked) {
      const shown_before = shown_values.size;
      values.forEach((value) => shown_values.add(value));
      // The set grew, so this screenshot contributed something new.
      if (shown_values.size > shown_before) {
        images_to_show.push(image);
      }
      if (shown_values.size === distinct_total) {
        break;
      }
    }
    result[domain] = images_to_show;
  });
  return result;
}
|
||||||
|
|
||||||
setFinished(): void {
|
setFinished(): void {
|
||||||
this.status = "finished";
|
this.status = "finished";
|
||||||
this.finished_time = Date.now();
|
this.finished_time = Date.now();
|
||||||
|
@ -122,7 +186,7 @@ export default class ScreenshotRequest {
|
||||||
is(parsed, predicates.object) &&
|
is(parsed, predicates.object) &&
|
||||||
is(parsed.new_file, predicates.object)
|
is(parsed.new_file, predicates.object)
|
||||||
) {
|
) {
|
||||||
this.images.push(parsed.new_file);
|
this.images.push(parsed.new_file as Image);
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
//noop
|
//noop
|
||||||
|
|
Loading…
Reference in New Issue
Block a user