Allow for zip downloads. Show only interesting screenshots
This commit is contained in:
parent
04670e3236
commit
89d6134f30
1
@types/src/docker-args.d.ts
vendored
1
@types/src/docker-args.d.ts
vendored
@ -1,2 +1,3 @@
|
||||
export const DOCKER_ARGS: string[];
|
||||
export const IMAGE_NAME: "headless-fox";
|
||||
export const VOLUME_MOUNT: string;
|
||||
|
11
@types/src/request.d.ts
vendored
11
@types/src/request.d.ts
vendored
@ -1,12 +1,17 @@
|
||||
/// <reference types="node" />
|
||||
import { ChildProcessWithoutNullStreams } from "child_process";
|
||||
/**
 * One screenshot record held in `ScreenshotRequest.images`.
 *
 * - `url`: link to the screenshot file.
 * - `domain`: the domain the screenshot is attributed to; `getGoodImages()`
 *   returns its result keyed by this value.
 * - `found_headers`: string-to-string map of the headers found for this
 *   screenshot (NOTE(review): what the keys and values hold is not visible
 *   in this declaration - confirm against the producer).
 */
export declare type Image = {
|
||||
url: string;
|
||||
domain: string;
|
||||
found_headers: Record<string, string>;
|
||||
};
|
||||
export default class ScreenshotRequest {
|
||||
url: string;
|
||||
domains: string[];
|
||||
id: string;
|
||||
status: string;
|
||||
output: string;
|
||||
images: Record<string, unknown>[];
|
||||
images: Image[];
|
||||
request_time: number;
|
||||
started_time: number | null;
|
||||
finished_time: number | null;
|
||||
@ -22,14 +27,16 @@ export default class ScreenshotRequest {
|
||||
id: string;
|
||||
status: string;
|
||||
output: string;
|
||||
images: Record<string, unknown>[];
|
||||
images: Record<string, Image[]>;
|
||||
request_time: number;
|
||||
started_time: number | null;
|
||||
finished_time: number | null;
|
||||
processing_took: number | null;
|
||||
waiting_took: number | null;
|
||||
elapsed_time_s: number;
|
||||
zip_url: string | null;
|
||||
}>;
|
||||
getGoodImages(): Record<string, Image[]>;
|
||||
setFinished(): void;
|
||||
exec(): Promise<void>;
|
||||
}
|
||||
|
@ -46,6 +46,8 @@ RUN git clone https://github.com/muquit/grabc && cd grabc && make && make instal
|
||||
RUN apk add clang
|
||||
RUN apk add freetype-dev
|
||||
RUN python3 -m pip install --upgrade Pillow
|
||||
RUN apk add zip
|
||||
|
||||
COPY . /opt
|
||||
CMD /opt/prepare-firefox.sh
|
||||
|
||||
|
@ -12,9 +12,11 @@ from PIL import Image, ImageDraw, ImageFont
|
||||
|
||||
output_file_relative = sys.argv[
|
||||
1
|
||||
] # this is also the existing source screenshot to annotate. It will be updated in-place
|
||||
]
|
||||
|
||||
output_file = "/opt/static/" + output_file_relative
|
||||
output_dir = os.path.dirname(output_file)
|
||||
output_suffix = os.path.basename(output_file)
|
||||
domain = sys.argv[2]
|
||||
needles = sys.argv[3:]
|
||||
|
||||
@ -107,8 +109,9 @@ with Image.open(output_file) as im:
|
||||
)
|
||||
if len(found_needles) == 0:
|
||||
exit(0)
|
||||
os.remove(output_file)
|
||||
im = im.resize((im.width // 2, im.height // 2))
|
||||
im.save(output_file, "PNG")
|
||||
im.save(output_dir + "/" + domain.replace(".", "_") + "_" + output_suffix, "PNG")
|
||||
print(json.dumps({"new_file":
|
||||
{"url": base_url + "/static/" + output_file_relative,
|
||||
"domain": domain,
|
||||
|
@ -1,12 +1,14 @@
|
||||
const IMAGE_NAME = "headless-fox";
|
||||
|
||||
const VOLUME_MOUNT = `${process.cwd()}/static:/opt/static`;
|
||||
|
||||
const DOCKER_ARGS = [
|
||||
"run",
|
||||
"-i",
|
||||
"-v",
|
||||
`${process.cwd()}/static:/opt/static`,
|
||||
VOLUME_MOUNT,
|
||||
IMAGE_NAME,
|
||||
"./script3.sh",
|
||||
];
|
||||
|
||||
module.exports = { DOCKER_ARGS, IMAGE_NAME };
|
||||
module.exports = { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT };
|
||||
|
28
src/index.ts
28
src/index.ts
@ -7,7 +7,7 @@ import serve from "koa-static";
|
||||
import qs from "qs";
|
||||
import { Readable } from "stream";
|
||||
import { v4 as uuid } from "uuid";
|
||||
import { DOCKER_ARGS } from "./docker-args";
|
||||
import { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT } from "./docker-args";
|
||||
import { requests } from "./memory";
|
||||
import ScreenshotRequest from "./request";
|
||||
|
||||
@ -81,11 +81,11 @@ router.get("/", async (ctx) => {
|
||||
do {
|
||||
response = await (await fetch(\`/api/requests/\${id}\`)).json();
|
||||
output.innerHTML = JSON.stringify(response, null, " ").replace(
|
||||
/\\/static\\/.*.png/g,
|
||||
/\\/(static|api)\\/.*(.png|all-screenshots)/g,
|
||||
'<a href="$&">$&</a>'
|
||||
);
|
||||
stdout.innerHTML = response.output.replace(
|
||||
/\\/static\\/.*.png/g,
|
||||
/\\/(static|api)\\/.*(.png|all-screenshots)/g,
|
||||
'<a href="$&">$&</a>'
|
||||
);
|
||||
await sleep(1000);
|
||||
@ -173,6 +173,28 @@ router.get("/api/requests/:id", async (ctx) => {
|
||||
ctx.body = await request.getJSON();
|
||||
});
|
||||
|
||||
/**
 * GET /api/requests/:id/all-screenshots
 *
 * Streams a zip archive of `/opt/static/<request id>` as a file download.
 * The archive is produced by running `zip` inside a one-off container of the
 * screenshot image, writing to stdout (`-`), which is piped straight into the
 * HTTP response.
 *
 * Responds with 404 when the request id is unknown or the request has not
 * reached the "finished" status yet.
 */
router.get("/api/requests/:id/all-screenshots", async (ctx) => {
	const request = requests[ctx.params.id];
	if (!request || request.status !== "finished") {
		ctx.status = 404;
		return;
	}

	// The download name is derived from the requested URL with every non-word
	// character replaced, so it is always a safe file name.
	ctx.attachment(`${request.url.replace(/\W/g, "_")}_all-screenshots.zip`);
	ctx.response.set("content-type", "application/zip");

	// Named `zip_process` so the Node.js global `process` is not shadowed.
	const zip_process = spawn("docker", [
		"run",
		"--rm", // remove the one-off container once zip exits instead of leaving it behind
		"-v",
		VOLUME_MOUNT,
		IMAGE_NAME,
		"zip",
		"--recurse-paths",
		"--junk-paths", // store files without their directory structure
		"-", // write the archive to stdout
		`/opt/static/${request.id}`,
	]);

	// A child process that fails to spawn emits "error"; without a listener
	// that event is thrown as an uncaught exception and takes the server down.
	zip_process.on("error", (error) => {
		console.error(
			`Could not create the zip archive for request ${request.id}:`,
			error
		);
	});

	ctx.body = zip_process.stdout;
});
|
||||
|
||||
app.use(router.routes()).use(router.allowedMethods());
|
||||
const port = 3000;
|
||||
app.listen(port);
|
||||
|
@ -6,11 +6,17 @@ import { q, requests } from "./memory";
|
||||
|
||||
let queue_order: ScreenshotRequest[] = [];
|
||||
|
||||
/**
 * One screenshot record, as stored in `ScreenshotRequest.images`.
 *
 * - `url`: link to the screenshot file.
 * - `domain`: the domain the screenshot is attributed to; `getGoodImages()`
 *   groups screenshots by this value.
 * - `found_headers`: string-to-string map of the headers found for this
 *   screenshot; `getGoodImages()` ranks and de-duplicates screenshots by the
 *   values of this map (NOTE(review): what the keys and values hold is not
 *   visible here - confirm against the script that emits `new_file`).
 */
export type Image = {
|
||||
url: string;
|
||||
domain: string;
|
||||
found_headers: Record<string, string>;
|
||||
};
|
||||
|
||||
export default class ScreenshotRequest {
|
||||
public id = uuid();
|
||||
public status = "waiting";
|
||||
public output = "";
|
||||
public images: Record<string, unknown>[] = [];
|
||||
public images: Image[] = [];
|
||||
public request_time: number = Date.now();
|
||||
public started_time: number | null = null;
|
||||
public finished_time: number | null = null;
|
||||
@ -47,13 +53,14 @@ export default class ScreenshotRequest {
|
||||
id: string;
|
||||
status: string;
|
||||
output: string;
|
||||
images: Record<string, unknown>[];
|
||||
images: Record<string, Image[]>;
|
||||
request_time: number;
|
||||
started_time: number | null;
|
||||
finished_time: number | null;
|
||||
processing_took: number | null;
|
||||
waiting_took: number | null;
|
||||
elapsed_time_s: number;
|
||||
zip_url: string | null;
|
||||
}> {
|
||||
return {
|
||||
url: this.url,
|
||||
@ -62,7 +69,7 @@ export default class ScreenshotRequest {
|
||||
id: this.id,
|
||||
status: this.status,
|
||||
output: this.output,
|
||||
images: this.images,
|
||||
images: this.getGoodImages(),
|
||||
request_time: this.request_time,
|
||||
started_time: this.started_time,
|
||||
finished_time: this.finished_time,
|
||||
@ -73,9 +80,66 @@ export default class ScreenshotRequest {
|
||||
this.request_time) /
|
||||
1000
|
||||
),
|
||||
zip_url:
|
||||
this.status === "finished"
|
||||
? `/api/requests/${this.id}/all-screenshots`
|
||||
: null,
|
||||
};
|
||||
}
|
||||
|
||||
/**
 * Selects, for every domain, only the screenshots that are worth showing.
 *
 * Within one domain the screenshots are ordered by how many header values
 * they contain (most first; ties keep their original order). They are then
 * walked in that order and a screenshot is kept only if it contributes at
 * least one header value that no previously kept screenshot has already
 * shown. The kept screenshots therefore cover every value found for the
 * domain, while screenshots that add nothing new are dropped.
 *
 * `this.images` itself is neither reordered nor modified.
 *
 * @returns the kept screenshots keyed by domain, with domains in the order
 *   they first appear in `this.images`.
 */
getGoodImages(): Record<string, Image[]> {
	// Records are cast from parsed JSON without validation, so tolerate a
	// missing `found_headers` instead of throwing for the whole request.
	const header_values = (image: Image): string[] =>
		Object.values(image.found_headers || {});

	// Group in a single pass; a Map iterates in insertion order, which keeps
	// domains in first-seen order.
	const images_by_domain = new Map<string, Image[]>();
	for (const image of this.images) {
		const group = images_by_domain.get(image.domain);
		if (group) {
			group.push(image);
		} else {
			images_by_domain.set(image.domain, [image]);
		}
	}

	const result: Record<string, Image[]> = {};
	images_by_domain.forEach((images, domain) => {
		// `images` is a private copy, so sorting in place is safe.
		images.sort(
			(image1, image2) =>
				header_values(image2).length - header_values(image1).length
		);

		const shown_values = new Set<string>();
		const images_to_show: Image[] = [];
		for (const image of images) {
			let any_new_values = false;
			for (const value of header_values(image)) {
				if (!shown_values.has(value)) {
					shown_values.add(value);
					any_new_values = true;
				}
			}
			if (any_new_values) {
				images_to_show.push(image);
			}
		}
		result[domain] = images_to_show;
	});
	return result;
}
|
||||
|
||||
setFinished(): void {
|
||||
this.status = "finished";
|
||||
this.finished_time = Date.now();
|
||||
@ -122,7 +186,7 @@ export default class ScreenshotRequest {
|
||||
is(parsed, predicates.object) &&
|
||||
is(parsed.new_file, predicates.object)
|
||||
) {
|
||||
this.images.push(parsed.new_file);
|
||||
this.images.push(parsed.new_file as Image);
|
||||
}
|
||||
} catch (e) {
|
||||
//noop
|
||||
|
Loading…
Reference in New Issue
Block a user