screenshot-service/src/request.ts

204 lines
5.5 KiB
TypeScript
Raw Normal View History

2022-06-19 13:33:25 +02:00
import { is, predicates } from "@sealcode/ts-predicates";
import { ChildProcessWithoutNullStreams, spawn } from "child_process";
import { v4 as uuid } from "uuid";
import containerPool from "./container-pool";
import { q, requests } from "./memory";
// Requests that have been created and whose analysis process has not closed
// yet, in creation order. ScreenshotRequest appends itself in its constructor
// and removes itself when its child process closes; getJobsAhead() reads the
// position of a request in this list.
let queue_order: ScreenshotRequest[] = [];
/**
 * One image reported by the analysis process (the `new_file` payload read
 * from its stdout in ScreenshotRequest.exec).
 */
export type Image = {
// NOTE(review): presumably where the screenshot can be fetched from — confirm
// against the script that emits `new_file`.
url: string;
// Key used by getGoodImages() to group images.
domain: string;
// getGoodImages() only looks at the values of this record: images with more
// entries are preferred, and an image is kept only if it contributes a value
// not already shown for its domain.
found_headers: Record<string, string>;
};
2022-06-19 13:33:25 +02:00
/**
 * A single screenshot/analysis job.
 *
 * Creating an instance registers it in `requests` (keyed by `id`), appends it
 * to `queue_order` and pushes a job onto `q` that calls `exec()`. `exec()`
 * runs `/opt/run-analysis.sh` through `docker exec` in a container taken from
 * the container pool and collects the process output and reported images.
 */
export default class ScreenshotRequest {
	/** Unique identifier; also the key of this request in `requests`. */
	public id = uuid();
	/** Lifecycle state. This class sets "waiting", "running" and "finished". */
	public status = "waiting";
	/** Everything the process wrote to stdout and stderr, in arrival order. */
	public output = "";
	/** Images reported by the process through `new_file` messages on stdout. */
	public images: Image[] = [];
	/** Creation time, in milliseconds since the epoch (`Date.now()`). */
	public request_time: number = Date.now();
	/** Time `exec()` was entered (ms since the epoch), or null before that. */
	public started_time: number | null = null;
	/** Time the process ended (ms since the epoch), or null before that. */
	public finished_time: number | null = null;
	/** `finished_time - started_time` in ms; set by `setFinished()`. */
	public processing_took: number | null = null;
	/** `started_time - request_time` in ms; set by `setFinished()`. */
	public waiting_took: number | null = null;
	/**
	 * The `docker exec` child process. Assigned inside `exec()`, so it is
	 * undefined until the job has been started and its container is ready.
	 */
	public process!: ChildProcessWithoutNullStreams;

	/**
	 * @param url address to analyse; passed to the script as `url`
	 * @param domains passed to the script as `third_party_domains`
	 */
	constructor(public url: string, public domains: string[]) {
		q.push(async () => this.exec());
		requests[this.id] = this;
		queue_order.push(this);
	}

	/**
	 * Number of requests that precede this one in `queue_order`. Requests stay
	 * in that list until their process closes, so running jobs are counted as
	 * well. Always 0 once this request is no longer "waiting".
	 */
	getJobsAhead(): number {
		if (this.status !== "waiting") {
			return 0;
		}
		const position = queue_order.indexOf(this);
		// Not being listed counts every listed request as "ahead".
		return position === -1 ? queue_order.length : position;
	}

	/**
	 * Snapshot of the request for API responses.
	 *
	 * `elapsed_time_s` is measured from `request_time` to `finished_time` for a
	 * finished request and to the current time otherwise, rounded to seconds.
	 * `zip_url` is only set once the request is finished.
	 */
	async getJSON(): Promise<{
		url: string;
		domains: string[];
		jobs_ahead: number;
		id: string;
		status: string;
		output: string;
		images: Record<string, Image[]>;
		request_time: number;
		started_time: number | null;
		finished_time: number | null;
		processing_took: number | null;
		waiting_took: number | null;
		elapsed_time_s: number;
		zip_url: string | null;
	}> {
		const is_finished = this.status === "finished";
		// Fall back to "now" rather than a sentinel so the elapsed time can
		// never come out negative if the end time is missing.
		const end_time =
			is_finished && this.finished_time !== null
				? this.finished_time
				: Date.now();
		return {
			url: this.url,
			domains: this.domains,
			jobs_ahead: this.getJobsAhead(),
			id: this.id,
			status: this.status,
			output: this.output,
			images: this.getGoodImages(),
			request_time: this.request_time,
			started_time: this.started_time,
			finished_time: this.finished_time,
			processing_took: this.processing_took,
			waiting_took: this.waiting_took,
			elapsed_time_s: Math.round((end_time - this.request_time) / 1000),
			zip_url: is_finished
				? `/api/requests/${this.id}/all-screenshots`
				: null,
		};
	}

	/**
	 * Picks, per domain, a small set of images that together show every
	 * distinct `found_headers` value seen for that domain.
	 *
	 * Images are visited from the one with the most header entries to the one
	 * with the fewest; an image is kept only if it contributes at least one
	 * value that no previously kept image has shown.
	 *
	 * @returns kept images keyed by domain, domains in first-seen order
	 */
	getGoodImages(): Record<string, Image[]> {
		const result: Record<string, Image[]> = {};
		const domains = Array.from(
			new Set(this.images.map((image) => image.domain))
		);
		for (const domain of domains) {
			// filter() returns a fresh array, so sorting does not reorder
			// this.images.
			const images = this.images
				.filter((image) => image.domain === domain)
				.sort(
					(image1, image2) =>
						Object.values(image2.found_headers).length -
						Object.values(image1.found_headers).length
				);
			const all_values = new Set(
				images.reduce<string[]>(
					(values, image) =>
						values.concat(Object.values(image.found_headers)),
					[]
				)
			);
			const images_to_show: Image[] = [];
			const shown_values = new Set<string>();
			for (const image of images) {
				let any_new_values = false;
				for (const value of Object.values(image.found_headers)) {
					if (!shown_values.has(value)) {
						shown_values.add(value);
						any_new_values = true;
					}
				}
				if (any_new_values) {
					images_to_show.push(image);
				}
				// Every value is covered; the remaining images add nothing.
				if (shown_values.size === all_values.size) {
					break;
				}
			}
			result[domain] = images_to_show;
		}
		return result;
	}

	/**
	 * Marks the request as finished and records the end time. The two
	 * durations are only filled in when a start time was recorded.
	 */
	setFinished(): void {
		this.status = "finished";
		this.finished_time = Date.now();
		if (this.started_time) {
			this.processing_took = this.finished_time - this.started_time;
			this.waiting_took = this.started_time - this.request_time;
		}
	}

	/**
	 * Runs the analysis script for this request in a pooled container.
	 *
	 * The script is started with `docker exec <container> /opt/run-analysis.sh
	 * <json> <id>`; arguments are passed as an array, not through a shell.
	 * stdout and stderr are appended to `output`, and `new_file` messages found
	 * on stdout are appended to `images`.
	 *
	 * When the process ends (or could not be started) the request is marked
	 * finished, the container is closed and the request is removed from
	 * `queue_order`.
	 *
	 * @returns resolves when the process exits with code 0
	 * @throws rejects with an `Error` when the process cannot be spawned, exits
	 *   with a non-zero code or is terminated by a signal
	 */
	async exec(): Promise<void> {
		this.started_time = Date.now();
		this.status = "running";
		const container = containerPool.getContainer();
		// NOTE(review): if waitReady() can reject, the request stays "running"
		// and listed in queue_order — confirm whether that needs handling.
		await container.waitReady();
		return new Promise<void>((resolve, reject) => {
			this.process = spawn(
				"docker",
				[
					"exec",
					container.id,
					"/opt/run-analysis.sh",
					JSON.stringify({
						url: this.url,
						third_party_domains: this.domains,
					}),
					this.id,
				],
				{ cwd: process.cwd() }
			);

			// "error" and "close" can both fire for the same failure; clean up
			// and settle the promise only once.
			let settled = false;
			const finish = (error: Error | null): void => {
				if (settled) {
					return;
				}
				settled = true;
				this.setFinished();
				container.close();
				queue_order = queue_order.filter((request) => request !== this);
				if (error === null) {
					resolve();
				} else {
					reject(error);
				}
			};

			const recordMessage = (raw: string): void => {
				const image = parseNewFile(raw);
				if (image !== null) {
					this.images.push(image);
				}
			};

			// Text received on stdout after the last newline; it may be the
			// first part of a message that continues in the next chunk.
			let pending_stdout = "";

			this.process.on("error", (error: Error) => {
				// Without a listener this event would surface as an uncaught
				// exception. A missing pid means the process never started, so
				// no "close" can be relied on to clean up.
				if (this.process.pid === undefined) {
					finish(error);
				} else {
					this.output += `${error.message}\n`;
				}
			});

			this.process.on("close", (exitCode, signal) => {
				// A final message may lack a trailing newline.
				recordMessage(pending_stdout);
				pending_stdout = "";
				if (exitCode === 0) {
					finish(null);
					return;
				}
				const reason =
					exitCode === null
						? `was terminated by signal ${String(signal)}`
						: `exited with code ${exitCode}`;
				finish(new Error(`/opt/run-analysis.sh ${reason}`));
			});

			this.process.stdout.on("data", (d: Buffer) => {
				const chunk = d.toString();
				const whole_chunk_image = parseNewFile(chunk);
				if (whole_chunk_image !== null) {
					// The chunk is one complete message on its own. Whatever
					// was buffered cannot be continued by it, so give the
					// buffered text one chance and start over.
					recordMessage(pending_stdout);
					pending_stdout = "";
					this.images.push(whole_chunk_image);
				} else {
					// Chunk boundaries are arbitrary: a chunk may carry several
					// messages or only part of one. Assumes the script writes
					// one JSON document per line — TODO confirm.
					const lines = (pending_stdout + chunk).split("\n");
					pending_stdout = lines.pop() || "";
					for (const line of lines) {
						recordMessage(line);
					}
				}
				this.output += chunk;
			});

			this.process.stderr.on("data", (d: Buffer) => {
				this.output += d.toString();
			});
		});
	}
}

/**
 * Extracts the `new_file` payload from one piece of process output.
 *
 * @param raw text that may or may not be a JSON document
 * @returns the payload when `raw` is a JSON object whose `new_file` property
 *   is an object; null for anything else, including text that is not JSON
 */
function parseNewFile(raw: string): Image | null {
	let parsed: unknown;
	try {
		parsed = JSON.parse(raw);
	} catch (e) {
		// Ordinary log output is expected here and is not an error.
		return null;
	}
	if (is(parsed, predicates.object) && is(parsed.new_file, predicates.object)) {
		// NOTE(review): only "is an object" is verified; the Image fields
		// themselves are trusted as emitted by the script.
		return parsed.new_file as Image;
	}
	return null;
}