2022-06-19 13:33:25 +02:00
|
|
|
import { is, predicates } from "@sealcode/ts-predicates";
|
|
|
|
import { ChildProcessWithoutNullStreams, spawn } from "child_process";
|
|
|
|
import { v4 as uuid } from "uuid";
|
|
|
|
import containerPool from "./container-pool";
|
|
|
|
import { q, requests } from "./memory";
|
|
|
|
|
|
|
|
// Requests in the order they were created. The ScreenshotRequest constructor
// appends to this list and exec() drops a request from it once its analysis
// process has closed; getJobsAhead() reads a request's position in it.
// Declared with `let` because exec() replaces the array with a filtered copy.
let queue_order: ScreenshotRequest[] = [];
|
|
|
|
|
2022-07-08 16:36:48 +02:00
|
|
|
/**
 * Description of one file reported by the analysis process.
 *
 * Objects of this shape are taken from the `new_file` property of JSON
 * messages printed on the analysis process' stdout and are collected in
 * `ScreenshotRequest.images`.
 */
export type Image = {
	/** Address associated with the reported file. */
	url: string;
	/** Domain the file is attributed to; results are grouped by this value. */
	domain: string;
	/**
	 * String-to-string map reported for this file. Within a domain, files are
	 * ranked by how many values this map holds.
	 */
	found_headers: { [name: string]: string };
};
|
|
|
|
|
2022-06-19 13:33:25 +02:00
|
|
|
/**
 * A single queued analysis job for one URL.
 *
 * Creating an instance registers it in the shared `requests` map, appends it
 * to `queue_order` and pushes a job onto `q` that calls {@link exec}. `exec`
 * runs `/opt/run-analysis.sh` inside a container from `containerPool` via
 * `docker exec` and collects the process output and reported images.
 */
export default class ScreenshotRequest {
	/** Unique identifier; also passed to the analysis script and used in URLs. */
	public id = uuid();
	/** Lifecycle state: "waiting" until exec() starts, then "running", then "finished". */
	public status = "waiting";
	/** Everything the analysis process wrote to stdout and stderr, concatenated. */
	public output = "";
	/** Files reported by the analysis process through `new_file` messages. */
	public images: Image[] = [];
	/** Timestamp (ms since epoch) of when the request object was created. */
	public request_time: number = Date.now();
	/** Timestamp (ms) of when exec() started, or null while still waiting. */
	public started_time: number | null = null;
	/** Timestamp (ms) of when the request finished, or null before that. */
	public finished_time: number | null = null;
	/** Milliseconds between start and finish; set by setFinished(). */
	public processing_took: number | null = null;
	/** Milliseconds between creation and start; set by setFinished(). */
	public waiting_took: number | null = null;
	/** The spawned `docker exec` process. Only assigned once exec() has spawned it. */
	public process: ChildProcessWithoutNullStreams;

	/**
	 * @param url address handed to the analysis script as `url`
	 * @param domains handed to the analysis script as `third_party_domains`
	 */
	constructor(public url: string, public domains: string[]) {
		q.push(async () => {
			return this.exec();
		});
		requests[this.id] = this;
		queue_order.push(this);
	}

	/**
	 * Runtime check that a value parsed from the analysis output has the
	 * shape of an {@link Image}, so that malformed `new_file` payloads are
	 * ignored instead of breaking getGoodImages() later on.
	 */
	private static isImage(candidate: unknown): candidate is Image {
		if (typeof candidate !== "object" || candidate === null) {
			return false;
		}
		const { url, domain, found_headers } = candidate as Record<
			string,
			unknown
		>;
		return (
			typeof url === "string" &&
			typeof domain === "string" &&
			typeof found_headers === "object" &&
			found_headers !== null
		);
	}

	/**
	 * Number of requests that precede this one in `queue_order`.
	 *
	 * Requests stay in `queue_order` until their process closes, so requests
	 * that are currently running are counted as well.
	 *
	 * @returns 0 unless this request is still "waiting"; otherwise its index
	 * in `queue_order` (or the queue length if it is not in the queue).
	 */
	getJobsAhead(): number {
		if (this.status !== "waiting") {
			return 0;
		}
		const position = queue_order.indexOf(this);
		return position === -1 ? queue_order.length : position;
	}

	/**
	 * Builds a plain-object snapshot of the request.
	 *
	 * @returns the request fields plus derived values: `jobs_ahead`, the
	 * de-duplicated `images` per domain, `elapsed_time_s` (seconds since the
	 * request was created, frozen at finish time once finished) and
	 * `zip_url` (only set once the request is finished).
	 */
	async getJSON(): Promise<{
		url: string;
		domains: string[];
		jobs_ahead: number;
		id: string;
		status: string;
		output: string;
		images: Record<string, Image[]>;
		request_time: number;
		started_time: number | null;
		finished_time: number | null;
		processing_took: number | null;
		waiting_took: number | null;
		elapsed_time_s: number;
		zip_url: string | null;
	}> {
		const is_finished = this.status === "finished";
		// setFinished() assigns status and finished_time together, so the -1
		// fallback only applies if status was changed from outside.
		const end_time = is_finished ? this.finished_time || -1 : Date.now();
		return {
			url: this.url,
			domains: this.domains,
			jobs_ahead: this.getJobsAhead(),
			id: this.id,
			status: this.status,
			output: this.output,
			images: this.getGoodImages(),
			request_time: this.request_time,
			started_time: this.started_time,
			finished_time: this.finished_time,
			processing_took: this.processing_took,
			waiting_took: this.waiting_took,
			elapsed_time_s: Math.round((end_time - this.request_time) / 1000),
			zip_url: is_finished
				? `/api/requests/${this.id}/all-screenshots`
				: null,
		};
	}

	/**
	 * Picks, per domain, a small set of images that together show every
	 * distinct `found_headers` value seen for that domain.
	 *
	 * Images of a domain are visited from the one with the most header
	 * values to the one with the fewest (ties keep arrival order); an image
	 * is kept only if it contributes at least one value not shown by an
	 * image kept earlier.
	 *
	 * @returns selected images keyed by domain, domains in order of first
	 * appearance. A domain whose images carry no values maps to `[]`.
	 */
	getGoodImages(): Record<string, Image[]> {
		// Group once instead of re-filtering this.images for every domain.
		const by_domain = new Map<string, Image[]>();
		for (const image of this.images) {
			const bucket = by_domain.get(image.domain);
			if (bucket) {
				bucket.push(image);
			} else {
				by_domain.set(image.domain, [image]);
			}
		}

		const result: Record<string, Image[]> = {};
		by_domain.forEach((domain_images, domain) => {
			// Extract the values once per image; the sort works on a fresh
			// array, so this.images itself is never reordered.
			const ranked = domain_images
				.map((image) => ({
					image,
					values: Object.values(image.found_headers),
				}))
				.sort((a, b) => b.values.length - a.values.length);

			const distinct_total = new Set(
				ranked.reduce<string[]>(
					(all, entry) => all.concat(entry.values),
					[]
				)
			).size;

			const shown_values = new Set<string>();
			const images_to_show: Image[] = [];
			for (const { image, values } of ranked) {
				const shown_before = shown_values.size;
				for (const value of values) {
					shown_values.add(value);
				}
				if (shown_values.size > shown_before) {
					images_to_show.push(image);
				}
				if (shown_values.size === distinct_total) {
					// Every value is already covered; remaining images add nothing.
					break;
				}
			}
			result[domain] = images_to_show;
		});
		return result;
	}

	/**
	 * Marks the request as finished and records the finish time. When the
	 * request had been started, also fills in `processing_took` and
	 * `waiting_took`.
	 */
	setFinished(): void {
		this.status = "finished";
		this.finished_time = Date.now();
		if (this.started_time) {
			this.processing_took = this.finished_time - this.started_time;
			this.waiting_took = this.started_time - this.request_time;
		}
	}

	/**
	 * Runs the analysis for this request inside a pooled container.
	 *
	 * Marks the request as running, waits for a container to become ready
	 * and spawns `docker exec <container> /opt/run-analysis.sh <json> <id>`.
	 * stdout and stderr are appended to `output`; stdout chunks that parse
	 * as JSON with a valid `new_file` object are added to `images`.
	 *
	 * @returns a promise that resolves when the process closes with exit
	 * code 0 and rejects with an `Error` when it closes with any other
	 * result or could not be spawned at all. In every case the request is
	 * marked finished, the container is closed and the request is removed
	 * from `queue_order` exactly once.
	 */
	async exec(): Promise<void> {
		this.started_time = Date.now();
		this.status = "running";
		const container = containerPool.getContainer();
		await container.waitReady();
		return new Promise<void>((resolve, reject) => {
			let settled = false;
			// "error" may be followed by "close"; make sure cleanup runs once.
			const finalize = (failure: Error | null): void => {
				if (settled) {
					return;
				}
				settled = true;
				this.setFinished();
				container.close();
				queue_order = queue_order.filter((request) => request !== this);
				if (failure) {
					reject(failure);
				} else {
					resolve();
				}
			};

			this.process = spawn(
				"docker",
				[
					"exec",
					container.id,
					"/opt/run-analysis.sh",
					JSON.stringify({
						url: this.url,
						third_party_domains: this.domains,
					}),
					this.id,
				],
				{ cwd: process.cwd() }
			);

			// Without a listener an "error" event (e.g. the docker binary
			// cannot be spawned) would be thrown as an uncaught exception.
			this.process.on("error", (error: Error) => {
				this.output += `Failed to run analysis process: ${error.message}\n`;
				if (this.process.pid === undefined) {
					// The process never started, so the request is over.
					finalize(error);
				}
				// Otherwise the process is still alive (e.g. a failed kill);
				// keep waiting for "close".
			});

			this.process.on("close", (exitCode, signal) => {
				if (exitCode === 0) {
					finalize(null);
				} else {
					finalize(
						new Error(
							`Analysis process for request ${
								this.id
							} ended with exit code ${String(
								exitCode
							)} (signal: ${String(signal)})`
						)
					);
				}
			});

			this.process.stdout.on("data", (d: Buffer) => {
				const text = d.toString();
				// NOTE(review): each chunk is parsed as one complete JSON
				// document. Stream chunks are not guaranteed to line up with
				// the messages the script prints, so a message split across
				// chunks (or several in one chunk) is not recognised here.
				// Confirm the script's output format before changing this.
				try {
					const parsed = JSON.parse(text) as unknown;
					if (
						is(parsed, predicates.object) &&
						ScreenshotRequest.isImage(parsed.new_file)
					) {
						this.images.push(parsed.new_file);
					}
				} catch {
					// Not JSON: ordinary log output, only kept in `output`.
				}
				this.output += text;
			});

			this.process.stderr.on("data", (d: Buffer) => {
				this.output += d.toString();
			});
		});
	}
}
|