import { is, predicates } from "@sealcode/ts-predicates";
import { ChildProcessWithoutNullStreams, spawn } from "child_process";
import { v4 as uuid } from "uuid";
import containerPool from "./container-pool";
import { q, requests } from "./memory";

/**
 * Requests in creation order. A request is appended by its constructor and
 * removed once its analysis process has closed.
 */
let queue_order: ScreenshotRequest[] = [];

/**
 * Description of a file announced by the analysis process through a
 * `new_file` message on its standard output.
 */
export type Image = {
    url: string;
    domain: string;
    found_headers: Record<string, string>;
    filename: string;
};

/**
 * Sort comparator that puts the image with more found headers first; ties are
 * broken by the total length of the header values (longer first).
 */
function compareByHeaderRichness(image1: Image, image2: Image): number {
    const values1 = Object.values(image1.found_headers);
    const values2 = Object.values(image2.found_headers);
    if (values1.length !== values2.length) {
        return values2.length - values1.length;
    }
    // same amount of headers, see who has longest values
    return values2.join("").length - values1.join("").length;
}

/**
 * A single screenshot/analysis job for one URL.
 *
 * Constructing an instance pushes the job onto the shared queue `q`,
 * registers it in `requests` under its `id`, and appends it to the
 * module-level waiting order used by {@link ScreenshotRequest.getJobsAhead}.
 */
export default class ScreenshotRequest {
    public id = uuid();
    public status = "waiting";
    /** Everything the analysis process wrote to stdout and stderr so far. */
    public output = "";
    public images: Image[] = [];
    public request_time: number = Date.now();
    public started_time: number | null = null;
    public finished_time: number | null = null;
    /** Milliseconds between `started_time` and `finished_time`. */
    public processing_took: number | null = null;
    /** Milliseconds between `request_time` and `started_time`. */
    public waiting_took: number | null = null;
    /** The spawned `docker exec` process; assigned by {@link ScreenshotRequest.exec}. */
    public process: ChildProcessWithoutNullStreams;

    /**
     * @param url - address passed to the analysis script as `url`
     * @param domains - passed to the analysis script as `third_party_domains`
     */
    constructor(public url: string, public domains: string[]) {
        q.push(async () => {
            return this.exec();
        });
        requests[this.id] = this;
        queue_order.push(this);
    }

    /**
     * Number of requests that precede this one in the waiting order.
     *
     * @returns `0` unless the status is `"waiting"`; otherwise the position
     * of this request in the order (or the order's length if it is absent).
     */
    getJobsAhead(): number {
        if (this.status !== "waiting") {
            return 0;
        }
        const position = queue_order.indexOf(this);
        return position === -1 ? queue_order.length : position;
    }

    /**
     * Builds a plain-object snapshot of the request state.
     *
     * `elapsed_time_s` is measured from `request_time` up to `finished_time`
     * for finished requests, or up to the current time otherwise. `zip_url`
     * is only set for finished requests.
     */
    async getJSON(): Promise<{
        url: string;
        domains: string[];
        jobs_ahead: number;
        id: string;
        status: string;
        output: string;
        images: Image[];
        request_time: number;
        started_time: number | null;
        finished_time: number | null;
        processing_took: number | null;
        waiting_took: number | null;
        elapsed_time_s: number;
        zip_url: string | null;
    }> {
        const is_finished = this.status === "finished";
        const end_time = is_finished ? this.finished_time || -1 : Date.now();
        return {
            url: this.url,
            domains: this.domains,
            jobs_ahead: this.getJobsAhead(),
            id: this.id,
            status: this.status,
            output: this.output,
            images: this.getGoodImages(),
            request_time: this.request_time,
            started_time: this.started_time,
            finished_time: this.finished_time,
            processing_took: this.processing_took,
            waiting_took: this.waiting_took,
            elapsed_time_s: Math.round((end_time - this.request_time) / 1000),
            zip_url: is_finished
                ? `/api/requests/${this.id}/all-screenshots`
                : null,
        };
    }

    /**
     * Selects, per domain, a subset of images that together show every
     * distinct header value found for that domain.
     *
     * Images of a domain are visited from the richest (most headers, then
     * longest values) to the poorest; an image is kept only if it contributes
     * at least one header value not shown by an already kept image.
     *
     * @returns the kept images, grouped by domain in first-seen order
     */
    getGoodImages(): Image[] {
        const result: Image[] = [];

        // Group images by domain in one pass; Map keeps first-seen domain order.
        const by_domain = new Map<string, Image[]>();
        for (const image of this.images) {
            const group = by_domain.get(image.domain);
            if (group) {
                group.push(image);
            } else {
                by_domain.set(image.domain, [image]);
            }
        }

        for (const group of Array.from(by_domain.values())) {
            // `group` is a fresh array, so sorting in place leaves `this.images` untouched.
            const images = group.sort(compareByHeaderRichness);
            const all_values = new Set(
                images.reduce<string[]>(
                    (values, image) =>
                        values.concat(Object.values(image.found_headers)),
                    []
                )
            );
            const shown_values_for_domain = new Set<string>();
            for (const image of images) {
                let any_new_values = false;
                for (const value of Object.values(image.found_headers)) {
                    if (!shown_values_for_domain.has(value)) {
                        shown_values_for_domain.add(value);
                        any_new_values = true;
                    }
                }
                if (any_new_values) {
                    result.push(image);
                }
                // Every value is covered; remaining images add nothing.
                if (shown_values_for_domain.size === all_values.size) {
                    break;
                }
            }
        }
        return result;
    }

    /**
     * Marks the request as finished and records timing information.
     * The two durations are only computed when `started_time` is set.
     */
    setFinished(): void {
        this.status = "finished";
        this.finished_time = Date.now();
        if (this.started_time) {
            this.processing_took = this.finished_time - this.started_time;
            this.waiting_took = this.started_time - this.request_time;
        }
    }

    /**
     * Runs `/opt/run-analysis.sh` inside a container from the pool via
     * `docker exec` and collects its output.
     *
     * When the process closes (or could not be spawned at all) the request is
     * marked finished, the container is closed and the request is removed
     * from the waiting order.
     *
     * NOTE(review): if `container.waitReady()` rejects, none of that cleanup
     * runs and the status stays `"running"` - confirm whether that is intended.
     *
     * @returns a promise resolved when the process exits with code 0
     * @throws rejects with an `Error` when the process exits with any other
     * code or cannot be spawned
     */
    async exec(): Promise<void> {
        this.started_time = Date.now();
        this.status = "running";
        const container = containerPool.getContainer();
        await container.waitReady();
        return new Promise<void>((resolve, reject) => {
            // "error" and "close" may both fire for one process; clean up once.
            let settled = false;
            const finalize = (failure?: Error): void => {
                if (settled) {
                    return;
                }
                settled = true;
                this.setFinished();
                container.close();
                queue_order = queue_order.filter(
                    (request) => request !== this
                );
                if (failure) {
                    reject(failure);
                } else {
                    resolve();
                }
            };

            // Arguments are passed as an argv array (no shell), so the URL
            // cannot be interpreted as shell syntax.
            const child = spawn(
                "docker",
                [
                    "exec",
                    container.id,
                    "/opt/run-analysis.sh",
                    JSON.stringify({
                        url: this.url,
                        third_party_domains: this.domains,
                    }),
                    this.id,
                ],
                { cwd: process.cwd() }
            );
            this.process = child;

            // Without a listener an "error" event is thrown as an uncaught
            // exception. Only a failed spawn (no pid) ends the request here;
            // for other errors the process is still alive and "close" decides.
            child.on("error", (error: Error) => {
                this.output += `${error.message}\n`;
                if (child.pid === undefined) {
                    finalize(error);
                }
            });

            child.on("close", (exitCode) => {
                finalize(
                    exitCode === 0
                        ? undefined
                        : new Error(
                              `analysis process exited with code ${String(
                                  exitCode
                              )}`
                          )
                );
            });

            child.stdout.on("data", (d: Buffer) => {
                const text = d.toString();
                this.collectImages(text);
                this.output += text;
                /* console.log("DATA!", d.toString()); */
            });

            child.stderr.on("data", (d: Buffer) => {
                this.output += d.toString();
                /* console.log("STDERR!", d.toString()); */
            });
        });
    }

    /**
     * Extracts `new_file` messages from a chunk of stdout text.
     *
     * The chunk is first parsed as one JSON document. Only if that fails and
     * the chunk has several non-empty lines is each line parsed on its own,
     * so messages that arrive together in one chunk are not lost.
     */
    private collectImages(chunk: string): void {
        if (this.collectImage(chunk)) {
            return;
        }
        const lines = chunk.split("\n").filter((line) => line.trim() !== "");
        if (lines.length < 2) {
            return;
        }
        for (const line of lines) {
            this.collectImage(line);
        }
    }

    /**
     * Parses `raw` as JSON and, when it is an object with an object-valued
     * `new_file` property, stores that value in `images`.
     *
     * @returns `true` when `raw` was valid JSON, `false` otherwise
     */
    private collectImage(raw: string): boolean {
        try {
            const parsed = JSON.parse(raw) as unknown;
            if (
                is(parsed, predicates.object) &&
                is(parsed.new_file, predicates.object)
            ) {
                this.images.push(parsed.new_file as Image);
            }
            return true;
        } catch (e) {
            // Not JSON (plain log output); the raw text is still kept in `output`.
            return false;
        }
    }
}