2022-06-19 13:33:25 +02:00
|
|
|
import Router from "@koa/router";
|
|
|
|
import { hasShape, predicates } from "@sealcode/ts-predicates";
|
|
|
|
import { spawn } from "child_process";
|
|
|
|
import Koa from "koa";
|
|
|
|
import mount from "koa-mount";
|
|
|
|
import serve from "koa-static";
|
|
|
|
import qs from "qs";
|
|
|
|
import { Readable } from "stream";
|
|
|
|
import { v4 as uuid } from "uuid";
|
2022-07-08 16:36:48 +02:00
|
|
|
import { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT } from "./docker-args";
|
2022-06-19 13:33:25 +02:00
|
|
|
import { requests } from "./memory";
|
|
|
|
import ScreenshotRequest from "./request";
|
2022-04-24 17:24:27 +02:00
|
|
|
|
|
|
|
// Route table for the application; the handlers in this file are registered on it.
const router = new Router();

// Main Koa application.
const app = new Koa();

// Separate Koa application that only serves the files found in ./static.
// It is mounted on the main application under the /static path prefix.
const Static = new Koa().use(serve("./static"));
app.use(mount("/static", Static));
|
2022-04-24 17:24:27 +02:00
|
|
|
|
2022-06-19 13:33:25 +02:00
|
|
|
/**
 * Streams the output of `docker logs -f <docker_id>` into an HTML response.
 *
 * Everything the command writes to stdout is forwarded to `output_stream`.
 * While the command runs, an empty `<span>` is pushed every 500 ms to keep
 * the connection busy. When the command closes, or cannot be started at all,
 * the open `<pre>` is closed, a script clearing `window.interval` on the page
 * is appended and the stream is ended.
 *
 * @param docker_id id of the container whose logs are followed
 * @param output_stream readable stream that is used as the HTTP response body
 */
function attach(docker_id: string, output_stream: Readable): void {
	// Set once the stream has been ended; pushing after `push(null)` is an error.
	let finished = false;

	// to prevent browser timeout
	const interval = setInterval(() => output_stream.push("<span></span>"), 500);
	const task = spawn("docker", ["logs", "-f", docker_id]);

	// Terminates the response exactly once, no matter which event triggers it.
	const finish = (): void => {
		if (finished) {
			return;
		}
		finished = true;
		clearInterval(interval);
		output_stream.push("</pre>");
		output_stream.push(/* HTML */ `<script>
			clearInterval(window.interval);
		</script>`);
		output_stream.push(null);
	};

	task.stdout.on("data", (d: Buffer) => {
		if (!finished) {
			output_stream.push(d);
		}
		console.log("DATA!", d.toString());
	});

	task.stderr.on("data", (d: Buffer) => {
		/* output_stream.push(d); */
		console.log("STDERR!", d.toString());
	});

	task.stdout.on("error", (err: Error) => {
		// The stream is not in object mode, so it only accepts strings and
		// buffers; pushing the Error object itself would throw.
		if (!finished) {
			output_stream.push(String(err));
		}
	});

	// An "error" event without a listener is thrown, which would take the
	// whole server down when e.g. the docker binary is missing.
	task.on("error", (err: Error) => {
		console.error("Could not run `docker logs`:", err);
		finish();
	});

	task.on("close", () => finish());
}
|
|
|
|
|
|
|
|
/**
 * GET / — serves the single-page form used to request screenshots.
 *
 * The inline script posts the form values to /api/requests and then polls
 * /api/requests/:id once a second until the reported status is "finished",
 * rendering the JSON response and the `output` field with links to
 * /static and /api resources turned into anchors.
 *
 * The url and every domain are passed through `encodeURIComponent`, so
 * characters such as `&`, `#` or `+` inside them do not break the query
 * string. Whitespace around the comma-separated domains is trimmed.
 */
router.get("/", async (ctx) => {
	ctx.body = /* HTML */ `<!DOCTYPE html>
		<html>
			<body>
				<form onsubmit="formSubmit(event)">
					<label for="url_input">URL:</label>
					<input type="text" name="url" id="url_input" />
					<br />
					<label for="domains">Domeny (oddzielone przecinkami):</label>
					<input
						type="text"
						name="domains"
						id="domains"
						value="doubleclick.net,facebook.com"
					/>
					<br />
					<input type="submit" />
				</form>
				<code><pre id="output"></pre></code>
				<code><pre id="stdout"></pre></code>
			</body>
			<script>
				async function sleep(time) {
					return new Promise((resolve) => setTimeout(resolve, time));
				}
				async function formSubmit(e) {
					e.preventDefault();
					let response = { status: "sending first request..." };
					const url = \`/api/requests?url=\${encodeURIComponent(url_input.value)}&\${domains.value
						.split(",")
						.map((d) => "domains[]=" + encodeURIComponent(d.trim()))
						.join("&")}\`;
					const { id } = await (await fetch(url, { method: "post" })).json();
					do {
						response = await (await fetch(\`/api/requests/\${id}\`)).json();
						output.innerHTML = JSON.stringify(response, null, " ").replace(
							/\\/(static|api)\\/.*(.png|all-screenshots|v=[0-9]+)/g,
							'<a href="$&">$&</a>'
						);
						stdout.innerHTML = response.output.replace(
							/\\/(static|api)\\/.*(.png|all-screenshots)/g,
							'<a href="$&">$&</a>'
						);
						await sleep(1000);
					} while (response.status !== "finished");
				}
			</script>
		</html>`;
});
|
|
|
|
|
|
|
|
/**
 * GET /preview?url=... — starts a screenshot container and streams its logs.
 *
 * Responds with a streamed HTML page containing an `<img>` that the page
 * re-requests every 500 ms, followed by the output of the container.
 * The container is started with `docker` + DOCKER_ARGS, a JSON description
 * of the job and a freshly generated id. Whatever the start command prints
 * to stdout is used as the container id handed to `attach`
 * (presumably DOCKER_ARGS runs the container detached — confirm in ./docker-args).
 *
 * Responds with 422 when the `url` query parameter is missing or empty.
 */
router.get("/preview", async (ctx) => {
	const params = qs.parse(ctx.querystring);

	// Validate before any streaming starts, so a bad request gets a proper
	// client-error status instead of an exception or a half-written page.
	if (!hasShape({ url: predicates.string }, params) || !params.url) {
		ctx.status = 422;
		ctx.body = "specify url!";
		return;
	}
	const url: string = params.url;

	// The url comes straight from the query string; encode the characters
	// that are significant in HTML before echoing it into the page.
	const escapeHtml = (text: string): string =>
		text.replace(/[&<>"']/g, (char) => `&#${char.charCodeAt(0)};`);

	const response = new Readable({
		read() {
			/*noop */
		},
	});

	// stream data
	ctx.type = "html"; // <-- THIS is the important step!
	ctx.body = response;
	response.push("<!doctype html>");

	const id = uuid();
	response.push(
		`<img id="preview" width="1080" height="608" src="/static/${id}/preview.png?id=0"/><br/>`
	);
	response.push(/* HTML */ `<script>
		window.interval = setInterval(() => (preview.src = preview.src + "0"), 500);
	</script>`);
	response.push(`Got request to screenshot ${escapeHtml(url)}<pre>`);

	// Serialize instead of interpolating, so quotes or backslashes in the url
	// cannot break the JSON document or add extra fields to it.
	const job = JSON.stringify({
		url,
		third_party_domains: ["hotjar.com", "cookielaw.org", "facebook.com", "gemius.pl"],
	});

	let docker_id = "";
	let failed_to_start = false;
	const starter = spawn("docker", [...DOCKER_ARGS, job, id], { cwd: process.cwd() });

	starter.stdout.on("data", (data: Buffer) => {
		docker_id += data.toString().replace(/\n/g, "");
	});

	// An "error" event without a listener is thrown and would crash the server
	// (e.g. when the docker binary is missing). End the page cleanly instead.
	starter.on("error", (err: Error) => {
		failed_to_start = true;
		console.error("Could not start docker:", err);
		response.push("could not start the screenshot container</pre>");
		response.push(/* HTML */ `<script>
			clearInterval(window.interval);
		</script>`);
		response.push(null);
	});

	starter.on("close", () => {
		if (failed_to_start) {
			// The stream has already been ended by the error handler.
			return;
		}
		response.push("spawned " + docker_id);
		attach(docker_id, response);
	});
});
|
|
|
|
|
2022-05-05 21:54:34 +02:00
|
|
|
/**
 * POST /api/requests?url=...&domains[]=... — registers a screenshot request.
 *
 * Answers 422 with a short explanation when `url` is not a string or
 * `domains` is not an array of strings. Otherwise creates a
 * ScreenshotRequest and redirects (303) to /api/requests/<id>.
 */
router.post("/api/requests", async (ctx) => {
	const query = qs.parse(ctx.querystring);

	// Every validation failure is reported the same way: message + 422.
	const reject = (message: string): void => {
		ctx.body = message;
		ctx.status = 422;
	};

	if (!hasShape({ url: predicates.string }, query)) {
		reject("Specify url");
		return;
	}
	if (!hasShape({ domains: predicates.array(predicates.string) }, query)) {
		reject("Specify domains as an array of strings");
		return;
	}
	if (!Array.isArray(query.domains)) {
		reject("'domains' should be an array of strings");
		return;
	}

	const screenshot_request = new ScreenshotRequest(query.url, query.domains);
	ctx.status = 303;
	ctx.redirect(`/api/requests/${screenshot_request.id}`);
});
|
|
|
|
|
|
|
|
/**
 * GET /api/requests/:id — reports the current state of a screenshot request.
 *
 * Responds with the result of the request's `getJSON()`, or with 404 when
 * no request is stored under the given id.
 */
router.get("/api/requests/:id", async (ctx) => {
	const id = ctx.params.id;

	// Only accept ids stored directly on `requests`. If it is a plain object,
	// an id such as "constructor" would otherwise resolve to an inherited
	// member and fail with a 500 instead of a 404.
	const request = Object.prototype.hasOwnProperty.call(requests, id)
		? requests[id]
		: undefined;
	if (!request) {
		ctx.status = 404;
		return;
	}
	ctx.body = await request.getJSON();
});
|
|
|
|
|
2022-07-08 16:36:48 +02:00
|
|
|
/**
 * GET /api/requests/:id/all-screenshots — downloads a zip of the screenshots.
 *
 * Responds with 404 when the request is unknown or not yet finished.
 * Otherwise runs `zip --junk-paths -` inside a docker container over the
 * files returned by `request.getGoodImages()` and streams the command's
 * stdout to the client as an attachment named after the request's url.
 */
router.get("/api/requests/:id/all-screenshots", async (ctx) => {
	const id = ctx.params.id;

	// Only accept ids stored directly on `requests`; if it is a plain object,
	// inherited keys such as "constructor" must not be treated as requests.
	const request = Object.prototype.hasOwnProperty.call(requests, id)
		? requests[id]
		: undefined;
	if (!request || request.status !== "finished") {
		ctx.status = 404;
		return;
	}

	ctx.attachment(`${request.url.replace(/\W/g, "_")}_all-screenshots.zip`);
	ctx.response.set("content-type", "application/zip");

	// Named `zip_task` so that the Node.js global `process` is not shadowed.
	const zip_task = spawn("docker", [
		"run",
		// Remove the container once zip exits; otherwise every download
		// leaves a stopped container behind.
		"--rm",
		"-v",
		VOLUME_MOUNT,
		IMAGE_NAME,
		"zip",
		"--junk-paths",
		"-",
		...request
			.getGoodImages()
			.map((image) => `/opt/static/${request.id}/${image.filename}`),
	]);

	// An "error" event without a listener is thrown and would crash the server
	// (e.g. when the docker binary is missing).
	zip_task.on("error", (err: Error) => {
		console.error("Could not run zip in docker:", err);
	});

	ctx.body = zip_task.stdout;
});
|
|
|
|
|
2022-04-24 17:24:27 +02:00
|
|
|
// Register every route defined on `router`, plus the router's handling of
// requests that use a method no route allows.
app.use(router.routes()).use(router.allowedMethods());

const port = 3000;

// Log from the listen callback, so the message is printed only after the
// port has actually been bound (and not when binding fails).
app.listen(port, () => {
	console.log(`server started on port ${port}`);
});
|