screenshot-service/src/index.ts

203 lines
5.9 KiB
TypeScript
Raw Normal View History

2022-06-19 13:33:25 +02:00
import Router from "@koa/router";
import { hasShape, predicates } from "@sealcode/ts-predicates";
import { spawn } from "child_process";
import Koa from "koa";
import mount from "koa-mount";
import serve from "koa-static";
import qs from "qs";
import { Readable } from "stream";
import { v4 as uuid } from "uuid";
import { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT } from "./docker-args";
2022-06-19 13:33:25 +02:00
import { requests } from "./memory";
import ScreenshotRequest from "./request";
2022-04-24 17:24:27 +02:00
// Router that collects every HTTP route registered below.
const router = new Router();

// Main Koa application.
const app = new Koa();

// Separate Koa application that serves the files from ./static;
// it is mounted on the main application under the /static prefix.
const Static = new Koa().use(serve("./static"));
app.use(mount("/static", Static));
2022-04-24 17:24:27 +02:00
2022-06-19 13:33:25 +02:00
/**
 * Follows the logs of a docker container and forwards them into a stream.
 *
 * Spawns `docker logs -f <docker_id>` and pushes everything the command
 * writes to stdout into `output_stream`. While the command is running, an
 * empty `<span>` is pushed every 500 ms. When the command ends (or cannot
 * be started at all), the closing `</pre>` and a script that stops the
 * page's `window.interval` timer are pushed and the stream is ended.
 *
 * @param docker_id - id of the container whose logs should be followed
 * @param output_stream - readable stream that receives the HTML output;
 *   it is ended by this function
 */
function attach(docker_id: string, output_stream: Readable) {
	// Set once the stream has been ended or closed, so nothing is pushed
	// afterwards (pushing after EOF raises an error on the stream).
	let finished = false;

	// to prevent browser timeout
	const interval = setInterval(() => output_stream.push("<span></span>"), 500);
	const task = spawn("docker", ["logs", "-f", docker_id]);

	const finish = () => {
		if (finished) {
			return;
		}
		finished = true;
		clearInterval(interval);
		output_stream.push("</pre>");
		output_stream.push(/* HTML */ `<script>
			clearInterval(window.interval);
		</script>`);
		output_stream.push(null);
	};

	task.stdout.on("data", (d: Buffer) => {
		if (!finished) {
			output_stream.push(d);
		}
		console.log("DATA!", d.toString());
	});

	task.stderr.on("data", (d: Buffer) => {
		/* output_stream.push(d); */
		console.log("STDERR!", d.toString());
	});

	task.stdout.on("error", (err: Error) => {
		// Only strings/buffers may be pushed into a non-object-mode stream,
		// so forward the message rather than the Error object itself.
		if (!finished) {
			output_stream.push(err.message);
		}
	});

	// Without this listener a failed spawn (e.g. docker not installed)
	// would surface as an unhandled "error" event and the keep-alive
	// interval would never be cleared.
	task.on("error", (err: Error) => {
		console.error("Could not run docker logs:", err.message);
		if (!finished) {
			output_stream.push(`Could not read container logs: ${err.message}\n`);
		}
		finish();
	});

	task.on("close", finish);

	// If the stream is closed before the command has ended (presumably when
	// the consumer goes away), stop the keep-alive timer and the log follower.
	output_stream.once("close", () => {
		if (finished) {
			return;
		}
		finished = true;
		clearInterval(interval);
		task.kill();
	});
}
/**
 * GET / — serves the HTML form used to request screenshots.
 *
 * The embedded client script posts the form values to `/api/requests`,
 * then polls `/api/requests/<id>` once per second until the reported
 * status is "finished", rendering the JSON response and its `output`
 * field with `/static/...` and `/api/...` paths turned into links.
 *
 * The query string is assembled with `encodeURIComponent` so characters
 * such as "&", "#" or "+" inside the submitted URL are not misread as
 * query-string syntax by the API. Domain names are trimmed so that
 * "a.com, b.com" works as expected.
 */
router.get("/", async (ctx) => {
	ctx.body = /* HTML */ `<!DOCTYPE html>
		<html>
			<body>
				<form onsubmit="formSubmit(event)">
					<label for="url_input">URL:</label>
					<input type="text" name="url" id="url_input" />
					<br />
					<label for="domains">Domeny (oddzielone przecinkami):</label>
					<input
						type="text"
						name="domains"
						id="domains"
						value="doubleclick.net,facebook.com"
					/>
					<br />
					<input type="submit" />
				</form>
				<code><pre id="output"></pre></code>
				<code><pre id="stdout"></pre></code>
			</body>
			<script>
				async function sleep(time) {
					return new Promise((resolve) => setTimeout(resolve, time));
				}
				async function formSubmit(e) {
					e.preventDefault();
					let response = { status: "sending first request..." };
					const query = [
						"url=" + encodeURIComponent(url_input.value),
						...domains.value
							.split(",")
							.map((d) => "domains[]=" + encodeURIComponent(d.trim())),
					].join("&");
					const url = "/api/requests?" + query;
					const { id } = await (await fetch(url, { method: "post" })).json();
					do {
						response = await (await fetch("/api/requests/" + id)).json();
						output.innerHTML = JSON.stringify(response, null, "  ").replace(
							/\\/(static|api)\\/.*(\\.png|all-screenshots)/g,
							'<a href="$&">$&</a>'
						);
						stdout.innerHTML = response.output.replace(
							/\\/(static|api)\\/.*(\\.png|all-screenshots)/g,
							'<a href="$&">$&</a>'
						);
						await sleep(1000);
					} while (response.status !== "finished");
				}
			</script>
		</html>`;
});
/** Escapes text so it can be interpolated into HTML markup as plain text. */
function escapeHtml(text: string): string {
	return text
		.replace(/&/g, "&amp;")
		.replace(/</g, "&lt;")
		.replace(/>/g, "&gt;")
		.replace(/"/g, "&quot;")
		.replace(/'/g, "&#39;");
}

/**
 * GET /preview?url=... — starts a screenshot container for the given URL
 * and streams an HTML page showing a periodically refreshed preview image
 * followed by the container's logs.
 *
 * Responds with 422 when the `url` query parameter is missing or empty.
 * The parameter is validated before any output is streamed, and the URL
 * is HTML-escaped before being echoed back into the page.
 */
router.get("/preview", async (ctx) => {
	const params = qs.parse(ctx.querystring);
	if (!hasShape({ url: predicates.string }, params) || !params.url) {
		ctx.body = "specify url!";
		ctx.status = 422;
		return;
	}

	const response = new Readable({
		read() {
			/* noop */
		},
	});
	// stream data
	ctx.body = response;
	ctx.type = "html"; // <-- THIS is the important step!

	response.push("<!doctype html>");
	const id = uuid();
	response.push(
		`<img id="preview" width="1080" height="608" src="/static/${id}/preview.png?id=0"/><br/>`
	);
	// Changing the src every 500 ms makes the browser re-fetch the image.
	response.push(/* HTML */ `<script>
		window.interval = setInterval(() => (preview.src = preview.src + "0"), 500);
	</script>`);
	response.push(`Got request to screenshot ${escapeHtml(params.url)}<pre>`);

	let docker_id = "";
	// Guards against handling both "error" and "close" for the same failure.
	let settled = false;
	const fail = (reason: string) => {
		if (settled) {
			return;
		}
		settled = true;
		response.push(
			`could not start the screenshot container: ${escapeHtml(reason)}</pre>`
		);
		response.push(/* HTML */ `<script>
			clearInterval(window.interval);
		</script>`);
		response.push(null);
	};

	const starter = spawn(
		"docker",
		[
			...DOCKER_ARGS,
			// Serialized with JSON.stringify so quotes or backslashes in the
			// URL cannot break or alter the JSON passed to the container.
			JSON.stringify({
				url: params.url,
				third_party_domains: [
					"hotjar.com",
					"cookielaw.org",
					"facebook.com",
					"gemius.pl",
				],
			}),
			id,
		],
		{ cwd: process.cwd() }
	);
	// The command's stdout is collected as the container id
	// (presumably DOCKER_ARGS starts the container detached — confirm).
	starter.stdout.on("data", (data: Buffer) => {
		docker_id += data.toString().replace(/\n/g, "");
	});
	// A failed spawn would otherwise be an unhandled "error" event and the
	// response would never be ended.
	starter.on("error", (err: Error) => fail(err.message));
	starter.on("close", () => {
		if (settled) {
			return;
		}
		if (docker_id === "") {
			fail("docker did not report a container id");
			return;
		}
		settled = true;
		response.push("spawned " + docker_id);
		attach(docker_id, response);
	});
});
2022-05-05 21:54:34 +02:00
/**
 * POST /api/requests?url=...&domains[]=... — registers a new screenshot
 * request and redirects (303) to the resource describing it.
 *
 * Answers 422 with an explanatory message when `url` is not a string or
 * `domains` is not an array of strings.
 */
router.post("/api/requests", async (ctx) => {
	const query = qs.parse(ctx.querystring);

	// Shared rejection path: message first, then the 422 status.
	const reject = (message: string): void => {
		ctx.body = message;
		ctx.status = 422;
	};

	if (!hasShape({ url: predicates.string }, query)) {
		reject("Specify url");
		return;
	}
	if (!hasShape({ domains: predicates.array(predicates.string) }, query)) {
		reject("Specify domains as an array of strings");
		return;
	}
	if (!Array.isArray(query.domains)) {
		reject("'domains' should be an array of strings");
		return;
	}

	const created = new ScreenshotRequest(query.url, query.domains);
	ctx.status = 303;
	ctx.redirect(`/api/requests/${created.id}`);
});
/**
 * GET /api/requests/:id — returns the JSON description of a screenshot
 * request, or 404 when no request with that id is stored.
 */
router.get("/api/requests/:id", async (ctx) => {
	const id = ctx.params.id;
	// Only accept ids stored as own keys: a bare `requests[id]` lookup would
	// resolve ids such as "constructor" to inherited members and fail later
	// with a 500 instead of answering 404.
	const request = Object.prototype.hasOwnProperty.call(requests, id)
		? requests[id]
		: undefined;
	if (!request) {
		ctx.status = 404;
		return;
	}
	ctx.body = await request.getJSON();
});
/**
 * GET /api/requests/:id/all-screenshots — streams a zip archive with the
 * images returned by `request.getGoodImages()` as a file download.
 *
 * Answers 404 when the request is unknown or its status is not "finished".
 * The archive is produced by running `zip --junk-paths -` in a docker
 * container and piping the command's stdout into the response.
 */
router.get("/api/requests/:id/all-screenshots", async (ctx) => {
	const id = ctx.params.id;
	// Own-key lookup so ids like "constructor" do not resolve to inherited
	// members of the requests object.
	const request = Object.prototype.hasOwnProperty.call(requests, id)
		? requests[id]
		: undefined;
	if (!request || request.status !== "finished") {
		ctx.status = 404;
		return;
	}
	ctx.attachment(`${request.url.replace(/\W/g, "_")}_all-screenshots.zip`);
	ctx.response.set("content-type", "application/zip");
	// Named `zipper` rather than `process` to avoid shadowing the Node global.
	const zipper = spawn("docker", [
		"run",
		"-v",
		VOLUME_MOUNT,
		IMAGE_NAME,
		"zip",
		"--junk-paths",
		"-",
		...request
			.getGoodImages()
			.map((image) => `/opt/static/${request.id}/${image.filename}`),
	]);
	// A failed spawn would otherwise be an unhandled "error" event; fail the
	// body stream instead so the response does not hang.
	zipper.on("error", (err: Error) => {
		console.error("Could not start the zip container:", err.message);
		zipper.stdout.destroy(err);
	});
	ctx.body = zipper.stdout;
});
2022-04-24 17:24:27 +02:00
// Register the routes, plus the router's handling of unsupported methods.
app.use(router.routes());
app.use(router.allowedMethods());

// Start the HTTP server.
const port = 3000;
app.listen(port);
console.log(`server started on port ${port}`);