Generate thumbnails of screenshots

This commit is contained in:
Kuba Orlik 2022-07-15 11:36:44 +02:00
parent c6e20fdb6f
commit e003190c2f
5 changed files with 48 additions and 19 deletions

1
.gitignore vendored
View File

@ -4,3 +4,4 @@
/.log/ /.log/
/yarn-error.log /yarn-error.log
/lib/ /lib/
@types

View File

@ -112,10 +112,17 @@ with Image.open(output_file) as im:
os.remove(output_file) os.remove(output_file)
im = im.crop((0, 24, 2880, 1588)) im = im.crop((0, 24, 2880, 1588))
im = im.resize((im.width // 2, im.height // 2)) im = im.resize((im.width // 2, im.height // 2))
thumbnail = im.resize((im.width // 5, im.height // 5))
output_filename = domain.replace(".", "_") + "_" + output_suffix output_filename = domain.replace(".", "_") + "_" + output_suffix
thumbnail_filename = domain.replace(".", "_") + "_thumb_" + output_suffix.replace("png", "jpg")
im.save(output_dir + "/" + output_filename, "PNG") im.save(output_dir + "/" + output_filename, "PNG")
thumbnail.save(output_dir + "/" + thumbnail_filename, "JPEG")
print(json.dumps({"new_file": print(json.dumps({"new_file":
{"url": base_url + "/static/" + os.path.dirname(output_file_relative) + "/" + output_filename, {
"url": base_url + "/static/" + os.path.dirname(output_file_relative) + "/" + output_filename,
"thumb_url": base_url + "/static/" + os.path.dirname(output_file_relative) + "/" + thumbnail_filename,
"domain": domain, "domain": domain,
"found_headers": found_needles, "found_headers": found_needles,
"filename": output_filename}})) "filename": output_filename,
"thumb_filename": thumbnail_filename
}}))

View File

@ -45,7 +45,7 @@ done <<< "$DOMAINS"
click 1270 217 # the "trash" icon, so requests from plamienie don't appear in the screenshots click 1270 217 # the "trash" icon, so requests from plamienie don't appear in the screenshots
load_website "$URL" "$URL" load_website "$URL" "$URL"
echo "{\"current_action\": \"Strona wczytana\"}" echo "{\"current_action\": \"Strona $ORIGIN_DOMAIN wczytana. Przygotowywanie do analizy...\"}"
grab load_website grab load_website
open_network_inspector open_network_inspector
grab open_network_inspector grab open_network_inspector

View File

@ -81,11 +81,11 @@ router.get("/", async (ctx) => {
do { do {
response = await (await fetch(\`/api/requests/\${id}\`)).json(); response = await (await fetch(\`/api/requests/\${id}\`)).json();
output.innerHTML = JSON.stringify(response, null, " ").replace( output.innerHTML = JSON.stringify(response, null, " ").replace(
/\\/(static|api)\\/.*(.png|all-screenshots|v=[0-9]+)/g, /\\/(static|api)\\/.*(.png|.jpg|all-screenshots|v=[0-9]+)/g,
'<a href="$&">$&</a>' '<a href="$&">$&</a>'
); );
stdout.innerHTML = response.output.replace( stdout.innerHTML = response.output.replace(
/\\/(static|api)\\/.*(.png|all-screenshots)/g, /\\/(static|api)\\/.*(.png|.jpg|all-screenshots)/g,
'<a href="$&">$&</a>' '<a href="$&">$&</a>'
); );
await sleep(1000); await sleep(1000);
@ -181,19 +181,7 @@ router.get("/api/requests/:id/all-screenshots", async (ctx) => {
} }
ctx.attachment(`${request.url.replace(/\W/g, "_")}_all-screenshots.zip`); ctx.attachment(`${request.url.replace(/\W/g, "_")}_all-screenshots.zip`);
ctx.response.set("content-type", "application/zip"); ctx.response.set("content-type", "application/zip");
const process = spawn("docker", [ ctx.body = request.getZIP();
"run",
"-v",
VOLUME_MOUNT,
IMAGE_NAME,
"zip",
"--junk-paths",
"-",
...request
.getGoodImages()
.map((image) => `/opt/static/${request.id}/${image.filename}`),
]);
ctx.body = process.stdout;
}); });
app.use(router.routes()).use(router.allowedMethods()); app.use(router.routes()).use(router.allowedMethods());

View File

@ -1,7 +1,9 @@
import { is, predicates } from "@sealcode/ts-predicates"; import { is, predicates } from "@sealcode/ts-predicates";
import { ChildProcessWithoutNullStreams, spawn } from "child_process"; import { ChildProcessWithoutNullStreams, spawn } from "child_process";
import { Readable } from "stream";
import { v4 as uuid } from "uuid"; import { v4 as uuid } from "uuid";
import containerPool from "./container-pool"; import containerPool from "./container-pool";
import { IMAGE_NAME, VOLUME_MOUNT } from "./docker-args";
import { q, requests } from "./memory"; import { q, requests } from "./memory";
let queue_order: ScreenshotRequest[] = []; let queue_order: ScreenshotRequest[] = [];
@ -90,10 +92,25 @@ export default class ScreenshotRequest {
this.status === "finished" this.status === "finished"
? `/api/requests/${this.id}/all-screenshots` ? `/api/requests/${this.id}/all-screenshots`
: null, : null,
preview: `/static/${this.id}/preview.jpg?v=${Date.now()}`, preview: await this.getPreviewURL(),
}; };
} }
/**
 * Builds the URL of this request's preview image, with a `v` query
 * parameter derived from the file's modification time so that clients
 * re-fetch the preview only after it has actually changed.
 *
 * The modification time is read by running `stat -c %Y` (GNU coreutils
 * syntax) on the preview file. When `stat` cannot be started, or the
 * file does not exist yet, the `v` parameter is left empty instead of
 * failing the whole request.
 *
 * @returns the preview URL, e.g. `/static/<id>/preview.jpg?v=<mtime>`
 */
async getPreviewURL(): Promise<string> {
	const previewPath = `${__dirname}/../../static/${this.id}/preview.jpg`;
	// Named `statProcess` so the Node.js global `process` is not shadowed.
	const statProcess = spawn("stat", ["-c", "%Y", previewPath]);
	let output = "";
	statProcess.stdout.on("data", (chunk) => {
		output += chunk.toString();
	});
	statProcess.stderr.on("data", (chunk) => console.log(chunk.toString()));
	const mtime = await new Promise<string>((resolve) => {
		// Without an "error" listener a failed spawn (e.g. `stat` missing
		// from PATH) is raised as an unhandled "error" event and takes the
		// whole server down.
		statProcess.on("error", (error) => {
			console.error(`Could not run stat on ${previewPath}:`, error);
			resolve("");
		});
		// Trim once, after all of the output has been collected.
		statProcess.on("close", () => resolve(output.trim()));
	});
	return `/static/${this.id}/preview.jpg?v=${mtime}`;
}
getGoodImages(): Image[] { getGoodImages(): Image[] {
/* find the best set of screenshots, that is: a set of screenshots that /* find the best set of screenshots, that is: a set of screenshots that
contain all the header values that appear in the headers, but with as little contain all the header values that appear in the headers, but with as little
@ -232,4 +249,20 @@ export default class ScreenshotRequest {
}); });
}); });
} }
/**
 * Starts a `zip` process inside a docker container and returns its
 * standard output, which carries the archive of this request's "good"
 * screenshots (see `getGoodImages`).
 *
 * The archive is produced on the fly: `-` is passed to `zip` as the
 * archive name so it is written to stdout, and `--junk-paths` is passed
 * so entries are stored by file name only (per the zip manual).
 *
 * @returns a readable stream with the ZIP data; it is destroyed with the
 * spawn error if the docker process cannot be started
 */
getZIP(): Readable {
	const imagePaths = this.getGoodImages().map(
		(image) => `/opt/static/${this.id}/${image.filename}`
	);
	// Named `zipProcess` so the Node.js global `process` is not shadowed.
	const zipProcess = spawn("docker", [
		"run",
		// Remove the container once zip exits; otherwise every download
		// leaves a stopped container behind.
		"--rm",
		"-v",
		VOLUME_MOUNT,
		IMAGE_NAME,
		"zip",
		"--junk-paths",
		"-",
		...imagePaths,
	]);
	// Drain stderr so the child can never stall on a full pipe, and keep
	// its diagnostics visible in the server log.
	zipProcess.stderr.on("data", (chunk) => console.log(chunk.toString()));
	// Without an "error" listener a failed spawn (e.g. docker missing from
	// PATH) is raised as an unhandled "error" event and crashes the server.
	// Forward the failure to the consumer of the stream instead.
	zipProcess.on("error", (error) => {
		console.error("Could not start the zip container:", error);
		zipProcess.stdout.destroy(error);
	});
	return zipProcess.stdout;
}
} }