diff --git a/Docker/annotate_header.sh b/Docker/annotate_header.sh index eb86ef9..383b942 100755 --- a/Docker/annotate_header.sh +++ b/Docker/annotate_header.sh @@ -12,15 +12,19 @@ get_width(){ annotate_header(){ - echo annotate $1 + echo annotate $1 $2 $3 filename=$1 shift; + domain=$1; + shift; + count=$1; + shift; d=$(date "+%Y-%m-%d__%H_%M_%S") #filename="2022-03-10__19_33_55__set-cookie.png" cropped_filename="${filename}__cropped.png" overlay_filename="${cropped_filename}__overlay.png" hardoverlay_filename="${cropped_filename}__hardoverlay.png" - annotated_filename="${cropped_filename}__annotated.png" + annotated_filename="$(dirname "$filename")/${domain}__${count}.final.png" # the name is crucial, because the web app part filters files based on the name # crop left=2056 @@ -78,10 +82,10 @@ annotate_header(){ shift shift done - convert "$filename" "$overlay_filename" -compose Darken -composite "$annotated_filename.step.png" - convert "$annotated_filename.step.png" "$hardoverlay_filename" -compose src-over -composite "$annotated_filename" + convert "$filename" "$overlay_filename" -compose Darken -composite "${annotated_filename}.step.png" + convert "${annotated_filename}.step.png" "$hardoverlay_filename" -compose src-over -composite "$annotated_filename" rm "$overlay_filename" "$annotated_filename.step.png" "$hardoverlay_filename" "$cropped_filename" "$filename" - echo "SCREENSHOT: " + echo "SCREENSHOT: " } #annotate_header "set-cookie" "identyfikator internetowy z cookie" "Cookie" "identyfikator internetowy z cookie" diff --git a/Docker/mozilla/firefox/bifup8k5.docker/datareporting/glean/db/data.safe.bin b/Docker/mozilla/firefox/bifup8k5.docker/datareporting/glean/db/data.safe.bin deleted file mode 100644 index a794c67..0000000 Binary files a/Docker/mozilla/firefox/bifup8k5.docker/datareporting/glean/db/data.safe.bin and /dev/null differ diff --git a/Docker/script3.sh b/Docker/script3.sh index 51eea0c..afb60fe 100755 --- a/Docker/script3.sh +++ 
b/Docker/script3.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash INPUT="$1" ID=$2 @@ -14,11 +14,11 @@ source ./ephemeral-x.sh source ./annotate_header.sh source ./utils.sh -(while true; do - grab_screen_to_public $ID - sleep 1 - done) & -refresher_pid=$!; +# (while true; do +# grab_screen_to_public $ID +# sleep 1 +# done) & +# refresher_pid=$!; start_firefox grab start_firefox @@ -53,7 +53,7 @@ while IFS= read -r DOMAIN; do do filename="/opt/static/$ID/${index}.png" scrot "$filename" - annotate_header "$filename" \ + annotate_header "$filename" "$DOMAIN" "$count" \ "set-cookie" "identyfikator internetowy z cookie" \ "Cookie" "identyfikator internetowy z cookie" \ "Referer" "Część mojej historii przeglądania" & @@ -67,7 +67,7 @@ while IFS= read -r DOMAIN; do done done <<< "$DOMAINS" -kill $refresher_pid; +# kill $refresher_pid; echo "starting wait..." diff --git a/Docker/utils.sh b/Docker/utils.sh index bc1d09a..3e92da4 100644 --- a/Docker/utils.sh +++ b/Docker/utils.sh @@ -28,7 +28,7 @@ click (){ grab_no=0 -grab (){ +grab(){ NOTE="$grab_no $@" #echo $@ ((grab_no++)) diff --git a/README.md b/README.md index b7f9134..fc6947f 100644 --- a/README.md +++ b/README.md @@ -23,3 +23,13 @@ BASE_URL=http://localhost:3000 docker run -i -v $PWD/static:/opt/static headles ``` BASE_URL=http://localhost:3000 node . ``` + +## Testing the API + +with httpie: + +``` +http POST localhost:3000/api/requests url==pearson.com 'domains[]==youtube.com' 'domains[]==facebook.com' +``` + +It returns a 303 response that redirects to a URL you can poll to see the status of the screenshot request. 
diff --git a/docker-args.js b/docker-args.js new file mode 100644 index 0000000..b7eeffa --- /dev/null +++ b/docker-args.js @@ -0,0 +1,10 @@ +const DOCKER_ARGS = [ + "run", + "-i", + "-v", + `${process.cwd()}/static:/opt/static`, + "headless-fox", + "./script3.sh", +]; + +module.exports = DOCKER_ARGS; diff --git a/index.js b/index.js index b1fa55d..b470c30 100644 --- a/index.js +++ b/index.js @@ -1,4 +1,3 @@ -const { v4: uuid } = require("uuid"); var serve = require("koa-static"); const Koa = require("koa"); const Router = require("@koa/router"); @@ -6,6 +5,9 @@ const mount = require("koa-mount"); const qs = require("qs"); const { Readable } = require("stream"); const { spawn } = require("child_process"); +const { requests } = require("./memory"); +const ScreenshotRequest = require("./request"); +const DOCKER_ARGS = require("./docker-args"); const router = new Router(); @@ -65,13 +67,7 @@ router.get("/", async (ctx) => { const starter = spawn( "docker", [ - "run", - "-i", - "-d", - "-v", - `${process.cwd()}/static:/opt/static`, - "headless-fox", - "./script3.sh", + ...DOCKER_ARGS, `{"url": "${params.url}", "third_party_domains": ["hotjar.com", "cookielaw.org", "facebook.com", "gemius.pl"]}`, id, ], @@ -87,5 +83,36 @@ router.get("/", async (ctx) => { }); }); +router.post("/api/requests", async (ctx) => { + const params = qs.parse(ctx.querystring); + if (!params.url) { + ctx.body = "Specify url"; + ctx.status = 422; + return; + } + if (!params.domains) { + ctx.body = "Specify domains"; + ctx.status = 422; + return; + } + if (!Array.isArray(params.domains)) { + ctx.body = "'domains' should be an array of strings"; + ctx.status = 422; + return; + } + const request = new ScreenshotRequest(params.url, params.domains); + ctx.status = 303; + ctx.redirect(`/api/requests/${request.id}`); +}); + +router.get("/api/requests/:id", async (ctx) => { + const request = requests[ctx.params.id]; + if (!request) { + ctx.status = 404; + return; + } + ctx.body = await request.getJSON(); 
+});
+
 app.use(router.routes()).use(router.allowedMethods());
 app.listen(3000);
diff --git a/memory.js b/memory.js
new file mode 100644
index 0000000..d6d54a9
--- /dev/null
+++ b/memory.js
@@ -0,0 +1,17 @@
+const queue = require("queue");
+
+// Jobs run one at a time (concurrency: 1) and start as soon as they are pushed.
+const q = queue({ concurrency: 1, autostart: true, results: [] });
+
+// In-memory registry of screenshot requests, keyed by request id.
+// Created without a prototype so ids such as "constructor" never resolve
+// to inherited Object members.
+const requests = Object.create(null);
+
+// The queue reports a failed job through an "error" event; an "error" event
+// without a listener is thrown, which would take the whole server down.
+q.on("error", (err) => {
+  console.error("Screenshot job failed:", err);
+});
+
+module.exports = { q, requests };
diff --git a/package-lock.json b/package-lock.json
index f03e88d..1cf4207 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -15,6 +15,7 @@
         "koa-mount": "^4.0.0",
         "koa-static": "^5.0.0",
         "qs": "^6.10.3",
+        "queue": "^6.0.2",
         "uuid": "^8.3.2"
       }
     },
@@ -571,6 +572,14 @@
       "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.2.0.tgz",
       "integrity": "sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ=="
     },
+    "node_modules/queue": {
+      "version": "6.0.2",
+      "resolved": "https://registry.npmjs.org/queue/-/queue-6.0.2.tgz",
+      "integrity": "sha512-iHZWu+q3IdFZFX36ro/lKBkSvfkztY5Y7HMiPlOUjhupPcG2JMfst2KKEpu5XndviX/3UhFbRngUPNKtgvtZiA==",
+      "dependencies": {
+        "inherits": "~2.0.3"
+      }
+    },
     "node_modules/readable-stream": {
       "version": "1.1.14",
       "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz",
@@ -1167,6 +1176,14 @@
       "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.2.0.tgz",
       "integrity": "sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ=="
     },
+    "queue": {
+      "version": "6.0.2",
+      "resolved": "https://registry.npmjs.org/queue/-/queue-6.0.2.tgz",
+      "integrity": "sha512-iHZWu+q3IdFZFX36ro/lKBkSvfkztY5Y7HMiPlOUjhupPcG2JMfst2KKEpu5XndviX/3UhFbRngUPNKtgvtZiA==",
+      "requires": {
+        "inherits": "~2.0.3"
+      }
+    },
     "readable-stream": {
       "version": "1.1.14",
       "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz",
diff --git a/package.json b/package.json
index feedb0e..ca6e604 100644
--- a/package.json
+++ b/package.json
@@ -19,6 +19,7 @@
     "koa-mount": "^4.0.0",
     "koa-static": "^5.0.0",
     "qs": "^6.10.3",
+    "queue": "^6.0.2",
     "uuid": "^8.3.2"
   }
 }
diff --git a/request.js b/request.js
new file mode 100644
index 0000000..2dfa37a
--- /dev/null
+++ b/request.js
@@ -0,0 +1,118 @@
+const { q, requests } = require("./memory");
+const DOCKER_ARGS = require("./docker-args");
+const { v4: uuid } = require("uuid");
+const { promises: fs } = require("fs");
+const { spawn } = require("child_process");
+const { resolve } = require("path");
+
+/**
+ * A queued screenshot job: one `docker` run (see ./docker-args) for `url`
+ * and its third-party `domains`.
+ *
+ * Constructing an instance pushes the job onto the shared queue `q` and
+ * registers the instance in `requests` under its generated id.
+ */
+module.exports = class ScreenshotRequest {
+  /**
+   * @param {string} url - page address handed to the container script
+   * @param {string[]} domains - sent to the script as `third_party_domains`
+   */
+  constructor(url, domains) {
+    this.url = url;
+    this.domains = domains;
+    this.id = uuid();
+    this.status = "waiting";
+    this.output = "";
+    this.images = [];
+    q.push(() => this.exec());
+    requests[this.id] = this;
+  }
+
+  /**
+   * Lists the annotated screenshots produced for this request so far.
+   *
+   * @returns {Promise<string[]>} names of the `*.final.png` files found in
+   *   `static/<id>`; an empty array when that directory cannot be read
+   */
+  async getImages() {
+    try {
+      const files = await fs.readdir(resolve(__dirname, "static", this.id));
+      // Literal suffix test: in a regex, unescaped dots would also accept
+      // names such as "xfinal_png".
+      return files.filter((file) => file.endsWith(".final.png"));
+    } catch (e) {
+      // Best effort: an unreadable or missing directory means "no images".
+      return [];
+    }
+  }
+
+  /**
+   * @returns {Promise<object>} plain snapshot of the request for the API;
+   *   the captured process output is not included
+   */
+  async getJSON() {
+    return {
+      url: this.url,
+      domains: this.domains,
+      id: this.id,
+      status: this.status,
+      files: await this.getImages(),
+    };
+  }
+
+  /**
+   * Runs the container and appends its stdout and stderr to `this.output`.
+   *
+   * @returns {Promise<void>} resolves when the process exits with code 0;
+   *   rejects with an Error when it cannot be spawned or exits otherwise
+   */
+  async exec() {
+    return new Promise((onSuccess, onFailure) => {
+      this.status = "running";
+      let settled = false;
+      // "error" and "close" may both fire for one process; settle only once.
+      const finish = (error) => {
+        if (settled) {
+          return;
+        }
+        settled = true;
+        this.status = "finished";
+        if (error) {
+          onFailure(error);
+        } else {
+          onSuccess();
+        }
+      };
+      this.process = spawn(
+        "docker",
+        [
+          ...DOCKER_ARGS,
+          JSON.stringify({
+            url: this.url,
+            third_party_domains: this.domains,
+          }),
+          this.id,
+        ],
+        { cwd: process.cwd() }
+      );
+      // A spawn failure (for example a missing docker binary) is reported
+      // through "error"; without a listener it is thrown as uncaught.
+      this.process.on("error", finish);
+      this.process.on("close", (exitCode, signal) => {
+        if (exitCode === 0) {
+          finish();
+        } else {
+          const reason =
+            exitCode === null ? `signal ${signal}` : `code ${exitCode}`;
+          finish(new Error(`docker exited with ${reason}`));
+        }
+      });
+      this.process.stdout.on("data", (d) => {
+        this.output += d.toString();
+      });
+      this.process.stderr.on("data", (d) => {
+        this.output += d.toString();
+      });
+    });
+  }
+};