From 4c34db1af9670c038f50df1dabf620736b658d8a Mon Sep 17 00:00:00 2001 From: Kuba Orlik Date: Thu, 5 May 2022 21:54:34 +0200 Subject: [PATCH] Add a webservice API with polling --- Docker/annotate_header.sh | 14 ++-- .../datareporting/glean/db/data.safe.bin | Bin 4348 -> 0 bytes Docker/script3.sh | 16 ++-- Docker/utils.sh | 2 +- README.md | 10 +++ docker-args.js | 10 +++ index.js | 43 ++++++++-- memory.js | 3 + package-lock.json | 17 ++++ package.json | 1 + request.js | 75 ++++++++++++++++++ 11 files changed, 169 insertions(+), 22 deletions(-) delete mode 100644 Docker/mozilla/firefox/bifup8k5.docker/datareporting/glean/db/data.safe.bin create mode 100644 docker-args.js create mode 100644 memory.js create mode 100644 request.js diff --git a/Docker/annotate_header.sh b/Docker/annotate_header.sh index eb86ef9..383b942 100755 --- a/Docker/annotate_header.sh +++ b/Docker/annotate_header.sh @@ -12,15 +12,19 @@ get_width(){ annotate_header(){ - echo annotate $1 + echo annotate $1 $2 $3 filename=$1 shift; + domain=$1; + shift; + count=$1; + shift; d=$(date "+%Y-%m-%d__%H_%M_%S") #filename="2022-03-10__19_33_55__set-cookie.png" cropped_filename="${filename}__cropped.png" overlay_filename="${cropped_filename}__overlay.png" hardoverlay_filename="${cropped_filename}__hardoverlay.png" - annotated_filename="${cropped_filename}__annotated.png" + annotated_filename="$(dirname "$filename")/${domain}__${count}.final.png" # the name is crucial, because the web app part filters files based on the name # crop left=2056 @@ -78,10 +82,10 @@ annotate_header(){ shift shift done - convert "$filename" "$overlay_filename" -compose Darken -composite "$annotated_filename.step.png" - convert "$annotated_filename.step.png" "$hardoverlay_filename" -compose src-over -composite "$annotated_filename" + convert "$filename" "$overlay_filename" -compose Darken -composite "${annotated_filename}.step.png" + convert "${annotated_filename}.step.png" "$hardoverlay_filename" -compose src-over -composite 
"$annotated_filename" rm "$overlay_filename" "$annotated_filename.step.png" "$hardoverlay_filename" "$cropped_filename" "$filename" - echo "SCREENSHOT: " + echo "SCREENSHOT: " } #annotate_header "set-cookie" "identyfikator internetowy z cookie" "Cookie" "identyfikator internetowy z cookie" diff --git a/Docker/mozilla/firefox/bifup8k5.docker/datareporting/glean/db/data.safe.bin b/Docker/mozilla/firefox/bifup8k5.docker/datareporting/glean/db/data.safe.bin deleted file mode 100644 index a794c67a112842413617f6ad97c448c222bf9400..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4348 zcmb_gYitx%6h4)gBOq!)kcuMZp)|XlnceP7v{cbV!Jvd70%n=rooz>^v+K+(ff7aG zk7)c!#KZ?Nnve*@#56(w06+8xF(xKH68}g{AjZT5Lo^Z*ypQi}*>)vBoTi;~?m73| zd(QdJxpz8=^wTjJMBNJpC24@S@h&-$>wAIQJ?OhmklgP0Ud4&MT9A}8qGHr24SR9y zR(e_UcFZ(x2GJqj@^?P?VK)xFa@1X|4JN(7i#=g;MA=*b4P!QlTkw{zfmp}dt@B{D|ncp?s4`? zA_hR07$EtoK8Y)&Ap^8y73^k(yLYm<;{?)Zcjq+N5 z_D*Txw_RS>!tei7@0Ru1&b!k))zGeIURrT1{Jp- z3k5b6+KW-_#BT3Iw6+DhT*zu`^hQf(iLho~uSNLNc8f9si^mzPJ6;fr67u{?vEoH_ z-`VMVQCy6@VRykat!&H&A#+oMfr~SjDrzJ6bJ+um(xaldkjYh$|9D)({{ir)p3~8K z7`%8X3;-`QkkEoBAU|uopfZU36D`yN2F};7>jYz~wEqn)2OjA7Ne%JGV9)xbpr{IX zqLqr;41Q}v6TlaRK;om2pYMErk|Y^;Z!bHfDvtMaX=Wd{y`rdI@O1>J3VzbZD!jyv3hAaE$WcDTcwF|m_Mh=q27vR>*GKUf zUIEu;{EvmYXK;RMW~;xu_iCi|!CwD2ntBlYtKYTi&)cXE^%Ue?aA*p6Os6NoZ~06g z(;ScU003VN-aDux%kcobh~qf}o&OQoySVBHMaiBepPspTe|#BOfd<2PMLVNds}stt zd3jil0_lQ$-%)K(N1X`irF$Uf1DHGbP88H@ke^0?`S2IR${^SGZ$*7A;rODwn+<$D z8Xni{`@hUrmf8Qa0EFHp(Ov|A@p%#rlKU&*NQ_ssw`P7&el!>Re-jDk_+CIjct5z0 z&?Z#FNojD@e(fGWwP1~|8(-wa-R+g)vy|wa?69lf$7i0rlRrdHithiOD;m6!A z+Z)0Spt#))BXQNgi#NzgmBys=^2w)@CWF0=wXi(o#cnxngf54|U~Ee>9H$+*Ofi$z zcLSn{hH4Q<$KXqioX%}zjTba_u;cA!Uz0C=Nh{m@B5Fi#C>_-GN6izPzjG#j5mmiV zT$#c~P#mf?LPos7txrabb1l+pCrmqGr`Fr4f^8P;TvFuMN{OFzk7*T5Q@X|>o9GcE zsltujFy&^&BaymTQAD|Hs+2C5Q;DqQq!MYfT1k}hnOwr{%b9)IK0BYOq!&VW;u>TA zh>N~^BmS7VR3-y|WTsnTC$5}s7&yMJhh|(O_r-=AlwHnq%ZKEEXk^?w^p5|_O6-L3 
zHS9T^u+!@;k%L^pGL!kV2ujw@ianbm2Qp=?sA^rU;)U_fk{5He8jMF7(+aT!+Rcqlio{5QiG2>-95`WMAsSyBK1 diff --git a/Docker/script3.sh b/Docker/script3.sh index 51eea0c..afb60fe 100755 --- a/Docker/script3.sh +++ b/Docker/script3.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/bash INPUT="$1" ID=$2 @@ -14,11 +14,11 @@ source ./ephemeral-x.sh source ./annotate_header.sh source ./utils.sh -(while true; do - grab_screen_to_public $ID - sleep 1 - done) & -refresher_pid=$!; +# (while true; do +# grab_screen_to_public $ID +# sleep 1 +# done) & +# refresher_pid=$!; start_firefox grab start_firefox @@ -53,7 +53,7 @@ while IFS= read -r DOMAIN; do do filename="/opt/static/$ID/${index}.png" scrot "$filename" - annotate_header "$filename" \ + annotate_header "$filename" "$DOMAIN" "$count" \ "set-cookie" "identyfikator internetowy z cookie" \ "Cookie" "identyfikator internetowy z cookie" \ "Referer" "Część mojej historii przeglądania" & @@ -67,7 +67,7 @@ while IFS= read -r DOMAIN; do done done <<< "$DOMAINS" -kill $refresher_pid; +# kill $refresher_pid; echo "starting wait..." diff --git a/Docker/utils.sh b/Docker/utils.sh index bc1d09a..3e92da4 100644 --- a/Docker/utils.sh +++ b/Docker/utils.sh @@ -28,7 +28,7 @@ click (){ grab_no=0 -grab (){ +grab(){ NOTE="$grab_no $@" #echo $@ ((grab_no++)) diff --git a/README.md b/README.md index b7f9134..fc6947f 100644 --- a/README.md +++ b/README.md @@ -23,3 +23,13 @@ BASE_URL=http://localhost:3000 docker run -i -v $PWD/static:/opt/static headles ``` BASE_URL=http://localhost:3000 node . ``` + +## Testing the API + +with httpie: + +``` +http POST localhost:3000/api/requests url==pearson.com 'domains[]==youtube.com' 'domains[]==facebook.com' +``` + +It returns a 303 response that redirects to a URL you can poll to see the status of the screenshot request. 
diff --git a/docker-args.js b/docker-args.js
new file mode 100644
index 0000000..b7eeffa
--- /dev/null
+++ b/docker-args.js
@@ -0,0 +1,14 @@
+// Base arguments for `docker run`, shared by every place that starts the
+// screenshot container: the -i flag, ./static (relative to the current
+// working directory) mounted at /opt/static, the "headless-fox" image and its
+// ./script3.sh entry script. Callers append the JSON input and the request id.
+const DOCKER_ARGS = [
+  "run",
+  "-i",
+  "-v",
+  `${process.cwd()}/static:/opt/static`,
+  "headless-fox",
+  "./script3.sh",
+];
+
+module.exports = DOCKER_ARGS;
diff --git a/index.js b/index.js
index b1fa55d..b470c30 100644
--- a/index.js
+++ b/index.js
@@ -1,4 +1,3 @@
-const { v4: uuid } = require("uuid");
 var serve = require("koa-static");
 const Koa = require("koa");
 const Router = require("@koa/router");
@@ -6,6 +5,9 @@ const mount = require("koa-mount");
 const qs = require("qs");
 const { Readable } = require("stream");
 const { spawn } = require("child_process");
+const { requests } = require("./memory");
+const ScreenshotRequest = require("./request");
+const DOCKER_ARGS = require("./docker-args");
 
 const router = new Router();
 
@@ -65,13 +67,7 @@ router.get("/", async (ctx) => {
   const starter = spawn(
     "docker",
     [
-      "run",
-      "-i",
-      "-d",
-      "-v",
-      `${process.cwd()}/static:/opt/static`,
-      "headless-fox",
-      "./script3.sh",
+      ...DOCKER_ARGS,
       `{"url": "${params.url}", "third_party_domains": ["hotjar.com", "cookielaw.org", "facebook.com", "gemius.pl"]}`,
       id,
     ],
@@ -87,5 +83,49 @@ router.get("/", async (ctx) => {
   });
 });
 
+/**
+ * POST /api/requests — create and enqueue a screenshot request.
+ * Reads `url` and `domains[]` from the query string, then sets status 303 and
+ * redirects to the status URL of the newly created request.
+ */
+router.post("/api/requests", async (ctx) => {
+  const params = qs.parse(ctx.querystring);
+  if (!params.url) {
+    ctx.body = "Specify url";
+    ctx.status = 422;
+    return;
+  }
+  if (!params.domains) {
+    ctx.body = "Specify domains";
+    ctx.status = 422;
+    return;
+  }
+  // qs can produce nested objects/arrays, so the elements are checked as well.
+  if (
+    !Array.isArray(params.domains) ||
+    params.domains.some((domain) => typeof domain !== "string")
+  ) {
+    ctx.body = "'domains' should be an array of strings";
+    ctx.status = 422;
+    return;
+  }
+  const request = new ScreenshotRequest(params.url, params.domains);
+  ctx.status = 303;
+  ctx.redirect(`/api/requests/${request.id}`);
+});
+
+/**
+ * GET /api/requests/:id — report the state of a previously created request.
+ * Responds with 404 when no request is registered under the given id.
+ */
+router.get("/api/requests/:id", async (ctx) => {
+  const request = requests[ctx.params.id];
+  if (!request) {
+    ctx.status = 404;
+    return;
+  }
+  ctx.body = await request.getJSON();
+});
+
 app.use(router.routes()).use(router.allowedMethods());
 app.listen(3000);
diff --git a/memory.js b/memory.js
new file mode 100644
index 0000000..d6d54a9
--- /dev/null
+++ b/memory.js
@@ -0,0 +1,17 @@
+const queue = require("queue");
+
+// Shared job queue for screenshot runs, configured with concurrency 1 and
+// autostart, i.e. jobs are meant to run one at a time as soon as they are pushed.
+const q = queue({ concurrency: 1, autostart: true, results: [] });
+
+// A rejected job makes the queue emit "error"; an "error" event without a
+// listener is thrown by EventEmitter, which would take the whole server down.
+q.on("error", (err) => {
+  console.error("Screenshot job failed:", err);
+});
+
+// Registry of ScreenshotRequest instances keyed by request id. It has no
+// prototype, so ids like "constructor" never resolve to inherited members.
+const requests = Object.create(null);
+
+module.exports = { q, requests };
diff --git a/package-lock.json b/package-lock.json
index f03e88d..1cf4207 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -15,6 +15,7 @@
         "koa-mount": "^4.0.0",
         "koa-static": "^5.0.0",
         "qs": "^6.10.3",
+        "queue": "^6.0.2",
         "uuid": "^8.3.2"
       }
     },
@@ -571,6 +572,14 @@
       "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.2.0.tgz",
       "integrity": "sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ=="
     },
+    "node_modules/queue": {
+      "version": "6.0.2",
+      "resolved": "https://registry.npmjs.org/queue/-/queue-6.0.2.tgz",
+      "integrity": "sha512-iHZWu+q3IdFZFX36ro/lKBkSvfkztY5Y7HMiPlOUjhupPcG2JMfst2KKEpu5XndviX/3UhFbRngUPNKtgvtZiA==",
+      "dependencies": {
+        "inherits": "~2.0.3"
+      }
+    },
     "node_modules/readable-stream": {
       "version": "1.1.14",
       "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz",
@@ -1167,6 +1176,14 @@
       "resolved": "https://registry.npmjs.org/querystringify/-/querystringify-2.2.0.tgz",
       "integrity": "sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ=="
     },
+    "queue": {
+      "version": "6.0.2",
+      "resolved": "https://registry.npmjs.org/queue/-/queue-6.0.2.tgz",
+      "integrity": "sha512-iHZWu+q3IdFZFX36ro/lKBkSvfkztY5Y7HMiPlOUjhupPcG2JMfst2KKEpu5XndviX/3UhFbRngUPNKtgvtZiA==",
+      "requires": {
+        "inherits": "~2.0.3"
+      }
+    },
     "readable-stream": {
       "version": "1.1.14",
       "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-1.1.14.tgz",
diff --git a/package.json b/package.json
index feedb0e..ca6e604 100644
--- a/package.json
+++ b/package.json
@@ -19,6 +19,7 @@
     "koa-mount": "^4.0.0",
     "koa-static": "^5.0.0",
     "qs": "^6.10.3",
+    "queue": "^6.0.2",
     "uuid": "^8.3.2"
   }
 }
diff --git a/request.js b/request.js
new file mode 100644
index 0000000..2dfa37a
--- /dev/null
+++ b/request.js
@@ -0,0 +1,112 @@
+const { q, requests } = require("./memory");
+const DOCKER_ARGS = require("./docker-args");
+const { v4: uuid } = require("uuid");
+const { promises: fs } = require("fs");
+const { spawn } = require("child_process");
+const { resolve } = require("path");
+
+// Suffix Docker/annotate_header.sh gives to finished, annotated screenshots.
+const FINAL_IMAGE_SUFFIX = ".final.png";
+
+/**
+ * One screenshot job. Constructing it registers the instance in `requests`
+ * under a fresh uuid and pushes its execution onto the shared queue `q`.
+ */
+module.exports = class ScreenshotRequest {
+  /**
+   * @param {string} url - passed to the container as `url`
+   * @param {string[]} domains - passed to the container as `third_party_domains`
+   */
+  constructor(url, domains) {
+    this.url = url;
+    this.domains = domains;
+    this.id = uuid();
+    this.status = "waiting";
+    this.output = "";
+    this.images = [];
+    requests[this.id] = this;
+    q.push(() => this.exec());
+  }
+
+  /**
+   * Lists the finished screenshots in ./static/<id> next to this file.
+   * NOTE(review): docker-args.js mounts `${process.cwd()}/static`, so this only
+   * finds the files when the server is started from this directory — confirm.
+   * @returns {Promise<string[]>} names ending in ".final.png"; an empty array
+   *   when the directory cannot be read (e.g. it does not exist yet)
+   */
+  async getImages() {
+    let files;
+    try {
+      files = await fs.readdir(resolve(__dirname, "static", this.id));
+    } catch (err) {
+      // A missing directory is treated as "no screenshots yet"; anything else
+      // is still answered with an empty list but surfaced in the log.
+      if (err.code !== "ENOENT") {
+        console.error(`Cannot list screenshots of request ${this.id}:`, err);
+      }
+      return [];
+    }
+    return files.filter((file) => file.endsWith(FINAL_IMAGE_SUFFIX));
+  }
+
+  /**
+   * @returns {Promise<object>} plain snapshot of this request for the API:
+   *   url, domains, id, status and the finished screenshot file names
+   */
+  async getJSON() {
+    return {
+      url: this.url,
+      domains: this.domains,
+      id: this.id,
+      status: this.status,
+      files: await this.getImages(),
+    };
+  }
+
+  /**
+   * Starts the container for this request and accumulates its stdout and
+   * stderr in `this.output`. Sets `status` to "running", then to "finished"
+   * once the process has closed or failed to start.
+   * @returns {Promise<void>} resolves on exit code 0; rejects with an Error on
+   *   any other exit code or when docker cannot be spawned
+   */
+  exec() {
+    return new Promise((resolveRun, rejectRun) => {
+      this.status = "running";
+      this.process = spawn(
+        "docker",
+        [
+          ...DOCKER_ARGS,
+          JSON.stringify({
+            url: this.url,
+            third_party_domains: this.domains,
+          }),
+          this.id,
+        ],
+        { cwd: process.cwd() }
+      );
+      // Without a listener a spawn failure (e.g. docker is not installed)
+      // would be thrown as an unhandled "error" event.
+      this.process.on("error", (err) => {
+        this.status = "finished";
+        rejectRun(err);
+      });
+      this.process.on("close", (exitCode) => {
+        this.status = "finished";
+        if (exitCode === 0) {
+          resolveRun();
+          return;
+        }
+        rejectRun(
+          new Error(`docker exited with code ${exitCode} (request ${this.id})`)
+        );
+      });
+      const appendOutput = (chunk) => {
+        this.output += chunk.toString();
+      };
+      this.process.stdout.on("data", appendOutput);
+      this.process.stderr.on("data", appendOutput);
+    });
+  }
+};