Allow for zip downloads. Show only interesting screenshots
This commit is contained in:
		
							parent
							
								
									04670e3236
								
							
						
					
					
						commit
						89d6134f30
					
				
							
								
								
									
										1
									
								
								@types/src/docker-args.d.ts
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								@types/src/docker-args.d.ts
									
									
									
									
										vendored
									
									
								
							| @ -1,2 +1,3 @@ | ||||
| export const DOCKER_ARGS: string[]; | ||||
| export const IMAGE_NAME: "headless-fox"; | ||||
| export const VOLUME_MOUNT: string; | ||||
|  | ||||
							
								
								
									
										11
									
								
								@types/src/request.d.ts
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										11
									
								
								@types/src/request.d.ts
									
									
									
									
										vendored
									
									
								
							| @ -1,12 +1,17 @@ | ||||
| /// <reference types="node" />
 | ||||
| import { ChildProcessWithoutNullStreams } from "child_process"; | ||||
| export declare type Image = { | ||||
|     url: string; | ||||
|     domain: string; | ||||
|     found_headers: Record<string, string>; | ||||
| }; | ||||
| export default class ScreenshotRequest { | ||||
|     url: string; | ||||
|     domains: string[]; | ||||
|     id: string; | ||||
|     status: string; | ||||
|     output: string; | ||||
|     images: Record<string, unknown>[]; | ||||
|     images: Image[]; | ||||
|     request_time: number; | ||||
|     started_time: number | null; | ||||
|     finished_time: number | null; | ||||
| @ -22,14 +27,16 @@ export default class ScreenshotRequest { | ||||
|         id: string; | ||||
|         status: string; | ||||
|         output: string; | ||||
|         images: Record<string, unknown>[]; | ||||
|         images: Record<string, Image[]>; | ||||
|         request_time: number; | ||||
|         started_time: number | null; | ||||
|         finished_time: number | null; | ||||
|         processing_took: number | null; | ||||
|         waiting_took: number | null; | ||||
|         elapsed_time_s: number; | ||||
|         zip_url: string | null; | ||||
|     }>; | ||||
|     getGoodImages(): Record<string, Image[]>; | ||||
|     setFinished(): void; | ||||
|     exec(): Promise<void>; | ||||
| } | ||||
|  | ||||
| @ -46,6 +46,8 @@ RUN git clone https://github.com/muquit/grabc && cd grabc && make && make instal | ||||
| RUN apk add clang | ||||
| RUN apk add freetype-dev | ||||
| RUN python3 -m pip install --upgrade Pillow | ||||
| RUN apk add zip | ||||
| 
 | ||||
| COPY . /opt | ||||
| CMD /opt/prepare-firefox.sh | ||||
| 
 | ||||
|  | ||||
| @ -12,9 +12,11 @@ from PIL import Image, ImageDraw, ImageFont | ||||
| 
 | ||||
| output_file_relative = sys.argv[ | ||||
|     1 | ||||
| ]  # this is also the existing source screenshot to annotate. It will be updated in-place | ||||
| ] | ||||
| 
 | ||||
| output_file = "/opt/static/" + output_file_relative | ||||
| output_dir = os.path.dirname(output_file) | ||||
| output_suffix = os.path.basename(output_file) | ||||
| domain = sys.argv[2] | ||||
| needles = sys.argv[3:] | ||||
| 
 | ||||
| @ -107,8 +109,9 @@ with Image.open(output_file) as im: | ||||
|                 ) | ||||
|     if len(found_needles) == 0: | ||||
|         exit(0) | ||||
|     os.remove(output_file) | ||||
|     im = im.resize((im.width // 2, im.height // 2)) | ||||
|     im.save(output_file, "PNG") | ||||
|     im.save(output_dir + "/" + domain.replace(".", "_") + "_" + output_suffix, "PNG") | ||||
|     print(json.dumps({"new_file": | ||||
|                       {"url": base_url + "/static/" + output_file_relative, | ||||
|                        "domain": domain, | ||||
|  | ||||
| @ -1,12 +1,14 @@ | ||||
| const IMAGE_NAME = "headless-fox"; | ||||
| 
 | ||||
| const VOLUME_MOUNT = `${process.cwd()}/static:/opt/static`; | ||||
| 
 | ||||
| const DOCKER_ARGS = [ | ||||
|   "run", | ||||
|   "-i", | ||||
|   "-v", | ||||
|   `${process.cwd()}/static:/opt/static`, | ||||
|   VOLUME_MOUNT, | ||||
|   IMAGE_NAME, | ||||
|   "./script3.sh", | ||||
| ]; | ||||
| 
 | ||||
| module.exports = { DOCKER_ARGS, IMAGE_NAME }; | ||||
| module.exports = { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT }; | ||||
|  | ||||
							
								
								
									
										28
									
								
								src/index.ts
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								src/index.ts
									
									
									
									
									
								
							| @ -7,7 +7,7 @@ import serve from "koa-static"; | ||||
| import qs from "qs"; | ||||
| import { Readable } from "stream"; | ||||
| import { v4 as uuid } from "uuid"; | ||||
| import { DOCKER_ARGS } from "./docker-args"; | ||||
| import { DOCKER_ARGS, IMAGE_NAME, VOLUME_MOUNT } from "./docker-args"; | ||||
| import { requests } from "./memory"; | ||||
| import ScreenshotRequest from "./request"; | ||||
| 
 | ||||
| @ -81,11 +81,11 @@ router.get("/", async (ctx) => { | ||||
|           do { | ||||
|             response = await (await fetch(\`/api/requests/\${id}\`)).json();
 | ||||
|             output.innerHTML = JSON.stringify(response, null, "   ").replace( | ||||
|               /\\/static\\/.*.png/g, | ||||
|               /\\/(static|api)\\/.*(.png|all-screenshots)/g, | ||||
|               '<a href="$&">$&</a>' | ||||
|             ); | ||||
|             stdout.innerHTML = response.output.replace( | ||||
|               /\\/static\\/.*.png/g, | ||||
|               /\\/(static|api)\\/.*(.png|all-screenshots)/g, | ||||
|               '<a href="$&">$&</a>' | ||||
|             ); | ||||
|             await sleep(1000); | ||||
| @ -173,6 +173,28 @@ router.get("/api/requests/:id", async (ctx) => { | ||||
|   ctx.body = await request.getJSON(); | ||||
| }); | ||||
| 
 | ||||
/**
 * GET /api/requests/:id/all-screenshots
 *
 * Responds with a zip attachment built from `/opt/static/<request id>` inside
 * a container started from IMAGE_NAME with VOLUME_MOUNT attached. Responds
 * with 404 when the request is unknown or has not reached the "finished"
 * status yet.
 */
router.get("/api/requests/:id/all-screenshots", async (ctx) => {
  const request = requests[ctx.params.id];
  if (!request || request.status !== "finished") {
    ctx.status = 404;
    return;
  }
  // Every non-word character of the URL becomes "_" to get a safe file name.
  ctx.attachment(`${request.url.replace(/\W/g, "_")}_all-screenshots.zip`);
  ctx.response.set("content-type", "application/zip");
  // Named `zipper` rather than `process` so the Node global is not shadowed.
  const zipper = spawn("docker", [
    "run",
    // Remove the container once zip exits; otherwise each download leaves a
    // stopped container behind.
    "--rm",
    "-v",
    VOLUME_MOUNT,
    IMAGE_NAME,
    "zip",
    "--recurse-paths",
    "--junk-paths",
    // "-" as the archive name: the archive is read from the child's stdout below.
    "-",
    `/opt/static/${request.id}`,
  ]);
  // Without a listener, a failed spawn (e.g. docker not installed) would be
  // thrown as an unhandled "error" event.
  zipper.on("error", (err) => {
    console.error(
      `Could not create the zip archive for request ${request.id}:`,
      err
    );
  });
  // Nothing reads stderr; drain it so a full pipe cannot stall the child.
  zipper.stderr.resume();
  ctx.body = zipper.stdout;
});
| 
 | ||||
| app.use(router.routes()).use(router.allowedMethods()); | ||||
| const port = 3000; | ||||
| app.listen(port); | ||||
|  | ||||
| @ -6,11 +6,17 @@ import { q, requests } from "./memory"; | ||||
| 
 | ||||
| let queue_order: ScreenshotRequest[] = []; | ||||
| 
 | ||||
| export type Image = { | ||||
|   url: string; | ||||
|   domain: string; | ||||
|   found_headers: Record<string, string>; | ||||
| }; | ||||
| 
 | ||||
| export default class ScreenshotRequest { | ||||
|   public id = uuid(); | ||||
|   public status = "waiting"; | ||||
|   public output = ""; | ||||
|   public images: Record<string, unknown>[] = []; | ||||
|   public images: Image[] = []; | ||||
|   public request_time: number = Date.now(); | ||||
|   public started_time: number | null = null; | ||||
|   public finished_time: number | null = null; | ||||
| @ -47,13 +53,14 @@ export default class ScreenshotRequest { | ||||
|     id: string; | ||||
|     status: string; | ||||
|     output: string; | ||||
|     images: Record<string, unknown>[]; | ||||
|     images: Record<string, Image[]>; | ||||
|     request_time: number; | ||||
|     started_time: number | null; | ||||
|     finished_time: number | null; | ||||
|     processing_took: number | null; | ||||
|     waiting_took: number | null; | ||||
|     elapsed_time_s: number; | ||||
|     zip_url: string | null; | ||||
|   }> { | ||||
|     return { | ||||
|       url: this.url, | ||||
| @ -62,7 +69,7 @@ export default class ScreenshotRequest { | ||||
|       id: this.id, | ||||
|       status: this.status, | ||||
|       output: this.output, | ||||
|       images: this.images, | ||||
|       images: this.getGoodImages(), | ||||
|       request_time: this.request_time, | ||||
|       started_time: this.started_time, | ||||
|       finished_time: this.finished_time, | ||||
| @ -73,9 +80,66 @@ export default class ScreenshotRequest { | ||||
|           this.request_time) / | ||||
|           1000 | ||||
|       ), | ||||
|       zip_url: | ||||
|         this.status === "finished" | ||||
|           ? `/api/requests/${this.id}/all-screenshots` | ||||
|           : null, | ||||
|     }; | ||||
|   } | ||||
| 
 | ||||
|   getGoodImages(): Record<string, Image[]> { | ||||
|     const result: Record<string, Image[]> = {}; | ||||
|     const domains = Array.from( | ||||
|       new Set(this.images.map((image) => image.domain)) | ||||
|     ); | ||||
|     for (const domain of domains) { | ||||
|       const images = this.images | ||||
|         .filter((image) => image.domain === domain) | ||||
|         .sort((image1, image2) => { | ||||
|           if ( | ||||
|             Object.values(image1.found_headers).length > | ||||
|             Object.values(image2.found_headers).length | ||||
|           ) { | ||||
|             return -1; | ||||
|           } else if ( | ||||
|             Object.values(image1.found_headers).length < | ||||
|             Object.values(image2.found_headers).length | ||||
|           ) { | ||||
|             return 1; | ||||
|           } else { | ||||
|             return 0; | ||||
|           } | ||||
|         }); | ||||
|       const all_values = Array.from( | ||||
|         new Set( | ||||
|           images | ||||
|             .map((image) => Object.values(image.found_headers)) | ||||
|             .reduce((a, b) => a.concat(b)) | ||||
|         ) | ||||
|       ); | ||||
|       const images_to_show = []; | ||||
|       const shown_values = new Set(); | ||||
|       for (const image of images) { | ||||
|         const values_in_image = Object.values(image.found_headers); | ||||
|         let any_new_values = false; | ||||
|         for (const value of values_in_image) { | ||||
|           if (!shown_values.has(value)) { | ||||
|             shown_values.add(value); | ||||
|             any_new_values = true; | ||||
|           } | ||||
|         } | ||||
|         if (any_new_values) { | ||||
|           images_to_show.push(image); | ||||
|         } | ||||
|         if (shown_values.size == all_values.length) { | ||||
|           break; | ||||
|         } | ||||
|       } | ||||
|       result[domain] = images_to_show; | ||||
|     } | ||||
|     return result; | ||||
|   } | ||||
| 
 | ||||
|   setFinished(): void { | ||||
|     this.status = "finished"; | ||||
|     this.finished_time = Date.now(); | ||||
| @ -122,7 +186,7 @@ export default class ScreenshotRequest { | ||||
|             is(parsed, predicates.object) && | ||||
|             is(parsed.new_file, predicates.object) | ||||
|           ) { | ||||
|             this.images.push(parsed.new_file); | ||||
|             this.images.push(parsed.new_file as Image); | ||||
|           } | ||||
|         } catch (e) { | ||||
|           //noop
 | ||||
|  | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user