47 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			Docker
		
	
	
	
	
	
			
		
		
	
	
			47 lines
		
	
	
		
			1.5 KiB
		
	
	
	
		
			Docker
		
	
	
	
	
	
# NOTE(review): this tag is a Python 3.11 alpha pre-release; confirm that is
# still intended before moving to a stable tag or pinning by digest.
FROM python:3.11.0a5-alpine3.15

# inspired by https://github.com/darktohka/pytesseract-docker/blob/master/Dockerfile

# Desktop/browser stack, image tooling and fonts, installed in one layer.
# --no-cache fetches a fresh package index for this command and does not store
# it in the image, so no separate `apk update` layer is needed.
RUN apk add --no-cache \
        bash \
        firefox \
        fluxbox \
        font-noto \
        font-noto-extra \
        imagemagick \
        scrot \
        terminus-font \
        tesseract-ocr \
        ttf-dejavu \
        ttf-font-awesome \
        ttf-inconsolata \
        vips-tools \
        xdotool \
        xprop \
        xterm \
        xvfb

# Environment for the remaining build steps and for the running container.
# Written in key=value form; the space-separated `ENV key value` form is legacy.
#
# NOTE(review): CC/CXX point at clang, which this file installs only as part of
# the temporary .dev-deps package set and removes again after the build, so
# these paths presumably do not exist at runtime - confirm nothing relies on them.
# NOTE(review): PIP_NO_CACHE_DIR=0 is kept as-is; verify that 0 (rather than 1)
# is the intended value for the pip version shipped in the base image.
ENV SHELL=/bin/sh \
    CC=/usr/bin/clang \
    CXX=/usr/bin/clang++ \
    LANG=C.UTF-8 \
    PYTHONUNBUFFERED=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=0 \
    TESSDATA_PREFIX=/usr/local/share/tessdata

# Scratch directory for the build steps that follow.
WORKDIR /tmp

# Shared libraries kept in the final image (not part of the temporary
# .dev-deps set that is removed after the build).
RUN apk add --no-cache \
        freetype \
        leptonica \
        libgcc \
        libpng \
        libstdc++ \
        openjpeg \
        openssl \
        tiff \
        zlib

# WORKDIR creates the (empty) checkout directory; no separate mkdir is needed.
WORKDIR /tmp/tesseract

# Build and install Tesseract plus pytesseract in ONE layer. Files deleted in a
# later RUN still occupy space in the earlier layer, so the toolchain, the
# source tree and the static libraries must be removed in the same RUN that
# created them.
#
# Step order matters:
#   1. install the development toolchain as the virtual package .dev-deps
#   2. fetch the English trained data into $TESSDATA_PREFIX
#   3. shallow-clone Tesseract into the working directory and build it with
#      the compiler selected by the CC/CXX environment variables
#   4. pip-install pytesseract while the toolchain is still present
#   5. remove the toolchain, static libraries and everything under /tmp
#
# NOTE(review): the clone tracks the repository's default branch and the
# pytesseract install is unpinned, so builds are not reproducible; consider
# pinning a Tesseract tag and a pytesseract version.
RUN apk add --no-cache --virtual .dev-deps \
        autoconf \
        automake \
        clang \
        file \
        freetype-dev \
        g++ \
        git \
        leptonica-dev \
        libpng-dev \
        libtool \
        linux-headers \
        make \
        openjpeg-dev \
        openssl-dev \
        pkgconfig \
        tiff-dev \
        zlib-dev \
    && mkdir -p "$TESSDATA_PREFIX" \
    && wget https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata -P "$TESSDATA_PREFIX" \
    && git clone --depth 1 https://github.com/tesseract-ocr/tesseract.git . \
    && ./autogen.sh \
    && ./configure \
    && make -j"$(nproc)" \
    && make install \
    && pip install -U --no-cache-dir pytesseract \
    && apk del .dev-deps \
    && rm -f /usr/local/lib/*.a \
    && rm -rf /tmp/*

# Additional command-line tools, installed in a single layer.
# --no-cache makes apk fetch a fresh package index for this command, so the
# step does not depend on an index left behind (or deleted) by an earlier
# layer, and no index is stored in the image.
RUN apk add --no-cache \
        jq \
        nodejs \
        sed

# Copy the local ./mozilla directory into root's home (presumably a prepared
# Firefox profile - confirm), then the whole build context into /opt.
# NOTE(review): no USER instruction is set, so the container runs as root.
COPY ./mozilla /root/.mozilla
COPY . /opt

WORKDIR /opt