# Alpine/Python image with Firefox, X11 tooling (Xvfb, fluxbox, xdotool, scrot) and Tesseract OCR built from source.
# Base image: CPython 3.11.0a5 on Alpine 3.15, pinned to an exact tag.
# NOTE(review): 3.11.0a5 is an alpha pre-release of the interpreter; consider
# moving to a final 3.11.x tag once the package lists in this file have been
# checked against the Alpine release that tag ships with.
FROM docker.io/python:3.11.0a5-alpine3.15

# Inspired by https://github.com/darktohka/pytesseract-docker/blob/master/Dockerfile
# Browser and virtual-desktop packages: Firefox, the Xvfb X server, the fluxbox
# window manager, X11 helper tools (scrot, xdotool, xprop, xterm), image
# utilities (imagemagick, vips-tools), bash, the distro tesseract-ocr package
# and a set of fonts.
#
# `--no-cache` makes apk fetch the package index on the fly instead of storing
# it under /var/cache/apk, so no separate `apk update` layer is needed and no
# index is baked into the image. One package per line, sorted, to keep diffs
# small.
RUN apk add --no-cache \
        bash \
        firefox \
        fluxbox \
        font-noto \
        font-noto-extra \
        imagemagick \
        scrot \
        terminus-font \
        tesseract-ocr \
        ttf-dejavu \
        ttf-font-awesome \
        ttf-inconsolata \
        vips-tools \
        xdotool \
        xprop \
        xterm \
        xvfb
# Environment for the build steps in this file and for the running container.
#   SHELL                          default shell
#   CC / CXX                       point native builds at clang / clang++
#   LANG                           UTF-8 locale
#   PYTHONUNBUFFERED               unbuffered Python stdout/stderr
#   PIP_DISABLE_PIP_VERSION_CHECK  skip pip's "new version available" check
#   PIP_NO_CACHE_DIR               pip cache switch
#                                  NOTE(review): recent pip releases treat any
#                                  valid boolean here (including 0) as "disable
#                                  the cache" - confirm for the bundled pip
#   TESSDATA_PREFIX                directory holding Tesseract *.traineddata files
#
# Written in `key=value` form; the space-separated `ENV key value` form is
# deprecated.
ENV SHELL=/bin/sh \
    CC=/usr/bin/clang \
    CXX=/usr/bin/clang++ \
    LANG=C.UTF-8 \
    PYTHONUNBUFFERED=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=0 \
    TESSDATA_PREFIX=/usr/local/share/tessdata
# Scratch directory for the build steps that follow.
WORKDIR /tmp

# Shared libraries that stay in the final image (the matching -dev packages are
# installed separately for the source build).
RUN apk add --no-cache \
        freetype \
        leptonica \
        libgcc \
        libpng \
        libstdc++ \
        openjpeg \
        openssl \
        tiff \
        zlib
# Download the English "fast" model into the tessdata directory. This is a
# runtime artifact, so it gets its own cacheable layer.
RUN mkdir -p "$TESSDATA_PREFIX" \
    && wget https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata -P "$TESSDATA_PREFIX"

# Build directory for the Tesseract sources (WORKDIR creates it if missing).
WORKDIR /tmp/tesseract

# Build and install Tesseract from the upstream repository's default branch,
# then install pytesseract.
#
# Everything happens in ONE layer on purpose: the toolchain (.dev-deps), the
# git checkout and the static libraries are removed in the same RUN that
# created them, so they never persist in an image layer. Deleting them in a
# later RUN would hide the files but not reduce the image size.
#
# pip runs before `apk del .dev-deps` so the compilers are still available if
# a dependency has to be built from source.
#
# NOTE(review): the clone is not pinned to a tag or commit and pytesseract is
# not pinned to a version, so rebuilds can pick up different upstream code.
RUN apk add --no-cache --virtual .dev-deps \
        autoconf \
        automake \
        clang \
        file \
        freetype-dev \
        g++ \
        git \
        leptonica-dev \
        libpng-dev \
        libtool \
        linux-headers \
        make \
        openjpeg-dev \
        openssl-dev \
        pkgconfig \
        tiff-dev \
        zlib-dev \
    && git clone --depth 1 https://github.com/tesseract-ocr/tesseract.git . \
    && ./autogen.sh \
    && ./configure \
    && make -j"$(nproc)" \
    && make install \
    && pip install -U --no-cache-dir pytesseract \
    && apk del .dev-deps \
    && rm -f /usr/local/lib/*.a \
    && rm -rf /tmp/* /var/cache/apk/*
# Extra command-line tools: jq, sed and the Node.js runtime.
# Installed in a single layer with `--no-cache` so no apk index is written back
# into /var/cache/apk.
RUN apk add --no-cache \
        jq \
        nodejs \
        sed
# Copy the `mozilla` directory from the build context into root's home
# (presumably a pre-seeded Firefox profile - confirm against the repository).
COPY ./mozilla /root/.mozilla

# Copy the entire build context into /opt. This also includes the `mozilla`
# directory again unless a .dockerignore excludes it.
COPY . /opt

# Commands in the container start from the application directory.
WORKDIR /opt