# Alpine/Python image bundling Firefox, an X11 toolchain (Xvfb, fluxbox,
# xdotool, ...), Tesseract OCR built from source, pytesseract and Pillow.
# The Tesseract build steps are inspired by
# https://github.com/darktohka/pytesseract-docker/blob/master/Dockerfile
FROM docker.io/python:3.11.0a5-alpine3.15

# Build- and run-time environment.
# CC/CXX point at clang, which is installed both for the Tesseract build and
# (again) in the final package set below.
# PIP_NO_CACHE_DIR keeps its original value; every pip call in this file
# passes --no-cache-dir explicitly, so layer size does not depend on it.
ENV SHELL=/bin/sh \
    CC=/usr/bin/clang \
    CXX=/usr/bin/clang++ \
    LANG=C.UTF-8 \
    PYTHONUNBUFFERED=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=0 \
    TESSDATA_PREFIX=/usr/local/share/tessdata

# Browser, X11 utilities, image tools and fonts.
# --no-cache fetches a fresh index and stores none, replacing the separate
# `apk update` layer.
RUN apk add --no-cache \
        bash \
        firefox \
        fluxbox \
        font-noto \
        font-noto-extra \
        imagemagick \
        scrot \
        terminus-font \
        tesseract-ocr \
        ttf-dejavu \
        ttf-font-awesome \
        ttf-inconsolata \
        vips-tools \
        xdotool \
        xprop \
        xterm \
        xvfb

# Shared libraries the source-built Tesseract links against at run time.
RUN apk add --no-cache \
        freetype \
        leptonica \
        libgcc \
        libpng \
        libstdc++ \
        openjpeg \
        openssl \
        tiff \
        zlib

# Build Tesseract from the upstream default branch (unpinned: rebuilds may pick
# up a different revision), fetch the English "fast" model and install
# pytesseract. The toolchain is installed as the virtual package .dev-deps and
# removed in the SAME layer, together with static libraries and the source
# tree, so none of it is stored in the image.
WORKDIR /tmp/tesseract
RUN apk add --no-cache --virtual .dev-deps \
        autoconf \
        automake \
        clang \
        file \
        freetype-dev \
        g++ \
        git \
        leptonica-dev \
        libpng-dev \
        libtool \
        linux-headers \
        make \
        openjpeg-dev \
        openssl-dev \
        pkgconfig \
        tiff-dev \
        zlib-dev \
    && mkdir -p "$TESSDATA_PREFIX" \
    && wget https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata -P "$TESSDATA_PREFIX" \
    && git clone --depth 1 https://github.com/tesseract-ocr/tesseract.git . \
    && ./autogen.sh \
    && ./configure \
    && make -j"$(nproc)" \
    && make install \
    && pip install -U --no-cache-dir pytesseract \
    && apk del .dev-deps \
    && rm -f /usr/local/lib/*.a \
    && rm -rf /tmp/*

# Helper tools taken from the release repositories (installed before the
# edge/testing repository is enabled, as in the original ordering).
RUN apk add --no-cache \
        jq \
        nodejs \
        sed

# Enable the edge/testing repository (it stays enabled in the final image) and
# install the remaining tools plus the compilers and headers used by the
# grabc and Pillow builds below. These packages remain in the final image.
RUN echo https://dl-cdn.alpinelinux.org/alpine/edge/testing >> /etc/apk/repositories \
    && apk add --no-cache \
        clang \
        freetype-dev \
        gcc \
        git \
        jpeg-dev \
        libx11-dev \
        make \
        musl-dev \
        wmctrl \
        zip \
        zlib \
        zlib-dev

# Build and install grabc from source, then drop the source tree in the same
# layer so it does not linger under /tmp.
WORKDIR /tmp/grabc
RUN git clone --depth 1 https://github.com/muquit/grabc . \
    && make \
    && make install \
    && rm -rf /tmp/grabc

# Make sure Pillow is at the newest available release.
RUN python3 -m pip install --no-cache-dir --upgrade Pillow

# Files from the build context go last so that editing them does not
# invalidate the expensive package and build layers above.
COPY ./mozilla /root/.mozilla
COPY . /opt

WORKDIR /opt

# Exec-form CMD; `exec` replaces the wrapping shell so the script becomes PID 1
# and receives stop signals directly. Going through /bin/sh keeps the script
# runnable exactly as with the former shell-form CMD.
CMD ["/bin/sh", "-c", "exec /opt/prepare-firefox.sh"]