FROM docker.io/python:3.11.0a5-alpine3.15
# NOTE(review): the tag above pins an alpha pre-release of Python (3.11.0a5) on Alpine 3.15.
# Consider moving to a final release tag once the dependencies are confirmed to work on it.

# inspired by https://github.com/darktohka/pytesseract-docker/blob/master/Dockerfile

# Browser, X11 tooling, OCR and fonts used by the image.
# --no-cache fetches a fresh package index for this command and does not store it
# in the layer, so a separate `apk update` layer is not needed.
RUN apk add --no-cache \
      bash \
      firefox \
      fluxbox \
      font-noto \
      font-noto-extra \
      imagemagick \
      scrot \
      terminus-font \
      tesseract-ocr \
      ttf-dejavu \
      ttf-font-awesome \
      ttf-inconsolata \
      vips-tools \
      xdotool \
      xprop \
      xterm \
      xvfb


# Build and runtime environment, written in key=value form (the space-separated
# `ENV key value` form is deprecated).
# CC/CXX point compiler-driven builds at clang.
# NOTE(review): PIP_NO_CACHE_DIR=0 is kept as-is; pip is understood to treat any valid
# boolean value for this variable as "disable the cache" - confirm before changing it.
ENV SHELL=/bin/sh \
    CC=/usr/bin/clang \
    CXX=/usr/bin/clang++ \
    LANG=C.UTF-8 \
    PYTHONUNBUFFERED=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=0 \
    TESSDATA_PREFIX=/usr/local/share/tessdata
WORKDIR /tmp

# Shared libraries that stay installed after the build toolchain is removed.
RUN apk add --no-cache openssl leptonica openjpeg tiff libpng zlib freetype libgcc libstdc++

# WORKDIR creates the directory, so no separate `mkdir` is needed. It also remains the
# working directory for the instructions that follow this section.
WORKDIR /tmp/tesseract

# Build Tesseract from the tip of its default branch and install pytesseract.
# Everything happens in ONE layer: the toolchain (.dev-deps), the source tree and the
# static libraries are removed in the same RUN that created them, so they never end up
# in the image. Deleting them in later layers would not reduce the image size.
# The English model is downloaded into $TESSDATA_PREFIX.
RUN set -eux; \
    apk add --no-cache --virtual .dev-deps \
      autoconf \
      automake \
      clang \
      file \
      freetype-dev \
      g++ \
      git \
      leptonica-dev \
      libpng-dev \
      libtool \
      linux-headers \
      make \
      openjpeg-dev \
      openssl-dev \
      pkgconfig \
      tiff-dev \
      zlib-dev; \
    mkdir -p "$TESSDATA_PREFIX"; \
    wget https://github.com/tesseract-ocr/tessdata_fast/raw/main/eng.traineddata -P "$TESSDATA_PREFIX"; \
    git clone --depth 1 https://github.com/tesseract-ocr/tesseract.git .; \
    ./autogen.sh; \
    ./configure; \
    make -j"$(nproc)"; \
    make install; \
    pip install -U --no-cache-dir pytesseract; \
    apk del .dev-deps; \
    rm -f /usr/local/lib/*.a; \
    rm -rf /tmp/*
# Helper tools. --no-cache makes apk fetch a fresh package index for this command, so
# the step does not depend on an index left behind (or deleted) by an earlier layer.
RUN apk add --no-cache jq nodejs sed
# Firefox profile data for the root user.
COPY ./mozilla /root/.mozilla
# Enable the Alpine edge/testing repository and install the remaining packages in one
# layer, with a freshly fetched index (--no-cache) instead of a separate `apk update`.
# NOTE(review): mixing edge/testing into a 3.15 image can pull mismatched packages;
# presumably only wmctrl needs it - confirm and consider a tagged repository.
# The compilers and -dev packages are intentionally left installed, as before.
RUN echo https://dl-cdn.alpinelinux.org/alpine/edge/testing >> /etc/apk/repositories \
 && apk add --no-cache \
      clang \
      freetype-dev \
      gcc \
      git \
      jpeg-dev \
      libx11-dev \
      make \
      musl-dev \
      wmctrl \
      xclip \
      zip \
      zlib \
      zlib-dev

# Build and install grabc, then delete the checkout in the same layer so the sources
# do not remain in the image. `make -C` replaces `cd` inside RUN.
RUN git clone --depth 1 https://github.com/muquit/grabc /tmp/grabc \
 && make -C /tmp/grabc \
 && make -C /tmp/grabc install \
 && rm -rf /tmp/grabc

# Upgrade Pillow; clang and the -dev packages installed above are available if pip has
# to compile it from source.
RUN python3 -m pip install --no-cache-dir --upgrade Pillow

# Application files; /opt is the working directory of the running container.
COPY . /opt
WORKDIR /opt

# Exec form: the script runs directly (no intermediate `/bin/sh -c`), so it is PID 1
# and receives the stop signal from `docker stop`.
# Requires prepare-firefox.sh to be executable and to start with a shebang line.
CMD ["/opt/prepare-firefox.sh"]