FROM python:3.10-bookworm

# Register the Microsoft apt repository (source list saved as mssql-release.list).
# The signing key is stored as an ASCII-armored file in /etc/apt/trusted.gpg.d/,
# which apt reads directly, instead of being piped into the deprecated `apt-key`.
# `curl -f` makes the build fail on an HTTP error rather than saving an error
# page as the key or as the source list.
# NOTE(review): the list is fetched from config/debian/11 while the base image
# is bookworm (Debian 12) - confirm this mismatch is intentional.
RUN curl -fsS -o /etc/apt/trusted.gpg.d/microsoft.asc https://packages.microsoft.com/keys/microsoft.asc \
    && curl -fsS -o /etc/apt/sources.list.d/mssql-release.list https://packages.microsoft.com/config/debian/11/prod.list

# Install OS-level dependencies in a single layer. The package list is kept in
# alphabetical order; the apt lists are removed at the end of the same layer.
RUN dpkg --configure -a \
    && apt-get -qq update \
    && apt-get -qq install -y --no-install-recommends \
        alien \
        build-essential \
        default-jdk \
        default-libmysqlclient-dev \
        freetds-bin \
        freetds-dev \
        gcc \
        git \
        gnupg \
        libaio1 \
        libevent-dev \
        libffi-dev \
        libkrb5-dev \
        libpq-dev \
        librdkafka-dev \
        libsasl2-2 \
        libsasl2-dev \
        libsasl2-modules \
        libsasl2-modules-gssapi-mit \
        libssl-dev \
        libxml2 \
        # Pinned to the same version as unixodbc / unixodbc-dev for compatibility
        odbcinst=2.3.11-2+deb12u1 \
        openssl \
        postgresql \
        postgresql-contrib \
        tdsodbc \
        unixodbc=2.3.11-2+deb12u1 \
        unixodbc-dev=2.3.11-2+deb12u1 \
        unzip \
        wget \
    # ACCEPT_EULA=Y accepts the MSSQL ODBC license during the install
    && ACCEPT_EULA=Y apt-get -qq install -y msodbcsql18 \
    && rm -rf /var/lib/apt/lists/*

# Add updated postgres/redshift dependencies based on libpq
# NOTE(review): ENV keeps DEBIAN_FRONTEND set in the final image's environment
# as well as in later build steps; left unchanged so existing behaviour is
# preserved - consider scoping it to the build if nothing relies on it.
ENV DEBIAN_FRONTEND=noninteractive
# Register the PGDG repository and install the PostgreSQL packages from it.
# - The repository key is saved to a file and referenced with `signed-by`,
#   replacing the deprecated `apt-key add`.
# - Every step is chained with `&&` so that a failed download, `apt-get update`
#   or `apt-get install` fails the build instead of being silently ignored.
RUN curl -fsSL -o /usr/share/keyrings/pgdg.asc https://www.postgresql.org/media/keys/ACCC4CF8.asc \
    && echo "deb [signed-by=/usr/share/keyrings/pgdg.asc] https://apt.postgresql.org/pub/repos/apt/ bookworm-pgdg main" > /etc/apt/sources.list.d/pgdg.list \
    && apt-get -qq update \
    && apt-get -qq install --no-install-recommends -y libpq-dev postgresql-client postgresql-common postgresql postgresql-contrib \
    && apt-get -qq autoremove -yqq --purge \
    && apt-get -qq clean \
    && rm -rf /var/lib/apt/lists/*

# Download the Oracle Instant Client archive matching the build architecture
# and unpack it flat (unzip -j) into /instantclient.
# A `case` statement selects the archive: the previous
# `[ a = b || c = d ]` test is not valid inside `[ ]` (the shell splits it at
# `||`), so the ARM branch was never taken and the x64 archive was always used.
RUN case "$(uname -m)" in \
      arm64|aarch64) \
        instantclient_url="https://download.oracle.com/otn_software/linux/instantclient/191000/instantclient-basic-linux.arm64-19.10.0.0.0dbru.zip" ;; \
      *) \
        instantclient_url="https://download.oracle.com/otn_software/linux/instantclient/1917000/instantclient-basic-linux.x64-19.17.0.0.0dbru.zip" ;; \
    esac \
    && wget -q "${instantclient_url}" -O /oracle-instantclient.zip \
    && unzip -qq -d /instantclient -j /oracle-instantclient.zip \
    && rm -f /oracle-instantclient.zip

# Let the dynamic loader find the Instant Client libraries unpacked above.
ENV LD_LIBRARY_PATH=/instantclient

# Install DB2 iAccess driver (only attempted on x86_64).
# - `curl -f` writes the source list straight to its file and fails the build
#   on an HTTP error instead of saving an error page as an apt source.
# - `apt-get ... -y` is used instead of `apt`, whose CLI is not meant for
#   scripts and which would abort the build if it ever asked for confirmation.
# - The apt lists are removed in the same layer to keep the image small.
RUN if [ "$(uname -m)" = "x86_64" ]; then \
      curl -fsSL -o /etc/apt/sources.list.d/ibmi-acs-1.1.0.list \
        https://public.dhe.ibm.com/software/ibmi/products/odbc/debs/dists/1.1.0/ibmi-acs-1.1.0.list \
      && apt-get update \
      && apt-get install -y ibm-iaccess \
      && rm -rf /var/lib/apt/lists/*; \
    fi

# Use an absolute path: a relative WORKDIR is resolved against whatever working
# directory the base image happens to set. /ingestion is the directory the
# later `chown` step in this file operates on.
WORKDIR /ingestion

# Required for Airflow DockerOperator, as we need to run the workflows from a `python main.py` command in the container.
# The trailing slash makes it explicit that the destination is a directory.
COPY ingestion/operators/docker/*.py ./

# Provision an unprivileged account (UID/GID 1000) that owns a writable home.
# If the container is started with securityContext runAsUser: 1000 and that
# UID has no /etc/passwd entry, Python turns off user site-packages. Tools
# such as spacy that fetch models through `pip --user` then break at runtime,
# because what they install cannot be seen by the import system. Having a real
# user for UID 1000 keeps the standard ~/.local install path enabled.
RUN groupadd --gid 1000 openmetadata \
    && useradd --create-home --uid 1000 --gid 1000 openmetadata \
    && chown -R openmetadata:openmetadata /ingestion
ENV HOME=/home/openmetadata \
    PATH="/home/openmetadata/.local/bin:${PATH}"
USER openmetadata

# pip settings applied to every install step that follows:
#   PIP_NO_CACHE_DIR=1  disables the pip cache directory
#                       (https://pip.pypa.io/en/stable/topics/caching/#avoiding-caching)
#   PIP_QUIET=1         makes pip silent
ENV PIP_NO_CACHE_DIR=1 \
    PIP_QUIET=1

# cx-Oracle's setup.py still relies on pkg_resources, which setuptools removed
# in 81.0.0+, so setuptools is held below 81 while pip itself is upgraded.
RUN pip install --upgrade "setuptools<81" pip
# cx-Oracle is installed ahead of the other packages with build isolation
# switched off, so that its build picks up the pinned setuptools from above.
RUN pip install "cx_Oracle>=8.3.0,<9" --no-build-isolation

# Build arguments (override with --build-arg):
#   INGESTION_DEPENDENCY  name of the openmetadata-ingestion extra to install
#   RI_VERSION            version used in the `~=` (compatible release) specifier
ARG INGESTION_DEPENDENCY="all"
ARG RI_VERSION="1.12.6.0"
# Install the airflow extra first, then the extra selected by INGESTION_DEPENDENCY.
RUN pip install "openmetadata-ingestion[airflow]~=${RI_VERSION}"
RUN pip install "openmetadata-ingestion[${INGESTION_DEPENDENCY}]~=${RI_VERSION}"


# Temporary workaround for https://github.com/open-metadata/OpenMetadata/issues/9593
# Install the db2 extra on x86_64 only. An explicit if/else is used because
# `test && pip install ... || echo ...` also runs the echo when pip itself
# fails, which would report "not supported" and let the build succeed without
# the package. Here a failed install on x86_64 fails the build.
RUN if [ "$(uname -m)" = "x86_64" ]; then \
      pip install "openmetadata-ingestion[db2]~=${RI_VERSION}"; \
    else \
      echo "DB2 not supported on ARM architectures."; \
    fi

# Swap psycopg2-binary for psycopg2: the binary wheel produces an
# architecture-specific error when authenticating the connection with Airflow,
# which psycopg2 does not. mysqlclient is installed in the same step.
RUN pip uninstall --yes psycopg2-binary \
    && pip install mysqlclient==2.1.1 psycopg2
