Skip to content

Commit

Permalink
feat: data processing extract base image docker file R packages
Browse files Browse the repository at this point in the history
  • Loading branch information
modsen-hedgehog committed Feb 16, 2025
1 parent 779cd84 commit ac50b01
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 145 deletions.
150 changes: 14 additions & 136 deletions server/workers/dataprocessing/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,137 +1,14 @@
FROM ubuntu:18.04
FROM r-base:latest

LABEL maintainer="Chris Kittel <christopher.kittel@openknowledgemaps.org>"

ENV DEBIAN_FRONTEND=noninteractive

ARG R_VERSION
ARG BUILD_DATE
ARG CRAN
## Setting a BUILD_DATE will set CRAN to the matching MRAN date
## No BUILD_DATE means that CRAN will default to latest
ENV R_VERSION=${R_VERSION:-3.6.3} \
CRAN=${CRAN:-https://cran.rstudio.com}

RUN apt-get update \
&& apt-get install -y --no-install-recommends \
bash-completion \
ca-certificates \
file \
fonts-texgyre \
g++ \
gfortran \
gsfonts \
libblas-dev \
libbz2-1.0 \
libcurl4 \
libjpeg-turbo8-dev \
libopenblas-dev \
libpangocairo-1.0-0 \
libpcre3 \
libpng16-16 \
libreadline7 \
libtiff5 \
liblzma5 \
locales \
make \
unzip \
zip \
zlib1g \
&& echo "en_US.UTF-8" >> /etc/locale.gen \
&& locale-gen en_US.UTF-8 \
&& /usr/sbin/update-locale LANG=en_US.UTF-8 \
&& BUILDDEPS="curl \
default-jdk \
libbz2-dev \
libcairo2-dev \
libcurl4-openssl-dev \
libpango1.0-dev \
libjpeg-dev \
libpcre3-dev \
libpng-dev \
libreadline-dev \
libtiff5-dev \
liblzma-dev \
libx11-dev \
libxt-dev \
perl \
tcl8.6-dev \
tk8.6-dev \
x11proto-core-dev \
xauth \
xfonts-base \
xvfb \
zlib1g-dev" \
&& apt-get install -y --no-install-recommends $BUILDDEPS \
&& cd tmp/ \
## Download source code
&& curl -O https://cran.r-project.org/src/base/R-3/R-${R_VERSION}.tar.gz \
## Extract source code
&& tar -xf R-${R_VERSION}.tar.gz \
&& cd R-${R_VERSION} \
## Set compiler flags
&& R_PAPERSIZE=letter \
R_BATCHSAVE="--no-save --no-restore" \
R_BROWSER=xdg-open \
PAGER=/usr/bin/pager \
PERL=/usr/bin/perl \
R_UNZIPCMD=/usr/bin/unzip \
R_ZIPCMD=/usr/bin/zip \
R_PRINTCMD=/usr/bin/lpr \
LIBnn=lib \
AWK=/usr/bin/awk \
CFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \
CXXFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \
## Configure options
./configure --enable-R-shlib \
--enable-memory-profiling \
--with-readline \
--with-blas \
--with-tcltk \
--disable-nls \
--with-recommended-packages \
## Build and install
&& make \
&& make install \
## Add a library directory (for user-installed packages)
&& mkdir -p /usr/local/lib/R/site-library \
&& chown root:staff /usr/local/lib/R/site-library \
&& chmod g+ws /usr/local/lib/R/site-library \
## Fix library path
&& sed -i '/^R_LIBS_USER=.*$/d' /usr/local/lib/R/etc/Renviron \
&& echo "R_LIBS_USER=\${R_LIBS_USER-'/usr/local/lib/R/site-library'}" >> /usr/local/lib/R/etc/Renviron \
&& echo "R_LIBS=\${R_LIBS-'/usr/local/lib/R/site-library:/usr/local/lib/R/library:/usr/lib/R/library'}" >> /usr/local/lib/R/etc/Renviron \
## Set configured CRAN mirror
&& if [ -z "$BUILD_DATE" ]; then MRAN=$CRAN; \
else MRAN=https://mran.microsoft.com/snapshot/${BUILD_DATE}; fi \
&& echo MRAN=$MRAN >> /etc/environment \
&& echo "options(repos = c(CRAN='$MRAN'), download.file.method = 'libcurl')" >> /usr/local/lib/R/etc/Rprofile.site \
## Use littler installation scripts
&& Rscript -e "install.packages(c('littler', 'docopt'), repo = '$CRAN')" \
&& ln -s /usr/local/lib/R/site-library/littler/examples/install2.r /usr/local/bin/install2.r \
&& ln -s /usr/local/lib/R/site-library/littler/examples/installGithub.r /usr/local/bin/installGithub.r \
&& ln -s /usr/local/lib/R/site-library/littler/bin/r /usr/local/bin/r \
## Clean up from R source install
&& cd / \
&& rm -rf /tmp/* \
&& apt-get remove --purge -y $BUILDDEPS \
&& apt-get autoremove -y \
&& apt-get autoclean -y \
&& rm -rf /var/lib/apt/lists/*

RUN locale-gen en_US.UTF-8 && \
update-locale LANG=en_US.UTF-8 && \
update-locale LC_ALL=en_US.UTF-8 && \
export LANGUAGE=en_US.UTF-8 && \
export LANG=en_US.UTF-8 && \
export LC_ALL=en_US.UTF-8 && \
dpkg-reconfigure locales

RUN apt update && apt full-upgrade -y && \
apt install -y links curl vim libcurl4-openssl-dev \
libxml2-dev libz-dev libpoppler-cpp-dev \
libopenmpi-dev libzmq3-dev build-essential python3-dev \
libssl1.1 libssl-dev && \
libxml2-dev libz-dev libpoppler-cpp-dev \
libopenmpi-dev libzmq3-dev build-essential python3-dev \
libssl1.1 libssl-dev && \
apt clean && \
rm -f /etc/localtime && \
ln -s /usr/share/zoneinfo/Europe/Vienna /etc/localtime && \
Expand All @@ -140,29 +17,30 @@ RUN apt update && apt full-upgrade -y && \
RUN apt-get -y install python3 python3-pip

RUN R -e 'options(repos="https://cran.wu.ac.at")' && \
R -e 'install.packages("remotes")' && \
R -e 'install.packages("renv", version="0.14.0-5")'
R -e 'install.packages("remotes")' && \
R -e 'install.packages("renv", version="0.14.0-5")'

WORKDIR /headstart

COPY workers/dataprocessing/requirements.txt .
RUN pip3 install --no-cache-dir Cython
RUN pip3 install --upgrade pip
RUN pip3 install --no-cache-dir -r requirements.txt
RUN pip3 install --no-cache-dir Cython && \
pip3 install --upgrade pip && \
pip3 install --no-cache-dir -r requirements.txt

COPY workers/dataprocessing/renv.lock .
COPY workers/dataprocessing/activate.R .

RUN R -e 'renv::consent(provided = TRUE)' && \
R -e 'setwd("./"); renv::activate(); renv::restore(lockfile = "./renv.lock")'
R -e 'setwd("./"); renv::activate(); renv::restore(lockfile = "./renv.lock")'

COPY workers/common ../common
COPY workers/dataprocessing/requirements-e.txt .
RUN pip3 install --no-cache-dir -r requirements-e.txt

COPY workers/dataprocessing ./dataprocessing
COPY preprocessing/resources ./resources
COPY preprocessing/other-scripts ./other-scripts
RUN mkdir -p /var/log/headstart && touch /var/log/headstart/headstart.log

COPY workers/dataprocessing/*.py ./
ENTRYPOINT python3 run_dataprocessing.py

ENTRYPOINT ["python3", "run_dataprocessing.py"]
119 changes: 119 additions & 0 deletions server/workers/dataprocessing/Dockerfile.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
# Dockerfile.r
# Base image providing R on top of Ubuntu 18.04.
FROM ubuntu:18.04

LABEL maintainer="Chris Kittel <christopher.kittel@openknowledgemaps.org>"

# Keep apt/dpkg from prompting for input during the build.
ENV DEBIAN_FRONTEND=noninteractive

# Build-time knobs (pass with --build-arg):
#   R_VERSION   R release to use; falls back to 3.6.3
#   BUILD_DATE  when set, a later step points R at a dated snapshot repository
#               instead of the plain CRAN mirror
#   CRAN        CRAN mirror URL; falls back to https://cran.rstudio.com
ARG R_VERSION
ARG BUILD_DATE
ARG CRAN
ENV R_VERSION=${R_VERSION:-3.6.3}
ENV CRAN=${CRAN:-https://cran.rstudio.com}

# Packages that stay in the image (compilers, shared libraries, fonts, zip
# tools, locales), followed by generation of the en_US.UTF-8 locale.
# The apt package lists are not removed here; cleanup happens in a later step.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        bash-completion \
        ca-certificates \
        file \
        fonts-texgyre \
        g++ \
        gfortran \
        gsfonts \
        libblas-dev \
        libbz2-1.0 \
        libcurl4 \
        libjpeg-turbo8-dev \
        libopenblas-dev \
        libpangocairo-1.0-0 \
        libpcre3 \
        libpng16-16 \
        libreadline7 \
        libtiff5 \
        liblzma5 \
        locales \
        make \
        unzip \
        zip \
        zlib1g \
    && echo "en_US.UTF-8" >> /etc/locale.gen \
    && locale-gen en_US.UTF-8 \
    && /usr/sbin/update-locale LANG=en_US.UTF-8

# Build R from source, configure it, install littler, then remove the
# build-only packages and the source tree again.
#
# Everything runs in a single RUN on purpose: files deleted in a later layer
# still occupy space in the layer that created them, so the build dependencies
# must be installed and purged within the same layer for the image to shrink.
# `apt-get update` is part of this layer so the install never runs against
# stale package lists from a cached earlier layer.
# BUILDDEPS is a shell variable rather than ENV so the list of (already
# removed) packages does not leak into the runtime environment of the image.
RUN BUILDDEPS="curl \
        default-jdk \
        libbz2-dev \
        libcairo2-dev \
        libcurl4-openssl-dev \
        libpango1.0-dev \
        libjpeg-dev \
        libpcre3-dev \
        libpng-dev \
        libreadline-dev \
        libtiff5-dev \
        liblzma-dev \
        libx11-dev \
        libxt-dev \
        perl \
        tcl8.6-dev \
        tk8.6-dev \
        x11proto-core-dev \
        xauth \
        xfonts-base \
        xvfb \
        zlib1g-dev" \
    && apt-get update \
    && apt-get install -y --no-install-recommends $BUILDDEPS \
    && mkdir -p /tmp/build \
    && cd /tmp/build \
## Download the source tarball. -f makes curl fail on an HTTP error instead of
## saving the error page; the R-<major>/ directory is derived from R_VERSION so
## overriding the version with another major release still resolves.
    && curl -fsSLO "https://cran.r-project.org/src/base/R-${R_VERSION%%.*}/R-${R_VERSION}.tar.gz" \
    && tar -xf R-${R_VERSION}.tar.gz \
    && cd R-${R_VERSION} \
## Environment for ./configure: default tools and compiler flags
    && R_PAPERSIZE=letter \
    R_BATCHSAVE="--no-save --no-restore" \
    R_BROWSER=xdg-open \
    PAGER=/usr/bin/pager \
    PERL=/usr/bin/perl \
    R_UNZIPCMD=/usr/bin/unzip \
    R_ZIPCMD=/usr/bin/zip \
    R_PRINTCMD=/usr/bin/lpr \
    LIBnn=lib \
    AWK=/usr/bin/awk \
    CFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \
    CXXFLAGS="-g -O2 -fstack-protector-strong -Wformat -Werror=format-security -Wdate-time -D_FORTIFY_SOURCE=2 -g" \
    ./configure --enable-R-shlib \
        --enable-memory-profiling \
        --with-readline \
        --with-blas \
        --with-tcltk \
        --disable-nls \
        --with-recommended-packages \
    && make \
    && make install \
## Group-writable site library for user-installed packages
    && mkdir -p /usr/local/lib/R/site-library \
    && chown root:staff /usr/local/lib/R/site-library \
    && chmod g+ws /usr/local/lib/R/site-library \
## Point R_LIBS_USER / R_LIBS at the site library
    && sed -i '/^R_LIBS_USER=.*$/d' /usr/local/lib/R/etc/Renviron \
    && echo "R_LIBS_USER=\${R_LIBS_USER-'/usr/local/lib/R/site-library'}" >> /usr/local/lib/R/etc/Renviron \
    && echo "R_LIBS=\${R_LIBS-'/usr/local/lib/R/site-library:/usr/local/lib/R/library:/usr/lib/R/library'}" >> /usr/local/lib/R/etc/Renviron \
## Default package repository: the CRAN mirror, or a dated snapshot when
## BUILD_DATE (YYYY-MM-DD) is set. MRAN (mran.microsoft.com) has been shut
## down, so snapshots are served by Posit Package Manager instead; the
## variable keeps its historical name because it is written to /etc/environment.
    && if [ -z "$BUILD_DATE" ]; then MRAN=$CRAN; \
       else MRAN=https://packagemanager.posit.co/cran/${BUILD_DATE}; fi \
    && echo MRAN=$MRAN >> /etc/environment \
    && echo "options(repos = c(CRAN='$MRAN'), download.file.method = 'libcurl')" >> /usr/local/lib/R/etc/Rprofile.site \
## littler plus its helper scripts on PATH (argument spelled `repos` in full
## instead of relying on R's partial argument matching)
    && Rscript -e "install.packages(c('littler', 'docopt'), repos = '$CRAN')" \
    && ln -s /usr/local/lib/R/site-library/littler/examples/install2.r /usr/local/bin/install2.r \
    && ln -s /usr/local/lib/R/site-library/littler/examples/installGithub.r /usr/local/bin/installGithub.r \
    && ln -s /usr/local/lib/R/site-library/littler/bin/r /usr/local/bin/r \
## Remove sources, build-only packages and apt metadata within this same layer
    && cd / \
    && rm -rf /tmp/build \
    && apt-get remove --purge -y $BUILDDEPS \
    && apt-get autoremove -y \
    && apt-get autoclean -y \
    && rm -rf /var/lib/apt/lists/*

# Generate the en_US.UTF-8 locale, set LANG and LC_ALL to it through
# update-locale, and reconfigure the locales package.
RUN locale-gen en_US.UTF-8 \
    && update-locale LANG=en_US.UTF-8 \
    && update-locale LC_ALL=en_US.UTF-8 \
    && dpkg-reconfigure locales

# Default command: print the installed R version.
CMD ["R", "--version"]
8 changes: 8 additions & 0 deletions server/workers/dataprocessing/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/bin/bash
# Builds the R base image and then the dataprocessing image that is built
# FROM it. Can be invoked from any working directory.
#
# -e: stop on the first failing command; -u: unset variables are errors;
# -o pipefail: a failing command inside a pipeline fails the pipeline.
set -euo pipefail

# Resolve everything relative to this script's own location instead of the
# caller's working directory, so the relative Dockerfile paths always resolve.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Build context is two levels above this directory.
CONTEXT_DIR="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Build the R base image from Dockerfile.r.
# NOTE(review): the tag r-base:latest is also the name of an official image on
# Docker Hub; if this step has not run on the build host, a `FROM r-base:latest`
# may pull that image instead -- consider a project-specific tag.
docker build -t r-base:latest -f "${SCRIPT_DIR}/Dockerfile.r" "${CONTEXT_DIR}"

# Build the main dataprocessing image using the prebuilt R base image.
docker build -t headstart-dataprocessing:latest -f "${SCRIPT_DIR}/Dockerfile" "${CONTEXT_DIR}"
9 changes: 0 additions & 9 deletions server/workers/openaire/Dockerfile.r
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# Dockerfile.r
FROM ubuntu:20.04

LABEL maintainer="Chris Kittel <christopher.kittel@openknowledgemaps.org>"
Expand All @@ -8,12 +7,9 @@ ENV DEBIAN_FRONTEND=noninteractive
ARG R_VERSION
ARG BUILD_DATE
ARG CRAN
## Setting a BUILD_DATE will set CRAN to the matching MRAN date.
## No BUILD_DATE means that CRAN will default to latest.
ENV R_VERSION=${R_VERSION:-3.6.3} \
CRAN=${CRAN:-https://cran.rstudio.com}

# Install system dependencies for R and its build process
RUN apt-get update && apt-get install -y --no-install-recommends \
bash-completion \
ca-certificates \
Expand Down Expand Up @@ -62,7 +58,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
zlib1g-dev" \
&& apt-get install -y --no-install-recommends $BUILDDEPS

# Download, build, and install R from source
RUN mkdir -p /tmp/build && cd /tmp/build \
&& curl -O https://cran.r-project.org/src/base/R-3/R-${R_VERSION}.tar.gz \
&& tar -xf R-${R_VERSION}.tar.gz \
Expand All @@ -88,7 +83,6 @@ RUN mkdir -p /tmp/build && cd /tmp/build \
--with-recommended-packages \
&& make && make install

# Configure library directories and CRAN mirror settings; install littler
RUN mkdir -p /usr/local/lib/R/site-library \
&& chown root:staff /usr/local/lib/R/site-library \
&& chmod g+ws /usr/local/lib/R/site-library \
Expand All @@ -103,18 +97,15 @@ RUN mkdir -p /usr/local/lib/R/site-library \
&& ln -s /usr/local/lib/R/site-library/littler/examples/installGithub.r /usr/local/bin/installGithub.r \
&& ln -s /usr/local/lib/R/site-library/littler/bin/r /usr/local/bin/r

# Clean up build dependencies and temporary files
RUN rm -rf /tmp/build \
&& apt-get remove --purge -y $BUILDDEPS \
&& apt-get autoremove -y \
&& apt-get autoclean -y \
&& rm -rf /var/lib/apt/lists/*

# Generate and configure locales
RUN locale-gen en_US.UTF-8 && \
update-locale LANG=en_US.UTF-8 && \
update-locale LC_ALL=en_US.UTF-8 && \
dpkg-reconfigure locales

# Expose R version for verification
CMD ["R", "--version"]

0 comments on commit ac50b01

Please sign in to comment.