Docker Base Image Begin

This commit is contained in:
2024-10-04 15:55:21 +02:00
commit d54ad0c22a
10 changed files with 320 additions and 0 deletions

View File

@ -0,0 +1,27 @@
# CI workflow: on every pushed tag, build the Docker images and push them
# to the image registry.
name: Build and push Docker image in git push

env:
  IMAGE_REGISTRY: git.chai.uni-hamburg.de

on:
  push:
    tags: ['*']

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - name: Get repository code
        uses: actions/checkout@v4

      # -no-updates skips the interactive prompt of build-images.sh.
      - name: Build the images
        run: bash ./build-images.sh -no-updates

      # Credentials are read from the repository secrets.
      - name: Docker login
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.REGISTRY_USERNAME }}
          password: ${{ secrets.REGISTRY_TOKEN }}
          registry: ${{ env.IMAGE_REGISTRY }}

      - name: Push the images
        run: bash ./push-images.sh

53
build-images.sh Executable file
View File

@ -0,0 +1,53 @@
#!/bin/bash
#
# Build the base image for every entry in $PLATFORMS (defined in vars.sh).
#
# Usage: build-images.sh [-no-updates]
#   -no-updates  do not prompt; install the pinned dependencies from
#                docker/requirements-frozen.txt
#
# For the "gpu" entry an intermediate image is first built from
# docker/Dockerfile.gpu and then used as FROM_IMAGE for docker/Dockerfile.
# Every other entry is treated as a CPU architecture and builds
# docker/Dockerfile directly on top of $CPU_BASEIMAGE.
#
# NOTE(review): a failing docker command does not abort the script; the
# remaining platforms are still attempted and the exit status is that of the
# last command. Confirm whether CI should fail on a partial build.

# Abort on unset variables so a misspelled name is not silently expanded to "".
set -u

# https://stackoverflow.com/a/4774063
SCRIPTPATH="$(cd -- "$(dirname "$0")" >/dev/null 2>&1; pwd -P)"

source "$SCRIPTPATH/vars.sh"

requirements="requirements-frozen.txt"
if [[ "${1:-}" != "-no-updates" ]]; then
  echo "Update the dependencies in requirements.txt? [may break app] (y/n)"
  read -r newlockfile
  if [[ "$newlockfile" == "y" ]]; then
    requirements="requirements.txt"
  fi
fi

# $PLATFORMS is a space separated list, word splitting is intended here.
for platform in $PLATFORMS; do
  if [[ "$platform" == "gpu" ]]; then
    # The GPU variant is only built for amd64.
    platform="amd64"
    from_image="tmp_local/gpu_baseimage:amd64"
    tag_image="$IMAGE_OWNER/$IMAGE_NAME:gpu-amd64"

    docker build \
      --pull \
      --platform "linux/$platform" \
      --file "$SCRIPTPATH/docker/Dockerfile.gpu" \
      --tag "$from_image" \
      "$SCRIPTPATH"
  else
    from_image="$CPU_BASEIMAGE"
    tag_image="$IMAGE_OWNER/$IMAGE_NAME:cpu-$platform"

    # Refresh the base image for this architecture (vars.sh defines
    # CPU_BASEIMAGE; there is no BASEIMAGE variable).
    docker pull --platform "linux/$platform" "$CPU_BASEIMAGE"
  fi

  docker build \
    --platform "linux/$platform" \
    --file "$SCRIPTPATH/docker/Dockerfile" \
    --build-arg FROM_IMAGE="$from_image" \
    --build-arg H_UID=1050 \
    --build-arg PIP_REQ_FILE="$requirements" \
    --build-arg H_GID=1050 \
    --tag "$tag_image" \
    "$SCRIPTPATH"
done

if [[ "$requirements" == "requirements.txt" ]]; then
  # extract requirements-frozen.txt
  # The image build overwrites /ums-agenten/requirements.txt with the output
  # of "pip freeze"; copy it out of the image built last in the loop above.
  cid=$(docker create "$tag_image")
  docker cp "$cid:/ums-agenten/requirements.txt" "$SCRIPTPATH/docker/requirements-frozen.txt"
  docker rm "$cid"
fi

51
docker/Dockerfile Normal file
View File

@ -0,0 +1,51 @@
# Image for the agent seminar environment, built on top of FROM_IMAGE.
#
# Build arguments:
#   FROM_IMAGE    base image to build on
#   H_GID, H_UID  numeric group/user id of the unprivileged account "user"
#   PIP_REQ_FILE  name of the requirements file inside ./docker/ to install

# Only FROM_IMAGE is needed before the build stage starts.
ARG FROM_IMAGE
FROM $FROM_IMAGE

# An ARG declared before FROM is not visible to instructions after FROM,
# so the arguments used inside the stage are declared here.
ARG H_GID
ARG H_UID
ARG PIP_REQ_FILE

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive TZ=Europe/Berlin apt-get install -y tzdata \
    && cp /usr/share/zoneinfo/Europe/Berlin /etc/localtime \
    && echo "Europe/Berlin" > /etc/timezone

# Refresh the package index in the same layer as the install so a cached
# "update" layer can never be combined with a newer install step.
RUN apt-get update \
    && apt-get install -y bash \
        build-essential \
        git \
        curl \
        ca-certificates \
        python3-dev \
        python3-pip

RUN ln -s /usr/bin/python3 /usr/local/bin/python \
    && addgroup --gid $H_GID user \
    && adduser user --uid $H_UID --ingroup user --gecos "" --home /home/user/ --disabled-password

RUN apt-get update \
    && apt-get install -y \
        bash nano vim mc \
        curl wget \
        htop screen

# NOTE(review): presumably a no-op on the GPU base image, where torch is
# already installed by Dockerfile.gpu - confirm.
RUN python3 -m pip install --no-cache-dir --break-system-packages \
    torch --index-url https://download.pytorch.org/whl/cpu

RUN mkdir /ums-agenten/
ENV NLTK_DATA=/ums-agenten/nltk/
ENV HF_HOME=/ums-agenten/hf_home/

# Install the selected requirements file, then overwrite it with the exact
# versions that were installed.
COPY ./docker/$PIP_REQ_FILE /ums-agenten/requirements.txt
RUN pip3 install --break-system-packages --no-cache-dir -r /ums-agenten/requirements.txt \
    && python -m pip freeze > /ums-agenten/requirements.txt

# NOTE(review): init.py is copied but not executed anywhere in this file.
COPY ./docker/init.py /ums-agenten/project/
COPY ./docker/setup.py /ums-agenten/project/
RUN pip3 install --break-system-packages -e /ums-agenten/project/

WORKDIR /ums-agenten/project/src/

# Hand the whole tree to the unprivileged user before dropping privileges.
RUN chown -R user:user /ums-agenten
USER user

CMD ["/bin/bash"]

34
docker/Dockerfile.gpu Normal file
View File

@ -0,0 +1,34 @@
# GPU base image: CUDA development image with Python, torch (CUDA 12.4
# wheels) and NVIDIA apex installed.
#
# file initially based on https://github.com/huggingface/transformers/tree/master/docker
# FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04
# FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
FROM nvidia/cuda:12.6.1-cudnn-devel-ubuntu24.04

RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive TZ=Europe/Berlin apt-get install -y tzdata \
    && cp /usr/share/zoneinfo/Europe/Berlin /etc/localtime \
    && echo "Europe/Berlin" > /etc/timezone

RUN apt-get update && \
    apt-get install -y bash \
        build-essential \
        git \
        curl \
        ca-certificates \
        python3-dev \
        python3-pip \
        libopenmpi-dev && \
    rm -rf /var/lib/apt/lists

# The system Python of Ubuntu 24.04 is marked as externally managed; pip
# refuses to install into it unless --break-system-packages is given (the
# main Dockerfile passes the same flag).
RUN python3 -m pip install --no-cache-dir --break-system-packages \
    torch --index-url https://download.pytorch.org/whl/cu124

RUN git clone https://github.com/NVIDIA/apex
RUN cd apex && \
    pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --break-system-packages --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./

# fix library path for tensorflow and link a file
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda/lib64"
RUN ln -s /usr/local/cuda/lib64/libcusolver.so.11 /usr/local/cuda/lib64/libcusolver.so.10

CMD ["/bin/bash"]

12
docker/init.py Normal file
View File

@ -0,0 +1,12 @@
#!/usr/bin/env python3
import nltk, os
def init_nltk():
    """Download the NLTK resources used by the project.

    Each resource is fetched with ``nltk.download`` into the directory named
    by the ``NLTK_DATA`` environment variable; when the variable is unset,
    ``None`` is passed as ``download_dir``.
    """
    target_dir = os.getenv('NLTK_DATA')
    resources = (
        'punkt',      # used for tokenize
        'stopwords',  # used for stopwords list
        'wordnet',    # used for wordnet mapping
        'omw-1.4',    # used for wordnet mapping
    )
    for resource in resources:
        nltk.download(resource, download_dir=target_dir)
# Fetch the NLTK resources when the file is executed as a script.
if __name__ == "__main__":
    init_nltk()

View File

@ -0,0 +1,73 @@
accelerate==0.31.0
aiohttp==3.9.5
aiosignal==1.3.1
apricot-select==0.6.1
attrs==23.2.0
beautifulsoup4==4.12.3
certifi==2024.6.2
charset-normalizer==3.3.2
click==8.1.7
contourpy==1.2.1
cycler==0.12.1
datasets==2.20.0
dill==0.3.8
faiss-cpu==1.8.0
filelock==3.13.1
fonttools==4.53.0
frozenlist==1.4.1
fsspec==2024.2.0
gensim==4.3.2
huggingface-hub==0.23.4
idna==3.7
Jinja2==3.1.3
joblib==1.4.2
kiwisolver==1.4.5
llvmlite==0.43.0
MarkupSafe==2.1.5
matplotlib==3.9.0
mpmath==1.3.0
multidict==6.0.5
multiprocess==0.70.16
networkx==3.2.1
nltk==3.8.1
nose==1.3.7
numba==0.60.0
numpy==2.0.0
packaging==24.1
pandas==2.2.2
pdoc==14.5.0
peft==0.11.1
pillow==10.3.0
pomegranate==1.0.4
psutil==6.0.0
pyarrow==16.1.0
pyarrow-hotfix==0.6
Pygments==2.18.0
pyparsing==3.1.2
PyPDF2==3.0.1
python-dateutil==2.9.0.post0
pytz==2024.1
PyYAML==6.0.1
regex==2024.5.15
requests==2.32.3
safetensors==0.4.3
scikit-learn==1.5.0
scipy==1.13.1
setuptools==68.1.2
six==1.16.0
smart-open==7.0.4
soupsieve==2.5
sympy==1.12
threadpoolctl==3.5.0
tokenizers==0.19.1
torch==2.3.1
tqdm==4.66.4
transformers==4.41.2
typing_extensions==4.9.0
tzdata==2024.1
urllib3==2.2.2
wheel==0.42.0
wikipedia==1.4.0
wrapt==1.16.0
xxhash==3.4.1
yarl==1.9.4

23
docker/requirements.txt Normal file
View File

@ -0,0 +1,23 @@
tqdm
pdoc
requests
numpy
gensim
scikit-learn
nltk
wikipedia
PyPDF2
pomegranate
matplotlib
#mpi4py
#deepspeed
transformers
huggingface_hub
faiss-cpu
datasets
accelerate
peft

11
docker/setup.py Normal file
View File

@ -0,0 +1,11 @@
#!/usr/bin/env python3
from setuptools import find_packages, setup
# Declare the distribution "src"; its packages are discovered automatically
# with find_packages().
setup(
    name='src',
    version='0.0.0',
    author='Magnus Bender',
    description='Environment for usage with the agent seminar at Uni Muenster 2024/25.',
    packages=find_packages(),
)

27
push-images.sh Executable file
View File

@ -0,0 +1,27 @@
#!/bin/bash
#
# Push the locally built base images to $IMAGE_REGISTRY (see vars.sh).
#
# Every local tag of $IMAGE_OWNER/$IMAGE_NAME that looks like
# "<cpu|gpu>-<arm64|amd64>" is pushed twice: once under that tag and once
# with the current date appended ("<tag>-YYYY-MM-DD").
#
# Exit status: 0 if every push succeeded, 1 otherwise.

# https://stackoverflow.com/a/4774063
SCRIPTPATH="$(cd -- "$(dirname "$0")" >/dev/null 2>&1; pwd -P)"

source "$SCRIPTPATH/vars.sh"

day_tag=$(date '+%Y-%m-%d')
local_repo="$IMAGE_OWNER/$IMAGE_NAME"
remote_repo="$IMAGE_REGISTRY/$local_repo"
failed=0

# The loop reads from a process substitution (not a pipe) so that "failed"
# is updated in this shell and can be used for the exit status.
while read -r image_tag; do
  if [[ "$image_tag" =~ ^((gpu)|(cpu))-((arm64)|(amd64))$ ]]; then
    echo "Push:"
    echo " $remote_repo:$image_tag"
    echo " $remote_repo:$image_tag-$day_tag"

    # The images are built as "$IMAGE_OWNER/$IMAGE_NAME:<tag>", i.e. without
    # the registry host. "docker push" needs a local image carrying the full
    # registry-qualified name, so add both names before pushing.
    if ! {
      docker tag "$local_repo:$image_tag" "$remote_repo:$image_tag" &&
        docker tag "$local_repo:$image_tag" "$remote_repo:$image_tag-$day_tag" &&
        docker push "$remote_repo:$image_tag" &&
        docker push "$remote_repo:$image_tag-$day_tag"
    }; then
      echo "Failed to push $remote_repo:$image_tag" >&2
      failed=1
    fi
  fi
done < <(docker image ls "$local_repo" --format '{{.Tag}}')

exit "$failed"

9
vars.sh Executable file
View File

@ -0,0 +1,9 @@
#!/bin/bash
# variables used by "*-images.sh" files
# This file is meant to be sourced by build-images.sh and push-images.sh.

# Host name of the registry the images are pushed to.
IMAGE_REGISTRY="git.chai.uni-hamburg.de"
# Owner and name of the image; together they form the repository name.
IMAGE_OWNER="ums-agenten"
IMAGE_NAME="base-image"
# Space separated list of build targets: CPU architectures plus the special
# entry "gpu".
PLATFORMS="arm64 amd64 gpu"
# Base image of the CPU variants.
CPU_BASEIMAGE="ubuntu:24.04"