From d54ad0c22ad11b20e6a8675dfb5d3c689e1bcc93 Mon Sep 17 00:00:00 2001
From: KIMB-technologies
Date: Fri, 4 Oct 2024 15:55:21 +0200
Subject: [PATCH] Docker Base Image Begin

---
Notes (ignored by "git am"):
  Adds the scripts, Dockerfiles and CI workflow that build and push the
  CPU (arm64, amd64) and GPU (amd64) base images.
  The file contents below were corrected during review; the blob ids on the
  "index" lines still refer to the originally committed contents.

 .gitea/workflows/docker-build.yml | 26 +++++++++++
 build-images.sh                   | 65 +++++++++++++++++++++++++++
 docker/Dockerfile                 | 57 ++++++++++++++++++++++++
 docker/Dockerfile.gpu             | 36 +++++++++++++++
 docker/init.py                    | 13 ++++++
 docker/requirements-frozen.txt    | 73 +++++++++++++++++++++++++++++++
 docker/requirements.txt           | 23 ++++++++++
 docker/setup.py                   | 11 +++++
 push-images.sh                    | 32 ++++++++++++++
 vars.sh                           |  9 ++++
 10 files changed, 345 insertions(+)
 create mode 100644 .gitea/workflows/docker-build.yml
 create mode 100755 build-images.sh
 create mode 100644 docker/Dockerfile
 create mode 100644 docker/Dockerfile.gpu
 create mode 100644 docker/init.py
 create mode 100644 docker/requirements-frozen.txt
 create mode 100644 docker/requirements.txt
 create mode 100644 docker/setup.py
 create mode 100755 push-images.sh
 create mode 100755 vars.sh

diff --git a/.gitea/workflows/docker-build.yml b/.gitea/workflows/docker-build.yml
new file mode 100644
index 0000000..58f4fcc
--- /dev/null
+++ b/.gitea/workflows/docker-build.yml
@@ -0,0 +1,26 @@
+name: Build and push Docker image in git push
+on:
+  push:
+    tags:
+      - '*'
+
+env:
+  IMAGE_REGISTRY: git.chai.uni-hamburg.de
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+
+      - name: Get repository code
+        uses: actions/checkout@v4
+      - name: Build the images
+        run: bash ./build-images.sh -no-updates
+      - name: Docker login
+        uses: docker/login-action@v3
+        with:
+          registry: ${{ env.IMAGE_REGISTRY }}
+          username: ${{ secrets.REGISTRY_USERNAME }}
+          password: ${{ secrets.REGISTRY_TOKEN }}
+      - name: Push the images
+        run: bash ./push-images.sh
diff --git a/build-images.sh b/build-images.sh
new file mode 100755
index 0000000..3b8fdb7
--- /dev/null
+++ b/build-images.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Build the CPU (arm64, amd64) and GPU (amd64) variants of the base image.
+#
+# Usage: build-images.sh [-no-updates]
+#   -no-updates  build from docker/requirements-frozen.txt without asking
+
+# Abort on the first failing command, so a broken build is never pushed.
+set -euo pipefail
+
+# https://stackoverflow.com/a/4774063
+SCRIPTPATH="$(cd -- "$(dirname "$0")" >/dev/null 2>&1; pwd -P)"
+
+source "$SCRIPTPATH/vars.sh"
+
+requirements="requirements-frozen.txt"
+if [ "${1:-}" != "-no-updates" ]; then
+  echo "Update the dependencies in requirements.txt? [may break app] (y/n)"
+  # Treat a closed stdin like any answer other than "y".
+  read -r newlockfile || newlockfile=""
+  if [ "$newlockfile" == "y" ]; then
+    requirements="requirements.txt"
+  fi
+fi
+
+# PLATFORMS is a space separated list, word splitting is intended here.
+for platform in $PLATFORMS; do
+  if [ "$platform" == "gpu" ]; then
+    # The GPU image is built on top of a CUDA base image that is built first.
+    platform="amd64"
+    from_image="tmp_local/gpu_baseimage:amd64"
+    tag_image="$IMAGE_OWNER/$IMAGE_NAME:gpu-amd64"
+
+    docker build \
+      --pull \
+      --platform "linux/$platform" \
+      --file "$SCRIPTPATH/docker/Dockerfile.gpu" \
+      --tag "$from_image" \
+      "$SCRIPTPATH"
+  else
+    from_image="$CPU_BASEIMAGE"
+    tag_image="$IMAGE_OWNER/$IMAGE_NAME:cpu-$platform"
+
+    # Refresh the base image, vars.sh only defines CPU_BASEIMAGE.
+    docker pull --platform "linux/$platform" "$CPU_BASEIMAGE"
+  fi
+
+  docker build \
+    --platform "linux/$platform" \
+    --file "$SCRIPTPATH/docker/Dockerfile" \
+    --build-arg FROM_IMAGE="$from_image" \
+    --build-arg H_UID=1050 \
+    --build-arg PIP_REQ_FILE="$requirements" \
+    --build-arg H_GID=1050 \
+    --tag "$tag_image" \
+    "$SCRIPTPATH"
+done
+
+if [ "$requirements" == "requirements.txt" ]; then
+  # Copy the "pip freeze" output out of the image that was built last and
+  # store it as the new requirements-frozen.txt.
+  cid=$(docker create "$tag_image")
+  docker cp "$cid:/ums-agenten/requirements.txt" "$SCRIPTPATH/docker/requirements-frozen.txt"
+  docker rm "$cid"
+fi
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..c72e003
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,57 @@
+# Base image to build on, set by build-images.sh via --build-arg.
+ARG FROM_IMAGE
+
+FROM $FROM_IMAGE
+
+# An ARG declared before FROM is only visible to FROM itself, so the
+# arguments used by the instructions below are declared inside the stage.
+ARG H_GID
+ARG H_UID
+ARG PIP_REQ_FILE
+
+RUN apt update && \
+  DEBIAN_FRONTEND=noninteractive TZ=Europe/Berlin apt-get install -y tzdata \
+  && cp /usr/share/zoneinfo/Europe/Berlin /etc/localtime \
+  && echo "Europe/Berlin" > /etc/timezone
+
+RUN apt install -y bash \
+  build-essential \
+  git \
+  curl \
+  ca-certificates \
+  python3-dev \
+  python3-pip
+
+RUN ln -s /usr/bin/python3 /usr/local/bin/python \
+  && addgroup --gid $H_GID user \
+  && adduser user --uid $H_UID --ingroup user --gecos "" --home /home/user/ --disabled-password
+
+RUN apt-get update \
+  && apt-get install -y \
+    bash nano vim mc \
+    curl wget \
+    htop screen
+
+RUN python3 -m pip install --no-cache-dir --break-system-packages \
+  torch --index-url https://download.pytorch.org/whl/cpu
+
+RUN mkdir /ums-agenten/
+
+ENV NLTK_DATA=/ums-agenten/nltk/
+ENV HF_HOME=/ums-agenten/hf_home/
+
+# Install the requested requirements and replace the file by the versions
+# that were actually installed (build-images.sh copies it out afterwards).
+COPY ./docker/$PIP_REQ_FILE /ums-agenten/requirements.txt
+RUN pip3 install --break-system-packages --no-cache-dir -r /ums-agenten/requirements.txt \
+  && python -m pip freeze > /ums-agenten/requirements.txt
+
+COPY ./docker/init.py /ums-agenten/project/
+COPY ./docker/setup.py /ums-agenten/project/
+RUN pip3 install --break-system-packages -e /ums-agenten/project/
+
+WORKDIR /ums-agenten/project/src/
+RUN chown -R user:user /ums-agenten
+USER user
+
+CMD ["/bin/bash"]
diff --git a/docker/Dockerfile.gpu b/docker/Dockerfile.gpu
new file mode 100644
index 0000000..44ce4ed
--- /dev/null
+++ b/docker/Dockerfile.gpu
@@ -0,0 +1,36 @@
+# file initially based on https://github.com/huggingface/transformers/tree/master/docker
+
+# FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04
+# FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
+FROM nvidia/cuda:12.6.1-cudnn-devel-ubuntu24.04
+
+RUN apt-get update \
+  && DEBIAN_FRONTEND=noninteractive TZ=Europe/Berlin apt-get install -y tzdata \
+  && cp /usr/share/zoneinfo/Europe/Berlin /etc/localtime \
+  && echo "Europe/Berlin" > /etc/timezone
+
+RUN apt update && \
+  apt install -y bash \
+  build-essential \
+  git \
+  curl \
+  ca-certificates \
+  python3-dev \
+  python3-pip \
+  libopenmpi-dev && \
+  rm -rf /var/lib/apt/lists
+
+# Ubuntu 24.04 marks the system Python as externally managed (PEP 668), pip
+# only installs into it with --break-system-packages (as in docker/Dockerfile).
+RUN python3 -m pip install --no-cache-dir --break-system-packages \
+  torch --index-url https://download.pytorch.org/whl/cu124
+
+RUN git clone https://github.com/NVIDIA/apex
+RUN cd apex && \
+  pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --break-system-packages --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./
+
+# fix library path for tensorflow and link a file
+ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda/lib64"
+RUN ln -s /usr/local/cuda/lib64/libcusolver.so.11 /usr/local/cuda/lib64/libcusolver.so.10
+
+CMD ["/bin/bash"]
diff --git a/docker/init.py b/docker/init.py
new file mode 100644
index 0000000..0938000
--- /dev/null
+++ b/docker/init.py
@@ -0,0 +1,13 @@
+#!/usr/bin/env python3
+
+import nltk, os
+
+def init_nltk():
+    """Download the NLTK data sets listed below into the directory named by NLTK_DATA."""
+    nltk.download('punkt', download_dir=os.getenv('NLTK_DATA')) # used for tokenize
+    nltk.download('stopwords', download_dir=os.getenv('NLTK_DATA')) # used for stopwords list
+    nltk.download('wordnet', download_dir=os.getenv('NLTK_DATA')) # used for wordnet mapping
+    nltk.download('omw-1.4', download_dir=os.getenv('NLTK_DATA')) # used for wordnet mapping
+
+if __name__ == "__main__":
+    init_nltk()
diff --git a/docker/requirements-frozen.txt b/docker/requirements-frozen.txt
new file mode 100644
index 0000000..0eb3395
--- /dev/null
+++ b/docker/requirements-frozen.txt
@@ -0,0 +1,73 @@
+accelerate==0.31.0
+aiohttp==3.9.5
+aiosignal==1.3.1
+apricot-select==0.6.1
+attrs==23.2.0
+beautifulsoup4==4.12.3
+certifi==2024.6.2
+charset-normalizer==3.3.2
+click==8.1.7
+contourpy==1.2.1
+cycler==0.12.1
+datasets==2.20.0
+dill==0.3.8
+faiss-cpu==1.8.0
+filelock==3.13.1
+fonttools==4.53.0
+frozenlist==1.4.1
+fsspec==2024.2.0
+gensim==4.3.2
+huggingface-hub==0.23.4
+idna==3.7
+Jinja2==3.1.3
+joblib==1.4.2
+kiwisolver==1.4.5
+llvmlite==0.43.0
+MarkupSafe==2.1.5
+matplotlib==3.9.0
+mpmath==1.3.0
+multidict==6.0.5
+multiprocess==0.70.16
+networkx==3.2.1
+nltk==3.8.1
+nose==1.3.7
+numba==0.60.0
+numpy==2.0.0
+packaging==24.1
+pandas==2.2.2
+pdoc==14.5.0
+peft==0.11.1
+pillow==10.3.0
+pomegranate==1.0.4
+psutil==6.0.0
+pyarrow==16.1.0
+pyarrow-hotfix==0.6
+Pygments==2.18.0
+pyparsing==3.1.2
+PyPDF2==3.0.1
+python-dateutil==2.9.0.post0
+pytz==2024.1
+PyYAML==6.0.1
+regex==2024.5.15
+requests==2.32.3
+safetensors==0.4.3
+scikit-learn==1.5.0
+scipy==1.13.1
+setuptools==68.1.2
+six==1.16.0
+smart-open==7.0.4
+soupsieve==2.5
+sympy==1.12
+threadpoolctl==3.5.0
+tokenizers==0.19.1
+torch==2.3.1
+tqdm==4.66.4
+transformers==4.41.2
+typing_extensions==4.9.0
+tzdata==2024.1
+urllib3==2.2.2
+wheel==0.42.0
+wikipedia==1.4.0
+wrapt==1.16.0
+xxhash==3.4.1
+yarl==1.9.4
diff --git a/docker/requirements.txt b/docker/requirements.txt
new file mode 100644
index 0000000..b358c38
--- /dev/null
+++ b/docker/requirements.txt
@@ -0,0 +1,23 @@
+tqdm
+pdoc
+requests
+
+numpy
+gensim
+scikit-learn
+nltk
+
+wikipedia
+PyPDF2
+pomegranate
+matplotlib
+
+#mpi4py
+#deepspeed
+
+transformers
+huggingface_hub
+faiss-cpu
+datasets
+accelerate
+peft
diff --git a/docker/setup.py b/docker/setup.py
new file mode 100644
index 0000000..3c1249c
--- /dev/null
+++ b/docker/setup.py
@@ -0,0 +1,11 @@
+#!/usr/bin/env python3
+
+from setuptools import find_packages, setup
+
+setup(
+    name='src',
+    packages=find_packages(),
+    version='0.0.0',
+    description='Environment for usage with the agent seminar at Uni Muenster 2024/25.',
+    author='Magnus Bender'
+)
diff --git a/push-images.sh b/push-images.sh
new file mode 100755
index 0000000..a38c038
--- /dev/null
+++ b/push-images.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Push all locally built base images to the registry, once under their plain
+# tag (e.g. "cpu-amd64") and once with the current date appended.
+
+# Abort on the first failing command, so a failed push fails the CI job.
+set -euo pipefail
+
+# https://stackoverflow.com/a/4774063
+SCRIPTPATH="$(cd -- "$(dirname "$0")" >/dev/null 2>&1; pwd -P)"
+
+source "$SCRIPTPATH/vars.sh"
+
+day_tag=$(date '+%Y-%m-%d')
+local_repo="$IMAGE_OWNER/$IMAGE_NAME"
+remote_repo="$IMAGE_REGISTRY/$local_repo"
+
+docker image ls "$local_repo" --format '{{.Tag}}' | while read -r image_tag; do
+  if [[ "$image_tag" =~ ^((gpu)|(cpu))-((arm64)|(amd64))$ ]]; then
+    echo "Push:"
+    echo " $remote_repo:$image_tag"
+    echo " $remote_repo:$image_tag-$day_tag"
+
+    # build-images.sh tags the images without the registry prefix, so the
+    # registry qualified names are created here before pushing them.
+    docker tag "$local_repo:$image_tag" "$remote_repo:$image_tag"
+    docker tag "$local_repo:$image_tag" "$remote_repo:$image_tag-$day_tag"
+
+    docker push "$remote_repo:$image_tag"
+    docker push "$remote_repo:$image_tag-$day_tag"
+  fi
+done
diff --git a/vars.sh b/vars.sh
new file mode 100755
index 0000000..ff7406d
--- /dev/null
+++ b/vars.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+# variables used by "*-images.sh" files
+
+IMAGE_REGISTRY="git.chai.uni-hamburg.de"
+IMAGE_OWNER="ums-agenten"
+IMAGE_NAME="base-image"
+PLATFORMS="arm64 amd64 gpu"
+CPU_BASEIMAGE="ubuntu:24.04"