Docker Base Image Begin

This commit is contained in:
2024-10-04 15:55:21 +02:00
commit d54ad0c22a
10 changed files with 320 additions and 0 deletions

51
docker/Dockerfile Normal file
View File

@ -0,0 +1,51 @@
# Image for the "ums-agenten" environment, built on top of a caller-supplied base image.
#
# Build arguments (all passed with --build-arg, none has a default):
#   FROM_IMAGE   - base image to build on
#   H_GID        - numeric group id for the unprivileged "user" group
#   H_UID        - numeric user id for the unprivileged "user" account
#   PIP_REQ_FILE - name of the requirements file below ./docker/ to install
ARG FROM_IMAGE
FROM $FROM_IMAGE

# An ARG declared before FROM is only visible to the FROM line itself.
# These must be declared inside the stage, otherwise $H_GID, $H_UID and
# $PIP_REQ_FILE expand to empty strings in the RUN/COPY instructions below.
ARG H_GID
ARG H_UID
ARG PIP_REQ_FILE

# Install tzdata without interactive prompts and set the timezone to Europe/Berlin.
# update + install + list cleanup stay in one layer so the package index is never stale
# and the downloaded lists do not end up in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive TZ=Europe/Berlin apt-get install -y tzdata \
    && cp /usr/share/zoneinfo/Europe/Berlin /etc/localtime \
    && echo "Europe/Berlin" > /etc/timezone \
    && rm -rf /var/lib/apt/lists/*

# Build tooling, Python and interactive command-line utilities.
# apt-get is used instead of apt because apt's command-line interface is not stable for scripts.
RUN apt-get update \
    && apt-get install -y \
        bash \
        build-essential \
        ca-certificates \
        curl \
        git \
        htop \
        mc \
        nano \
        python3-dev \
        python3-pip \
        screen \
        vim \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Make "python" resolve to python3 and create the unprivileged account with the requested ids.
RUN ln -s /usr/bin/python3 /usr/local/bin/python \
    && addgroup --gid "$H_GID" user \
    && adduser user --uid "$H_UID" --ingroup user --gecos "" --home /home/user/ --disabled-password

# PyTorch is installed from the CPU wheel index before the requirements file is processed.
RUN python3 -m pip install --no-cache-dir --break-system-packages \
        torch --index-url https://download.pytorch.org/whl/cpu

# Project root; NLTK and Hugging Face are pointed at directories below it.
RUN mkdir /ums-agenten/
ENV NLTK_DATA=/ums-agenten/nltk/ \
    HF_HOME=/ums-agenten/hf_home/

# Install the selected requirements, then overwrite the copied file with the
# output of "pip freeze", i.e. the exact versions that ended up installed.
COPY ./docker/$PIP_REQ_FILE /ums-agenten/requirements.txt
RUN pip3 install --break-system-packages --no-cache-dir -r /ums-agenten/requirements.txt \
    && python -m pip freeze > /ums-agenten/requirements.txt

# Install the project skeleton in editable mode.
COPY ./docker/init.py ./docker/setup.py /ums-agenten/project/
RUN pip3 install --break-system-packages -e /ums-agenten/project/

WORKDIR /ums-agenten/project/src/

# Everything above was created as root; hand the whole tree to the unprivileged
# account before switching to it.
RUN chown -R user:user /ums-agenten
USER user
CMD ["/bin/bash"]

34
docker/Dockerfile.gpu Normal file
View File

@ -0,0 +1,34 @@
# GPU base image: CUDA 12.6.1 / cuDNN (devel) on Ubuntu 24.04 with PyTorch (cu124 wheels) and NVIDIA apex.
# file initially based on https://github.com/huggingface/transformers/tree/master/docker
# Alternative (older) base images, kept for reference:
# FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04
# FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
FROM nvidia/cuda:12.6.1-cudnn-devel-ubuntu24.04

# Install tzdata without interactive prompts and set the timezone to Europe/Berlin.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive TZ=Europe/Berlin apt-get install -y tzdata \
    && cp /usr/share/zoneinfo/Europe/Berlin /etc/localtime \
    && echo "Europe/Berlin" > /etc/timezone \
    && rm -rf /var/lib/apt/lists/*

# Build tooling, Python and the OpenMPI headers.
# apt-get is used instead of apt because apt's command-line interface is not stable for scripts.
RUN apt-get update \
    && apt-get install -y \
        bash \
        build-essential \
        ca-certificates \
        curl \
        git \
        libopenmpi-dev \
        python3-dev \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# The system Python of this base image is marked as externally managed (PEP 668),
# so pip refuses to install into it unless --break-system-packages is given.
RUN python3 -m pip install --no-cache-dir --break-system-packages \
        torch --index-url https://download.pytorch.org/whl/cu124

# Build NVIDIA apex with its C++ and CUDA extensions against the torch installed above
# (--no-build-isolation makes the build use that torch instead of an isolated environment).
# NOTE(review): the clone is not pinned to a tag or commit, so rebuilds may pick up a different apex.
# "cd" is used instead of WORKDIR so the image's working directory stays unchanged.
RUN git clone https://github.com/NVIDIA/apex \
    && cd apex \
    && pip install -v --disable-pip-version-check --no-cache-dir --no-build-isolation --break-system-packages --config-settings "--build-option=--cpp_ext" --config-settings "--build-option=--cuda_ext" ./

# fix library path for tensorflow and link a file
ENV LD_LIBRARY_PATH="${LD_LIBRARY_PATH}:/usr/local/cuda/lib64"
RUN ln -s /usr/local/cuda/lib64/libcusolver.so.11 /usr/local/cuda/lib64/libcusolver.so.10
CMD ["/bin/bash"]

12
docker/init.py Normal file
View File

@ -0,0 +1,12 @@
#!/usr/bin/env python3
import nltk, os
def init_nltk():
    """Download the NLTK resources listed below into the directory named by NLTK_DATA."""
    # os.getenv returns None when NLTK_DATA is unset; that value is handed to nltk unchanged.
    target_dir = os.getenv('NLTK_DATA')
    resources = (
        'punkt',      # used for tokenize
        'stopwords',  # used for stopwords list
        'wordnet',    # used for wordnet mapping
        'omw-1.4',    # used for wordnet mapping
    )
    for resource in resources:
        nltk.download(resource, download_dir=target_dir)


if __name__ == "__main__":
    init_nltk()

View File

@ -0,0 +1,73 @@
accelerate==0.31.0
aiohttp==3.9.5
aiosignal==1.3.1
apricot-select==0.6.1
attrs==23.2.0
beautifulsoup4==4.12.3
certifi==2024.6.2
charset-normalizer==3.3.2
click==8.1.7
contourpy==1.2.1
cycler==0.12.1
datasets==2.20.0
dill==0.3.8
faiss-cpu==1.8.0
filelock==3.13.1
fonttools==4.53.0
frozenlist==1.4.1
fsspec==2024.2.0
gensim==4.3.2
huggingface-hub==0.23.4
idna==3.7
Jinja2==3.1.3
joblib==1.4.2
kiwisolver==1.4.5
llvmlite==0.43.0
MarkupSafe==2.1.5
matplotlib==3.9.0
mpmath==1.3.0
multidict==6.0.5
multiprocess==0.70.16
networkx==3.2.1
nltk==3.8.1
nose==1.3.7
numba==0.60.0
numpy==2.0.0
packaging==24.1
pandas==2.2.2
pdoc==14.5.0
peft==0.11.1
pillow==10.3.0
pomegranate==1.0.4
psutil==6.0.0
pyarrow==16.1.0
pyarrow-hotfix==0.6
Pygments==2.18.0
pyparsing==3.1.2
PyPDF2==3.0.1
python-dateutil==2.9.0.post0
pytz==2024.1
PyYAML==6.0.1
regex==2024.5.15
requests==2.32.3
safetensors==0.4.3
scikit-learn==1.5.0
scipy==1.13.1
setuptools==68.1.2
six==1.16.0
smart-open==7.0.4
soupsieve==2.5
sympy==1.12
threadpoolctl==3.5.0
tokenizers==0.19.1
torch==2.3.1
tqdm==4.66.4
transformers==4.41.2
typing_extensions==4.9.0
tzdata==2024.1
urllib3==2.2.2
wheel==0.42.0
wikipedia==1.4.0
wrapt==1.16.0
xxhash==3.4.1
yarl==1.9.4

23
docker/requirements.txt Normal file
View File

@ -0,0 +1,23 @@
# Top-level Python dependencies, listed without version pins.
# NOTE(review): presumably one of the files selectable through the Dockerfile's
# PIP_REQ_FILE build argument - confirm against the build scripts.
tqdm
pdoc
requests
numpy
gensim
scikit-learn
nltk
wikipedia
PyPDF2
pomegranate
matplotlib
# The next two packages are disabled (commented out) and will not be installed.
#mpi4py
#deepspeed
transformers
huggingface_hub
faiss-cpu
datasets
accelerate
peft

11
docker/setup.py Normal file
View File

@ -0,0 +1,11 @@
#!/usr/bin/env python3
from setuptools import find_packages, setup
# Metadata handed to setuptools; find_packages() collects the packages found
# below the directory this script is run from.
_PROJECT_METADATA = {
    "name": 'src',
    "version": '0.0.0',
    "description": 'Environment for usage with the agent seminar at Uni Muenster 2024/25.',
    "author": 'Magnus Bender',
    "packages": find_packages(),
}

setup(**_PROJECT_METADATA)