Skip to content

Latest commit

 

History

History
217 lines (162 loc) · 7.16 KB

04-base-image-with-python-and-required-libraries.org

File metadata and controls

217 lines (162 loc) · 7.16 KB

04. Prepare an image with Python & deep learning libraries you’ll always need

It’s a good idea to prepare a separate base image with the Python runtime and the libraries you’ll always need, especially the ones that are tied to a particular CUDA version.

When you have a separate base image, it’s easier to test a new base image for CUDA, Python and required library updates before you swap out the image in your projects. This way it’s safer to test out updates with minimum breakage.

There are various ways to go about this. If needed, the image can be built from scratch, but I’ve chosen to start from an official CUDA base image with Ubuntu on it.

The files below have been tangled and made available in the 04-files directory. Feel free to copy over and continue directly.

Copy over the contents of 04-files and update as needed

Copy the contents of 04-files to a new project directory. Initialize a git repo since the sha is automatically used in the tag.

Make sure that the CUDA version of the base image in the Dockerfile matches the CUDA version on your host operating system. Adjust the versions of the Python libraries to match the CUDA requirements.

Build the base image and test python cuda support

Adjust the contents of the Dockerfile and the library versions to suit your needs.

Simply build the image afterwards. This will take a while. Afterwards you should be able to use this image as a base for most of your deep learning projects.

Make a note of the image tag, as it’ll be needed in the next step.

# Run make from the directory that contains the Makefile
cd /to/the/correct/folder/where/makefile/is/located
# Builds the image; the full image name (including the tag) is printed
# on the "-- Building docker image ..." line
make build

# Test the image, get a terminal in the container and test cuda
# Replace <INSERT-SHA-HERE> with the short git SHA that ends the tag printed by `make build`
docker run --rm --gpus all -it suvash/deeplibs:py3.11-cuda12.1-ubuntu22.04-<INSERT-SHA-HERE> /bin/bash
# The next two commands are typed inside the container shell;
# the python line is expected to print True when PyTorch can see a GPU
python -c "import torch; print(torch.cuda.is_available())"
exit

# If you'd like to remove the image for some reason
make clean

Contents

Makefile

# Running plain `make` is the same as `make build`.
.DEFAULT_GOAL:=build
# Recipes are executed by bash, located through env.
SHELL:=/usr/bin/env bash

# Registry host and account that own the image.
# Both can be overridden on the command line, e.g. `make REPO_USERNAME=me`.
DOCKER_REPOSITORY:=docker.io
REPO_USERNAME:=suvash

IMAGE_PREFIX:=$(DOCKER_REPOSITORY)/$(REPO_USERNAME)

# Short SHA of the current commit; it becomes the suffix of the image tag.
# --verify makes git print nothing on stdout unless HEAD resolves to a commit.
SHA_HEAD:=$(shell git rev-parse --short --verify HEAD)
# Fail fast instead of producing an image tag that ends in a bare "-"
# (happens outside a git repository or before the first commit).
ifeq ($(strip $(SHA_HEAD)),)
$(error Could not determine the git SHA for the image tag: run make inside a git repository that has at least one commit)
endif
# Timestamp captured once, when the Makefile is parsed.
DATE_NOW:=$(shell date +%Y-%m-%dT%H:%M:%S%z)

# Image name, tag prefix, build-context folder and the fully qualified image reference.
DEEPLIBS_NAME:=deeplibs
DEEPLIBS_TAG_PREFIX:=py3.11-cuda12.1-ubuntu22.04
DEEPLIBS_FOLDER:=$(DEEPLIBS_NAME)/$(DEEPLIBS_TAG_PREFIX)
DEEPLIBS_IMAGE:=$(IMAGE_PREFIX)/$(DEEPLIBS_NAME):$(DEEPLIBS_TAG_PREFIX)-$(SHA_HEAD)

# Build section
#
# `build` is the umbrella target; it currently only depends on the
# deeplibs image build. Neither target produces a file, so both are phony.

.PHONY: build
build: build-deeplibs

# Builds the image from the Dockerfile in $(DEEPLIBS_FOLDER), using that
# same folder as the build context, and tags it as $(DEEPLIBS_IMAGE).
.PHONY: build-deeplibs
build-deeplibs:
	$(info -- Building docker image $(DEEPLIBS_IMAGE))
	docker image build \
		--tag $(DEEPLIBS_IMAGE) \
		--file $(DEEPLIBS_FOLDER)/Dockerfile \
		$(DEEPLIBS_FOLDER)

# Clean section
#
# `clean` is the umbrella target; it currently only depends on removing
# the deeplibs image. Neither target produces a file, so both are phony.

.PHONY: clean
clean: clean-deeplibs

# Removes the locally stored image tagged $(DEEPLIBS_IMAGE).
.PHONY: clean-deeplibs
clean-deeplibs:
	$(info -- Cleaning docker image $(DEEPLIBS_IMAGE))
	docker rmi $(DEEPLIBS_IMAGE)

Image constituents

The image built from the Dockerfile comprises:

  • CUDA base image 12.1
  • Miniconda with Python 3.11
  • Pytorch

Dockerfile

# Start from the CUDA 12.1.0 `devel` image on Ubuntu 22.04.
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04

# Keep package installation from prompting during the build.
ENV DEBIAN_FRONTEND=noninteractive
# Timezone exposed to everything running in the image.
ENV TZ=Europe/Stockholm

# Install system tools and media libraries, then drop the apt caches and
# package lists in the same layer. The final `gosu nobody true` checks that
# gosu can actually switch users.
RUN set -eux \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        bzip2 \
        ca-certificates \
        curl \
        gosu \
        jq \
        wget \
        # libraries
        ffmpeg \
        libvips \
    && apt-get clean \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/* \
    && gosu nobody true

# Build-time settings; each can be overridden with `--build-arg NAME=value`.
ARG WORKDIR=/workspace
ARG CONDA_DIR=/conda
ARG CONDA_GROUP=conda
ARG CONDA_ENV=py311-cuda121
# Miniconda installer version and the SHA-256 checksum it is verified against.
ARG CONDA_VERSION=py311_23.11.0-2
ARG CONDA_SHA256SUM=c9ae82568e9665b1105117b4b1e499607d2a920f0aea6f94410e417a0eff1b9c

# Persist the conda settings into the image and put conda's bin directory
# first on PATH.
ENV CONDA_AUTO_UPDATE_CONDA=false
ENV CONDA_DIR=${CONDA_DIR}
ENV CONDA_ENV=${CONDA_ENV}
ENV CONDA_GROUP=${CONDA_GROUP}
ENV PATH=${CONDA_DIR}/bin:${PATH}

# Download the pinned Miniconda installer, verify its SHA-256 checksum, and
# install it into $CONDA_DIR with the $CONDA_GROUP group active (via `sg`).
# Afterwards: remove the installer, make `conda` available to login shells
# and to root's interactive bash, and strip static libraries, JS source maps
# and conda caches.
#
# The installer is fetched from repo.anaconda.com rather than the legacy
# repo.continuum.io host, so the build does not depend on a redirect.
# The checksum file uses the "<hash><two spaces><file>" layout that
# `sha256sum --check` expects.
RUN set -exu \
    \
    && MINICONDA_URL="https://repo.anaconda.com/miniconda/Miniconda3-$CONDA_VERSION-Linux-x86_64.sh" \
    && wget "$MINICONDA_URL" -O /tmp/miniconda.sh \
    && echo "$CONDA_SHA256SUM  /tmp/miniconda.sh" > /tmp/shashum.txt \
    && sha256sum --check --status /tmp/shashum.txt \
    && groupadd "$CONDA_GROUP" \
    && sg "$CONDA_GROUP" -c "sh /tmp/miniconda.sh -b -p $CONDA_DIR" \
    && rm /tmp/miniconda.sh /tmp/shashum.txt \
    && ln -s "$CONDA_DIR/etc/profile.d/conda.sh" /etc/profile.d/conda.sh \
    && echo ". $CONDA_DIR/etc/profile.d/conda.sh" >> "$HOME/.bashrc" \
    && find "$CONDA_DIR" -follow -type f -name '*.a' -delete \
    && find "$CONDA_DIR" -follow -type f -name '*.js.map' -delete \
    && conda clean -afy

# Create the conda environment described by environment.yml.
# The file ships with a placeholder environment name; sed swaps in the value
# of $CONDA_ENV before `conda env create` runs with the $CONDA_GROUP group
# active. The environment is then activated automatically in root's
# interactive bash sessions.
#
# `set -exu` matches the other RUN steps in this file, and `conda clean -afy`
# drops the package cache left behind by the environment creation so it does
# not end up in the image layer.
COPY ./environment.yml "/tmp/$CONDA_ENV/environment.yml"
RUN set -exu \
    \
    && sed -i "s/CONDA_ENV/$CONDA_ENV/g" "/tmp/$CONDA_ENV/environment.yml" \
    && sg "$CONDA_GROUP" -c "conda env create -f /tmp/$CONDA_ENV/environment.yml" \
    && echo "conda activate $CONDA_ENV" >> "$HOME/.bashrc" \
    && conda clean -afy

# Install the base Python requirements into the conda environment.
# conda.sh is sourced first so that `conda activate` is available in this
# non-interactive shell; pip then runs with the conda group active.
COPY ./base.requirements.txt /tmp/${CONDA_ENV}/base.requirements.txt
RUN set -eux \
    && . "${CONDA_DIR}/etc/profile.d/conda.sh" \
    && conda activate "${CONDA_ENV}" \
    && sg "${CONDA_GROUP}" -c "pip install --no-cache-dir --requirement /tmp/${CONDA_ENV}/base.requirements.txt"

# Install the pinned PyTorch packages into the conda environment.
# pip is additionally pointed at the PyTorch wheel listing page as a
# find-links source.
COPY ./pytorch.requirements.txt /tmp/${CONDA_ENV}/pytorch.requirements.txt
RUN set -eux \
    && . "${CONDA_DIR}/etc/profile.d/conda.sh" \
    && conda activate "${CONDA_ENV}" \
    && sg "${CONDA_GROUP}" -c "pip install --no-cache-dir --requirement /tmp/${CONDA_ENV}/pytorch.requirements.txt --find-links https://download.pytorch.org/whl/torch_stable.html"

# Install flash-attn into the conda environment.
# git is installed only for the duration of this step (presumably needed by
# the flash-attn installation - confirm) and is removed again, together with
# the apt caches and package lists, before the layer is committed.
COPY ./flash-attn.requirements.txt /tmp/${CONDA_ENV}/flash-attn.requirements.txt
RUN set -eux \
    && apt-get update \
    && apt-get install -y --no-install-recommends git \
    && . "${CONDA_DIR}/etc/profile.d/conda.sh" \
    && conda activate "${CONDA_ENV}" \
    && sg "${CONDA_GROUP}" -c "pip install --no-cache-dir --requirement /tmp/${CONDA_ENV}/flash-attn.requirements.txt" \
    && apt-get remove -y git \
    && apt-get clean \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*

# Default working directory for containers and for images built on top of this one.
WORKDIR ${WORKDIR}

Conda environment.yml

---
# Conda environment definition used by the Dockerfile.
# The value of `name` is a placeholder; the Dockerfile's sed step replaces it
# with the real environment name during the image build.
name: CONDA_ENV
channels: [defaults]
# Only the interpreter comes from conda; Python packages are installed with pip.
dependencies: [python=3.11.7]

base.requirements.txt

# Packages that must already be present in the environment before
# flash-attn is installed; the Dockerfile installs this file first.
packaging==23.2
ninja==1.11.1.1

pytorch.requirements.txt

# PyTorch packages pinned to their +cu121 (CUDA 12.1) builds.
# The Dockerfile passes pip a find-links page on download.pytorch.org
# when it installs this file.
torch==2.1.2+cu121
torchaudio==2.1.2+cu121
torchvision==0.16.2+cu121

flash-attn.requirements.txt

# Installed in its own Dockerfile step, after the base and PyTorch requirements.
flash-attn==2.4.2