# Horovod image with TensorFlow, Keras and PyTorch on CUDA 9.0 / Ubuntu 16.04.
#
# Build arguments:
#   python  Python interpreter version to install (default 3.5).
FROM nvidia/cuda:9.0-devel-ubuntu16.04

# TensorFlow version is tightly coupled to CUDA and cuDNN so it should be selected carefully
ENV HOROVOD_VERSION=0.15.1
ENV TENSORFLOW_VERSION=1.7.0
ENV PYTORCH_VERSION=0.4.1
ENV CUDNN_VERSION=7.0.5.15-1+cuda9.0

# NCCL_VERSION is set by NVIDIA parent image to "2.3.7"; it is overridden here
# with the full apt package version so it can be used to pin libnccl below.
ENV NCCL_VERSION=2.3.7-1+cuda9.0

# Python 2.7 or 3.5 is supported by Ubuntu Xenial out of the box
ARG python=3.5
ENV PYTHON_VERSION=${python}

# System packages. The apt lists are removed in the same layer so they do not
# end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        git \
        curl \
        vim \
        wget \
        ca-certificates \
        libcudnn7=${CUDNN_VERSION} \
        libnccl2=${NCCL_VERSION} \
        libnccl-dev=${NCCL_VERSION} \
        libjpeg-dev \
        libpng-dev \
        python${PYTHON_VERSION} \
        python${PYTHON_VERSION}-dev \
    && rm -rf /var/lib/apt/lists/*

# Make the selected interpreter the default `python`.
RUN ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python

# Install pip. The unversioned get-pip.py only targets current Python releases,
# so fetch the script published for the selected (legacy) interpreter version.
# -f makes curl fail the build on an HTTP error instead of saving an error page.
RUN curl -fsSLO https://bootstrap.pypa.io/pip/${PYTHON_VERSION}/get-pip.py && \
    python get-pip.py && \
    rm get-pip.py

# Install TensorFlow, Keras and PyTorch
RUN pip install --no-cache-dir \
        tensorflow-gpu==${TENSORFLOW_VERSION} \
        keras \
        h5py \
        torch==${PYTORCH_VERSION} \
        torchvision

# Install MPICH 3.1.4 (built from source; build tree is removed in the same layer)
RUN cd /tmp \
    && wget -q https://www.mpich.org/static/downloads/3.1.4/mpich-3.1.4.tar.gz \
    && tar xf mpich-3.1.4.tar.gz \
    && cd mpich-3.1.4 \
    && ./configure --disable-fortran --enable-fast=all,O3 --prefix=/usr \
    && make -j$(nproc) \
    && make install \
    && ldconfig \
    && cd .. \
    && rm -rf mpich-3.1.4 mpich-3.1.4.tar.gz \
    && cd /

# Install Horovod, temporarily using CUDA stubs
# (the trailing ldconfig rebuilds the linker cache without the stubs directory)
RUN ldconfig /usr/local/cuda-9.0/targets/x86_64-linux/lib/stubs && \
    HOROVOD_WITH_TENSORFLOW=1 HOROVOD_WITH_PYTORCH=1 pip install --no-cache-dir horovod==${HOROVOD_VERSION} && \
    ldconfig

# Set default NCCL parameters
RUN echo NCCL_DEBUG=INFO >> /etc/nccl.conf

# Download examples.
# GitHub no longer serves repositories over Subversion, so use a shallow git
# clone of the tag matching the installed Horovod version and keep only the
# examples directory.
RUN git clone --depth 1 --branch v${HOROVOD_VERSION} https://github.com/horovod/horovod.git /tmp/horovod && \
    mv /tmp/horovod/examples /examples && \
    rm -rf /tmp/horovod

WORKDIR "/examples"