Wednesday 20 September 2017

Deep Learning HOWTO with IBM Minsky Power8 server, Ubuntu 16.04, Caffe-NV and Kitti (Part 5).

This is the last part: first we will optimize the environment, and then we will finally perform a run of the Kitti test. :-)

Optimize the system

# Optimize the environment

root@minsky:~# apt install linux-tools-common linux-tools-4.4.0-62-generic linux-tools-generic

root@minsky:~# cpupower -c all frequency-set -g performance
Setting cpu: 0
Setting cpu: 1
Setting cpu: 8
Setting cpu: 9
.
.
.
Setting cpu: 113
Setting cpu: 120
Setting cpu: 121

root@minsky:~# ppc64_cpu --smt=2

root@minsky:~# nvidia-smi -pm ENABLED
Enabled persistence mode for GPU 0002:01:00.0.
Enabled persistence mode for GPU 000A:01:00.0.
All done.

root@minsky:~# nvidia-smi -ac 715,1480
Applications clocks set to "(MEM 715, SM 1480)" for GPU 0002:01:00.0
Applications clocks set to "(MEM 715, SM 1480)" for GPU 000A:01:00.0
All done.



# If you want, you can also disable the Nvidia ECC memory capability (it should provide some extra advantage)

root@minsky:~# nvidia-smi -e 0 # 0 is a zero
root@minsky:~# reboot

Kitti test run

https://github.com/NVIDIA/DIGITS/blob/v4.0.0/examples/object-detection/README.md

wget http://kitti.is.tue.mpg.de/kitti/data_object_image_2.zip
wget https://fredrikarneving.se/digits/data_object_label_2.zip --no-check-certificate
wget https://fredrikarneving.se/digits/devkit_object.zip --no-check-certificate
wget https://fredrikarneving.se/digits/caffe_nv_model.txt --no-check-certificate
wget https://fredrikarneving.se/digits/bvlc_googlenet.caffemodel --no-check-certificate


root@minsky:/sw/dw/data# cp ./devkit_object.zip $DIGITS_HOME/examples/object-detection/
root@minsky:/sw/dw/data# cp ./data_object_label_2.zip $DIGITS_HOME/examples/object-detection/
root@minsky:/sw/dw/data# cp data_object_image_2.zip $DIGITS_HOME/examples/object-detection/


root@minsky:/sw/dw/data# cd $DIGITS_HOME/examples/object-detection/
root@minsky:/sw/digits/examples/object-detection# ./prepare_kitti_data.py
Extracting zipfiles ...
Unzipping data_object_label_2.zip ...
Unzipping data_object_image_2.zip ...
Unzipping devkit_object.zip ...
Calculating image to video mapping ...
Splitting images by video ...
Creating train/val split ...
Done.
root@minsky:/sw/digits/examples/object-detection#

# Follow the instructions in the URL until you get this web page

# After some minutes it will have finished
# Run the test as the URL suggests, using:
#   Dataset = "Kitti default"
#   Epochs = 100
#   Subtract mean = "None"
#   Batch size = 16
#   Solver type = "Adam"
#   Base Learning rate = 0.0001
#   Custom Network = https://raw.githubusercontent.com/NVIDIA/caffe/caffe-0.15/examples/kitti/detectnet_network.prototxt
#   Pretrained model = https://github.com/BVLC/caffe/tree/rc3/models/bvlc_googlenet
# You should get something similar to this:
# As a reference, a similar Supermicro Intel-based server with 2x P100 GPUs runs this test in 300 minutes.

Interesting links


http://developer.download.nvidia.com/compute/cuda/repos/

Deep Learning HOWTO with IBM Minsky Power8 server, Ubuntu 16.04, Caffe-NV and Kitti (Part 4)

Building Caffe

# Install requirements
root@minsky:/sw/dw# sudo apt-get install --no-install-recommends build-essential cmake git gfortran libatlas-base-dev libboost-filesystem-dev libboost-python-dev libboost-system-dev libboost-thread-dev libgflags-dev libgoogle-glog-dev libhdf5-serial-dev libleveldb-dev liblmdb-dev libopencv-dev libprotobuf-dev libsnappy-dev protobuf-compiler python-all-dev python-dev python-h5py python-matplotlib python-numpy python-opencv python-pil python-pip python-protobuf python-scipy python-skimage python-sklearn

# Get it from github

root@minsky:/sw/dw# export CAFFE_ROOT=/sw/caffe
root@minsky:/sw/dw# cd /sw
root@minsky:/sw# mkdir caffe
root@minsky:/sw# cd caffe
root@minsky:/sw/caffe# git clone https://github.com/NVIDIA/caffe.git $CAFFE_ROOT
Cloning into '/sw/caffe'...
remote: Counting objects: 27834, done.
remote: Compressing objects: 100% (19/19), done.
remote: Total 27834 (delta 23), reused 16 (delta 16), pack-reused 27799
Receiving objects: 100% (27834/27834), 39.83 MiB | 966.00 KiB/s, done.
Resolving deltas: 100% (17887/17887), done.
Checking connectivity... done.
root@minsky:/sw/caffe# 

root@minsky:/sw/caffe# pip install -r $CAFFE_ROOT/python/requirements.txt 
# You may have a lot of things to do in this step. TIP: upgrade pip to the newer version, as it suggests.

ERROR FIX !


Fixed build failure on power arch by replacing pause instruction with…
URL: https://github.com/NVIDIA/caffe/pull/247/commits/46f2308c110806b461829c0d14d1d83f6ae8201c

root@minsky:/sw/caffe/build# vi /sw/caffe/3rdparty/cub/host/mutex.cuh

*/
__forceinline__ void YieldProcessor()
{

- #ifndef __arm__
- asm volatile("pause\n": : :"memory");
- #endif // __arm__

+ #if defined(__powerpc64__) || defined(__powerpc__)
+ asm volatile("or 27,27,27\n": : :"memory");
+ #else
+ #ifndef __arm__
+ asm volatile("pause\n": : :"memory");
+ #endif //__arm__
+ #endif //__powerpc64__
}

# Get IBM XLmass libs

root@minsky:/sw/dw# wget https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/dists/xenial/main/binary-ppc64el/libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
--2017-05-17 19:00:37--  https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/dists/xenial/main/binary-ppc64el/libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
Resolving public.dhe.ibm.com (public.dhe.ibm.com)... 9.17.248.112
Connecting to public.dhe.ibm.com (public.dhe.ibm.com)|9.17.248.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 768010 (750K) [text/plain]
Saving to: ‘libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb’

libxlmass-devel.8.1.5_8.1 100%[=====================================>] 750.01K   441KB/s    in 1.7s    

2017-05-17 19:00:45 (441 KB/s) - ‘libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb’ saved [768010/768010]


# To use the Xlmass libs to optimize the code
# Download from here
https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/
root@minsky:/sw/dw# wget https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/dists/xenial/main/binary-ppc64el/libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
root@minsky:/sw/dw# apt install ./libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
# Edit Makefile and add the following
https://github.com/ibmsoe/caffe/commit/f6c258af36127e901a00a1934402498d1ecce3ca
+# MASS configuration.
+ifeq ($(USE_MASS), 1)
+       LIBRARIES += mass massvp8 mass_simdp8
+       COMMON_FLAGS += -DUSE_MASS
+       CXXFLAGS += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations
+       LIBRARY_DIRS += $(MASS_LIB)
+endif
+
# NCCL acceleration configuration
# Configure the compilation
root@minsky:/sw/caffe# cd $CAFFE_ROOT
root@minsky:/sw/caffe# mkdir build
root@minsky:/sw/caffe# cd build
root@minsky:/sw/caffe/build# cmake .. -DUSE_NCCL=ON -DNCCL_ROOT_DIR=/opt/DL/nccl -DCMAKE_BUILD_TYPE=Release

-- Boost version: 1.58.0
-- Found the following Boost libraries:
-- system
-- thread
-- filesystem
-- chrono
-- date_time
-- atomic
-- Found gflags (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libgflags.so)
-- Found glog (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libglog.so)
-- Found PROTOBUF Compiler: /usr/bin/protoc
-- Found lmdb (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/liblmdb.so)
-- Found LevelDB (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libleveldb.so)
-- Found Snappy (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libsnappy.so)
-- CUDA detected: 8.0
-- Found cuDNN: ver. 5.1.10 found (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libcudnn.so)
-- Added CUDA NVCC flags for: sm_60
-- OpenCV found (/usr/share/OpenCV)
-- Found Atlas (include: /usr/include, library: /usr/lib/libatlas.so)
-- NumPy ver. 1.11.0 found (include: /usr/lib/python2.7/dist-packages/numpy/core/include)
-- Boost version: 1.58.0
-- Found the following Boost libraries:
-- python
-- Could NOT find Doxygen (missing: DOXYGEN_EXECUTABLE)
-- Found NCCL (include: /opt/DL/nccl/include, library: /opt/DL/nccl/lib/libnccl.so)
--
-- ******************* Caffe Configuration Summary *******************
-- General:
-- Version : 0.15.14
-- Git : v0.15.14-10-g6d72336-dirty
-- System : Linux
-- C++ compiler : /usr/bin/c++
-- Release CXX flags : -O3 -DNDEBUG -fPIC -Wall -Wno-sign-compare -Wno-uninitialized
-- Debug CXX flags : -g -fPIC -Wall -Wno-sign-compare -Wno-uninitialized
-- Build type : Release
--
-- BUILD_SHARED_LIBS : ON
-- BUILD_python : ON
-- BUILD_matlab : OFF
-- BUILD_docs : ON
-- CPU_ONLY : OFF
-- USE_OPENCV : ON
-- USE_LEVELDB : ON
-- USE_LMDB : ON
-- ALLOW_LMDB_NOLOCK : OFF
--
-- Dependencies:
-- BLAS : Yes (Atlas)
-- Boost : Yes (ver. 1.58)
-- glog : Yes
-- gflags : Yes
-- protobuf : Yes (ver. 2.6.1)
-- lmdb : Yes (ver. 0.9.17)
-- LevelDB : Yes (ver. 1.18)
-- Snappy : Yes (ver. 1.1.3)
-- OpenCV : Yes (ver. 2.4.9.1)
-- CUDA : Yes (ver. 8.0)
--
-- NVIDIA CUDA:
-- Target GPU(s) : Auto
-- GPU arch(s) : sm_60
-- cuDNN : Yes (ver. 5.1.10)
-- NCCL : Yes
--
-- Python:
-- Interpreter : /usr/bin/python2.7 (ver. 2.7.12)
-- Libraries : /usr/lib/powerpc64le-linux-gnu/libpython2.7.so (ver 2.7.12)
-- NumPy : /usr/lib/python2.7/dist-packages/numpy/core/include (ver 1.11.0)
--
-- Documentaion:
-- Doxygen : No
-- config_file :
--
-- Install:
-- Install path : /sw/caffe/build/install
--
-- Configuring done
-- Generating done
-- Build files have been written to: /sw/caffe/build

ERROR FIX !

# Fix the make failure by adding the full path in the include

#ERROR shown:
/sw/caffe/include/caffe/util/nccl.hpp:5:18: fatal error: nccl.h: No such file or directory

root@minsky:/sw/caffe/build# vi /sw/caffe/include/caffe/util/nccl.hpp

# ADD the full path in the include line
#include </opt/DL/nccl/include/nccl.h>


# Compile now caffe-nv
root@minsky:/sw/caffe/build# make --jobs=16 # Just use the max number of threads
[ 1%] Built target proto
Scanning dependencies of target caffe
[ 2%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/window_data_layer.cpp.o
[ 2%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/reshape_layer.cpp.o
[ 2%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/lrn_layer.cpp.o
[ 4%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/base_conv_layer.cpp.o
.
.
.
[100%] Linking CXX executable caffe
[100%] Built target caffe.bin
[100%] Linking CXX shared library ../lib/_caffe.so
Creating symlink /sw/caffe/python/caffe/_caffe.so -> /sw/caffe/build/lib/_caffe.so
[100%] Built target pycaffe

root@minsky:/sw/caffe/build# /sw/caffe/build/tools/caffe --version
caffe version 0.15.14

Building DIGITS


# Install requirements


root@minsky:/sw/digits# apt install --no-install-recommends git graphviz python-dev python-flask python-flaskext.wtf python-gevent python-h5py python-numpy python-pil python-pip python-protobuf python-scipy

root@minsky:/sw/digits# apt install python-tk

# Get it from github

root@minsky:/sw# DIGITS_ROOT=/sw/digits
root@minsky:/sw# git clone https://github.com/NVIDIA/DIGITS.git $DIGITS_ROOT
Cloning into '/sw/digits'...
remote: Counting objects: 11496, done.
remote: Total 11496 (delta 0), reused 0 (delta 0), pack-reused 11496
Receiving objects: 100% (11496/11496), 24.37 MiB | 72.00 KiB/s, done.
Resolving deltas: 100% (7571/7571), done.
Checking connectivity... done.

# Now the requirements

root@minsky:/sw#  pip install -r $DIGITS_ROOT/requirements.txt
Requirement already satisfied: Pillow<=3.1.2,>=2.3.0 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 1))
Requirement already satisfied: numpy<=1.11.0,>=1.8.1 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 2))
Requirement already satisfied: scipy<=0.17.0,>=0.13.3 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 3))
Requirement already satisfied: protobuf<=2.6.1,>=2.5.0 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 4))
.
.
.
Requirement already satisfied: Werkzeug>=0.7 in /usr/lib/python2.7/dist-packages (from Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Requirement already satisfied: Jinja2>=2.4 in /usr/lib/python2.7/dist-packages (from Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Requirement already satisfied: itsdangerous>=0.21 in /usr/lib/python2.7/dist-packages (from Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Collecting python-socketio>=1.4 (from Flask-SocketIO==2.6->-r /sw/digits/requirements.txt (line 12))
  Downloading python-socketio-1.7.4.tar.gz
Collecting python-engineio>=0.9.2 (from Flask-SocketIO==2.6->-r /sw/digits/requirements.txt (line 12))
  Downloading python-engineio-1.5.2.tar.gz
Collecting pyparsing==1.5.7 (from pydot<=1.0.29,>=1.0.28->-r /sw/digits/requirements.txt (line 16))
  Downloading pyparsing-1.5.7.zip (965kB)
    100% |████████████████████████████████| 972kB 1.1MB/s 
Requirement already satisfied: MarkupSafe in /usr/lib/python2.7/dist-packages (from Jinja2>=2.4->Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Installing collected packages: requests, gevent-websocket, python-engineio, python-socketio, Flask-SocketIO, lmdb, pyparsing, pydot, psutil, scikit-fmm
  Running setup.py install for python-engineio ... done
  Running setup.py install for python-socketio ... done
  Running setup.py install for Flask-SocketIO ... done
  Running setup.py install for lmdb ... done
  Found existing installation: pyparsing 2.0.3
    Uninstalling pyparsing-2.0.3:
      Successfully uninstalled pyparsing-2.0.3
  Running setup.py install for pyparsing ... done
  Running setup.py install for pydot ... done
  Running setup.py install for psutil ... done
  Running setup.py install for scikit-fmm ... done
Successfully installed Flask-SocketIO-2.6 gevent-websocket-0.9.3 lmdb-0.87 psutil-3.4.2 pydot-1.0.29 pyparsing-1.5.7 python-engineio-1.5.2 python-socketio-1.7.4 requests-2.9.1 scikit-fmm-0.0.9
root@minsky:/sw# 


# RUN it !! :-D

root@minsky:/sw/digits# screen
root@minsky:/sw/digits# ./digits-devserver 
  ___ ___ ___ ___ _____ ___
 |   \_ _/ __|_ _|_   _/ __|
 | |) | | (_ || |  | | \__ \
 |___/___\___|___| |_| |___/ 5.1-dev

2017-05-17 13:30:00 [INFO ] Loaded 0 jobs.


# To access it, open a browser and point it to the following address:
# URL: http://IPADDRESS:5000

Deep Learning HOWTO with IBM Minsky Power8 server, Ubuntu 16.04, Caffe-NV and Kitti (Part 3)

Now, let's start installing the real stuff :-)

Download and Install the Nvidia software & drivers

CUDA8


root@minsky:/sw/dw# wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/ppc64el/cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb
--2017-05-17 11:34:13-- http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/ppc64el/cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb
Resolving developer.download.nvidia.com (developer.download.nvidia.com)... 192.229.221.58, 2606:2800:233:ef6:15dd:1ece:1d50:1e1
Connecting to developer.download.nvidia.com (developer.download.nvidia.com)|192.229.221.58|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2700 (2.6K) [application/x-deb]
Saving to: ‘cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb’
cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb 100%[=====================================================================================================>] 2.64K --.-KB/s in 0s
2017-05-17 11:34:13 (287 MB/s) - ‘cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb’ saved [2700/2700]
root@minsky:/sw/dw# apt install ./cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb
Reading package lists... Done
Building dependency tree
Reading state information... Done
Note, selecting 'cuda-repo-ubuntu1604' instead of './cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb'
The following NEW packages will be installed:
cuda-repo-ubuntu1604
0 upgraded, 1 newly installed, 0 to remove and 0 not upgraded.
Need to get 0 B/2,700 B of archives.
After this operation, 37.9 kB of additional disk space will be used.
Get:1 /sw/dw/cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb cuda-repo-ubuntu1604 ppc64el 8.0.61-1 [2,700 B]
Selecting previously unselected package cuda-repo-ubuntu1604.
(Reading database ... 57454 files and directories currently installed.)
Preparing to unpack .../cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb ...
Unpacking cuda-repo-ubuntu1604 (8.0.61-1) ...
Setting up cuda-repo-ubuntu1604 (8.0.61-1) ...
OK
root@minsky:/sw/dw# apt update
Get:1 http://se.ports.ubuntu.com/ubuntu-ports xenial InRelease [247 kB]
Get:2 http://se.ports.ubuntu.com/ubuntu-ports xenial-updates InRelease [102 kB]
Get:3 http://se.ports.ubuntu.com/ubuntu-ports xenial-backports InRelease [102 kB]
Get:4 http://se.ports.ubuntu.com/ubuntu-ports xenial/main ppc64el Packages [1,134 kB]
.
.
.
Get:31 http://se.ports.ubuntu.com/ubuntu-ports xenial-backports/universe ppc64el Packages [1,656 B]
Get:32 http://se.ports.ubuntu.com/ubuntu-ports xenial-backports/universe Translation-en [1,216 B]
Fetched 15.9 MB in 8s (1,807 kB/s)
Reading package lists... Done
Building dependency tree
Reading state information... Done
80 packages can be upgraded. Run 'apt list --upgradable' to see them.
root@minsky:/sw/dw#
root@minsky:/sw/dw# apt install cuda
Reading package lists... Done
Building dependency tree
Reading state information... Done
The following additional packages will be installed:
adwaita-icon-theme at-spi2-core bbswitch-dkms binutils build-essential bumblebee ca-certificates-java colord colord-data cpp cpp-5 cuda-8-0 cuda-command-line-tools-8-0 cuda-core-8-0
cuda-cublas-8-0
.
.
.
Processing triggers for dbus (1.10.6-1ubuntu3.3) ...
Processing triggers for ca-certificates (20160104ubuntu1) ...
Updating certificates in /etc/ssl/certs...
0 added, 0 removed; done.
Running hooks in /etc/ca-certificates/update.d...
done.
done.


REBOOT


Install the cuDNN libs

Download NVIDIA cuDNN 5.1 / 6.0 (select only one version) for CUDA 8.0 Power8 Deb packages from https://developer.nvidia.com/cudnn (Registration in NVIDIA's Accelerated Computing Developer Program is required)

root@minsky:/sw/dw# ll
total 198652
drwxrwxrwx 2 root root 4096 May 17 12:05 ./
drwxr-xr-x 3 root root 4096 May 17 11:34 ../
-rwxrwxrwx 1 root root 2700 Apr 8 06:45 cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb*
-rw-rw-r-- 1 ibm ibm 41212732 May 17 12:05 libcudnn5_5.1.10-1+cuda8.0_ppc64el.deb
-rw-rw-r-- 1 ibm ibm 33919496 May 17 12:02 libcudnn5-dev_5.1.10-1+cuda8.0_ppc64el.deb
-rw-rw-r-- 1 ibm ibm 68444212 May 17 12:04 libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb
-rw-rw-r-- 1 ibm ibm 59820704 May 17 12:01 libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb

root@minsky:/sw/dw# apt install ./libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb
Reading package lists... Done
Building dependency tree
Reading state information... Done
Note, selecting 'libcudnn6' instead of './libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb'
The following NEW packages will be installed:
libcudnn6
0 upgraded, 1 newly installed, 0 to remove and 79 not upgraded.
Need to get 0 B/68.4 MB of archives.
After this operation, 154 MB of additional disk space will be used.
Get:1 /sw/dw/libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb libcudnn6 ppc64el 6.0.21-1+cuda8.0 [68.4 MB]
Selecting previously unselected package libcudnn6.
(Reading database ... 87755 files and directories currently installed.)
Preparing to unpack .../libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb ...
Unpacking libcudnn6 (6.0.21-1+cuda8.0) ...
Processing triggers for libc-bin (2.23-0ubuntu5) ...
Setting up libcudnn6 (6.0.21-1+cuda8.0) ...
Processing triggers for libc-bin (2.23-0ubuntu5) ...


root@minsky:/sw/dw# apt install ./libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb
Reading package lists... Done
Building dependency tree
Reading state information... Done
Note, selecting 'libcudnn6-dev' instead of './libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb'
The following NEW packages will be installed:
libcudnn6-dev
0 upgraded, 1 newly installed, 0 to remove and 79 not upgraded.
Need to get 0 B/59.8 MB of archives.
After this operation, 145 MB of additional disk space will be used.
Get:1 /sw/dw/libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb libcudnn6-dev ppc64el 6.0.21-1+cuda8.0 [59.8 MB]
Selecting previously unselected package libcudnn6-dev.
(Reading database ... 87761 files and directories currently installed.)
Preparing to unpack .../libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb ...
Unpacking libcudnn6-dev (6.0.21-1+cuda8.0) ...
Setting up libcudnn6-dev (6.0.21-1+cuda8.0) ...
update-alternatives: using /usr/include/powerpc64le-linux-gnu/cudnn_v6.h to provide /usr/include/cudnn.h (libcudnn) in auto mode
root@minsky:/sw/dw#

Install the NCCL libraries

(necessary to run on more than one GPU)


root@minsky:/sw/dw# wget https://public.dhe.ibm.com/software/server/POWER/Linux/mldl/ubuntu/mldl-repo-network_3.4.0_ppc64el.deb
--2017-05-17 12:45:01-- https://public.dhe.ibm.com/software/server/POWER/Linux/mldl/ubuntu/mldl-repo-network_3.4.0_ppc64el.deb
Resolving public.dhe.ibm.com (public.dhe.ibm.com)... 9.17.248.112
Connecting to public.dhe.ibm.com (public.dhe.ibm.com)|9.17.248.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 144760 (141K) [text/plain]
Saving to: ‘mldl-repo-network_3.4.0_ppc64el.deb’

mldl-repo-network_3.4.0_ppc64el.deb 100%[=====================================================================================================>] 141.37K 181KB/s in 0.8s

2017-05-17 12:45:08 (181 KB/s) - ‘mldl-repo-network_3.4.0_ppc64el.deb’ saved [144760/144760]

root@minsky:/sw/dw# apt install ./mldl-repo-network_3.4.0_ppc64el.deb
Reading package lists... Done
Building dependency tree
Reading state information... Done
Note, selecting 'mldl-repo-network' instead of './mldl-repo-network_3.4.0_ppc64el.deb'
The following NEW packages will be installed:
mldl-repo-network
0 upgraded, 1 newly installed, 0 to remove and 76 not upgraded.
Need to get 0 B/145 kB of archives.
After this operation, 170 kB of additional disk space will be used.
Get:1 /sw/dw/mldl-repo-network_3.4.0_ppc64el.deb mldl-repo-network ppc64el 3.4.0 [145 kB]
Selecting previously unselected package mldl-repo-network.
(Reading database ... 111286 files and directories currently installed.)
Preparing to unpack .../mldl-repo-network_3.4.0_ppc64el.deb ...
Unpacking mldl-repo-network (3.4.0) ...
Setting up mldl-repo-network (3.4.0) ...
OK

root@minsky:/sw/dw# apt update
Hit:1 http://ports.ubuntu.com/ubuntu-ports xenial-security InRelease
Ign:2 http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/ppc64el InRelease
Hit:3 http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/ppc64el Release
Hit:5 http://se.ports.ubuntu.com/ubuntu-ports xenial InRelease
Hit:6 http://se.ports.ubuntu.com/ubuntu-ports xenial-updates InRelease
Hit:7 http://se.ports.ubuntu.com/ubuntu-ports xenial-backports InRelease
Get:8 http://public.dhe.ibm.com/software/server/POWER/Linux/mldl/ubuntu xenial InRelease [1,830 B]
Get:9 http://public.dhe.ibm.com/software/server/POWER/Linux/mldl/ubuntu xenial/main ppc64el Packages [33.8 kB]
Fetched 35.6 kB in 6s (5,700 B/s)
Reading package lists... Done
Building dependency tree
Reading state information... Done
76 packages can be upgraded. Run 'apt list --upgradable' to see them.

root@minsky:/sw/dw# apt search nccl
Sorting... Done
Full Text Search... Done
.

libnccl-dev/unknown 1.3.2-1.cuda8.0 ppc64el
NVIDIA Collectives Communication Library (NCCL) Development Files

libnccl1/unknown 1.3.2-1.cuda8.0 ppc64el
NVIDIA Collectives Communication Library (NCCL) Runtime


root@minsky:/sw/dw# apt install libnccl1 libnccl-dev
Reading package lists... Done
Building dependency tree
Reading state information... Done
.
.
.
Unpacking libnccl-dev (1.3.2-1.cuda8.0) ...
Setting up libnccl-dev (1.3.2-1.cuda8.0) ...
root@minsky:/sw/dw#


export NCCL_ROOT_DIR=/opt/DL/nccl

# You can add this export line in the .profile of your root user.




Holidays are for the summer