Wednesday, 20 September 2017

Deep Learning HOWTO with IBM Minsky Power8 server, Ubuntu 16.04, Caffe-NV and Kitti (Part 5).

This is the last part: first we will optimize the environment, and then we will finally perform a test run of the Kitti test. :-)

Optimize the system

# Optimize the environment

root@minsky:~# apt install linux-tools-common linux-tools-4.4.0-62-generic linux-tools-generic

root@minsky:~# cpupower -c all frequency-set -g performance
Setting cpu: 0
Setting cpu: 1
Setting cpu: 8
Setting cpu: 9
.
.
.
Setting cpu: 113
Setting cpu: 120
Setting cpu: 121

root@minsky:~# ppc64_cpu --smt=2

root@minsky:~# nvidia-smi -pm ENABLED
Enabled persistence mode for GPU 0002:01:00.0.
Enabled persistence mode for GPU 000A:01:00.0.
All done.

root@minsky:~# nvidia-smi -ac 715,1480
Applications clocks set to "(MEM 715, SM 1480)" for GPU 0002:01:00.0
Applications clocks set to "(MEM 715, SM 1480)" for GPU 000A:01:00.0
All done.



# If you want, you can also disable the Nvidia ECC memory capability (this should provide some extra advantage)

root@minsky:~# nvidia-smi -e 0 # 0 is a zero
root@minsky:~# reboot

Kitti test run

https://github.com/NVIDIA/DIGITS/blob/v4.0.0/examples/object-detection/README.md

wget http://kitti.is.tue.mpg.de/kitti/data_object_image_2.zip
wget https://fredrikarneving.se/digits/data_object_label_2.zip --no-check-certificate
wget https://fredrikarneving.se/digits/devkit_object.zip --no-check-certificate
wget https://fredrikarneving.se/digits/caffe_nv_model.txt --no-check-certificate
wget https://fredrikarneving.se/digits/bvlc_googlenet.caffemodel --no-check-certificate


root@minsky:/sw/dw/data# cp ./devkit_object.zip $DIGITS_HOME/examples/object-detection/
root@minsky:/sw/dw/data# cp ./data_object_label_2.zip $DIGITS_HOME/examples/object-detection/
root@minsky:/sw/dw/data# cp data_object_image_2.zip $DIGITS_HOME/examples/object-detection/


root@minsky:/sw/dw/data# cd $DIGITS_HOME/examples/object-detection/
root@minsky:/sw/digits/examples/object-detection# ./prepare_kitti_data.py
Extracting zipfiles ...
Unzipping data_object_label_2.zip ...
Unzipping data_object_image_2.zip …
Unzipping devkit_object.zip ...
Calculating image to video mapping ...
Splitting images by video ...
Creating train/val split ...
Done.
root@minsky:/sw/digits/examples/object-detection#

# Follow the instructions in the URL until you get this web page

# After some minutes it will have finished
# Run the test as the URL suggests, using:
#   Dataset = "Kitti default"
#   Epochs = 100
#   Subtract mean = "None"
#   Batch size = 16
#   Solver type = "Adam"
#   Base Learning rate = 0.0001
#   Custom Network = https://raw.githubusercontent.com/NVIDIA/caffe/caffe-0.15/examples/kitti/detectnet_network.prototxt
#   Pretrained model = https://github.com/BVLC/caffe/tree/rc3/models/bvlc_googlenet
# You should get something similar to this:
# As a reference, a similar Supermicro Intel-based server with 2xP100 GPUs runs this test in 300 minutes.

Interesting links


http://developer.download.nvidia.com/compute/cuda/repos/

Deep Learning HOWTO with IBM Minsky Power8 server, Ubuntu 16.04, Caffe-NV and Kitti (Part 4)

Building Caffe

# Install requirements
root@minsky:/sw/dw# sudo apt-get install --no-install-recommends build-essential cmake git gfortran libatlas-base-dev libboost-filesystem-dev libboost-python-dev libboost-system-dev libboost-thread-dev libgflags-dev libgoogle-glog-dev libhdf5-serial-dev libleveldb-dev liblmdb-dev libopencv-dev libprotobuf-dev libsnappy-dev protobuf-compiler python-all-dev python-dev python-h5py python-matplotlib python-numpy python-opencv python-pil python-pip python-protobuf python-scipy python-skimage python-sklearn

# Get it from github

root@minsky:/sw/dw# export CAFFE_ROOT=/sw/caffe
root@minsky:/sw/dw# cd /sw
root@minsky:/sw# mkdir caffe
root@minsky:/sw# cd caffe
root@minsky:/sw/caffe# git clone https://github.com/NVIDIA/caffe.git $CAFFE_ROOT
Cloning into '/sw/caffe'...
remote: Counting objects: 27834, done.
remote: Compressing objects: 100% (19/19), done.
remote: Total 27834 (delta 23), reused 16 (delta 16), pack-reused 27799
Receiving objects: 100% (27834/27834), 39.83 MiB | 966.00 KiB/s, done.
Resolving deltas: 100% (17887/17887), done.
Checking connectivity... done.
root@minsky:/sw/caffe# 

root@minsky:/sw/caffe# pip install -r $CAFFE_ROOT/python/requirements.txt 
# You may have a lot of things to do in this step. TIP: Upgrade pip to the newer version, as it suggests.

ERROR FIX !


Fixed build failure on power arch by replacing pause instruction with…
URL: https://github.com/NVIDIA/caffe/pull/247/commits/46f2308c110806b461829c0d14d1d83f6ae8201c

root@minsky:/sw/caffe/build# vi /sw/caffe/3rdparty/cub/host/mutex.cuh

*/
__forceinline__ void YieldProcessor()
{

- #ifndef __arm__
- asm volatile("pause\n": : :"memory");
- #endif // __arm__

+ #if defined(__powerpc64__) || defined(__powerpc__)
+ asm volatile("or 27,27,27\n": : :"memory");
+ #else
+ #ifndef __arm__
+ asm volatile("pause\n": : :"memory");
+ #endif //__arm__
+ #endif //__powerpc64__
}

# Get IBM XLmass libs

root@minsky:/sw/dw# wget https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/dists/xenial/main/binary-ppc64el/libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
--2017-05-17 19:00:37--  https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/dists/xenial/main/binary-ppc64el/libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
Resolving public.dhe.ibm.com (public.dhe.ibm.com)... 9.17.248.112
Connecting to public.dhe.ibm.com (public.dhe.ibm.com)|9.17.248.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 768010 (750K) [text/plain]
Saving to: ‘libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb’

libxlmass-devel.8.1.5_8.1 100%[=====================================>] 750.01K   441KB/s    in 1.7s    

2017-05-17 19:00:45 (441 KB/s) - ‘libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb’ saved [768010/768010]


# To use the Xlmass libs to optimize the code
# Download from here
https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/
root@minsky:/sw/dw# wget https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/dists/xenial/main/binary-ppc64el/libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
root@minsky:/sw/dw# apt install ./libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
# Edit Makefile and add the following
https://github.com/ibmsoe/caffe/commit/f6c258af36127e901a00a1934402498d1ecce3ca
+# MASS configuration.
+ifeq ($(USE_MASS), 1)
+       LIBRARIES += mass massvp8 mass_simdp8
+       COMMON_FLAGS += -DUSE_MASS
+       CXXFLAGS += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations
+       LIBRARY_DIRS += $(MASS_LIB)
+endif
+
# NCCL acceleration configuration
# Configure the compilation
root@minsky:/sw/caffe# cd $CAFFE_ROOT
root@minsky:/sw/caffe# mkdir build
root@minsky:/sw/caffe# cd build
root@minsky:/sw/caffe/build# cmake .. -DUSE_NCCL=ON -DNCCL_ROOT_DIR=/opt/DL/nccl -DCMAKE_BUILD_TYPE=Release

-- Boost version: 1.58.0
-- Found the following Boost libraries:
-- system
-- thread
-- filesystem
-- chrono
-- date_time
-- atomic
-- Found gflags (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libgflags.so)
-- Found glog (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libglog.so)
-- Found PROTOBUF Compiler: /usr/bin/protoc
-- Found lmdb (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/liblmdb.so)
-- Found LevelDB (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libleveldb.so)
-- Found Snappy (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libsnappy.so)
-- CUDA detected: 8.0
-- Found cuDNN: ver. 5.1.10 found (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libcudnn.so)
-- Added CUDA NVCC flags for: sm_60
-- OpenCV found (/usr/share/OpenCV)
-- Found Atlas (include: /usr/include, library: /usr/lib/libatlas.so)
-- NumPy ver. 1.11.0 found (include: /usr/lib/python2.7/dist-packages/numpy/core/include)
-- Boost version: 1.58.0
-- Found the following Boost libraries:
-- python
-- Could NOT find Doxygen (missing: DOXYGEN_EXECUTABLE)
-- Found NCCL (include: /opt/DL/nccl/include, library: /opt/DL/nccl/lib/libnccl.so)
--
-- ******************* Caffe Configuration Summary *******************
-- General:
-- Version : 0.15.14
-- Git : v0.15.14-10-g6d72336-dirty
-- System : Linux
-- C++ compiler : /usr/bin/c++
-- Release CXX flags : -O3 -DNDEBUG -fPIC -Wall -Wno-sign-compare -Wno-uninitialized
-- Debug CXX flags : -g -fPIC -Wall -Wno-sign-compare -Wno-uninitialized
-- Build type : Release
--
-- BUILD_SHARED_LIBS : ON
-- BUILD_python : ON
-- BUILD_matlab : OFF
-- BUILD_docs : ON
-- CPU_ONLY : OFF
-- USE_OPENCV : ON
-- USE_LEVELDB : ON
-- USE_LMDB : ON
-- ALLOW_LMDB_NOLOCK : OFF
--
-- Dependencies:
-- BLAS : Yes (Atlas)
-- Boost : Yes (ver. 1.58)
-- glog : Yes
-- gflags : Yes
-- protobuf : Yes (ver. 2.6.1)
-- lmdb : Yes (ver. 0.9.17)
-- LevelDB : Yes (ver. 1.18)
-- Snappy : Yes (ver. 1.1.3)
-- OpenCV : Yes (ver. 2.4.9.1)
-- CUDA : Yes (ver. 8.0)
--
-- NVIDIA CUDA:
-- Target GPU(s) : Auto
-- GPU arch(s) : sm_60
-- cuDNN : Yes (ver. 5.1.10)
-- NCCL : Yes
--
-- Python:
-- Interpreter : /usr/bin/python2.7 (ver. 2.7.12)
-- Libraries : /usr/lib/powerpc64le-linux-gnu/libpython2.7.so (ver 2.7.12)
-- NumPy : /usr/lib/python2.7/dist-packages/numpy/core/include (ver 1.11.0)
--
-- Documentaion:
-- Doxygen : No
-- config_file :
--
-- Install:
-- Install path : /sw/caffe/build/install
--
-- Configuring done
-- Generating done
-- Build files have been written to: /sw/caffe/build

ERROR FIX !

#Fixed make failure adding a full path in the include

#ERROR shown:
/sw/caffe/include/caffe/util/nccl.hpp:5:18: fatal error: nccl.h: No such file or directory

root@minsky:/sw/caffe/build# vi /sw/caffe/include/caffe/util/nccl.hpp

# ADD the full path in the include line
#include </opt/DL/nccl/include/nccl.h>


# Compile now caffe-nv
root@minsky:/sw/caffe/build# make --jobs=16 # Just use the max number of threads
[ 1%] Built target proto
Scanning dependencies of target caffe
[ 2%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/window_data_layer.cpp.o
[ 2%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/reshape_layer.cpp.o
[ 2%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/lrn_layer.cpp.o
[ 4%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/base_conv_layer.cpp.o
.
.
.
[100%] Linking CXX executable caffe
[100%] Built target caffe.bin
[100%] Linking CXX shared library ../lib/_caffe.so
Creating symlink /sw/caffe/python/caffe/_caffe.so -> /sw/caffe/build/lib/_caffe.so
[100%] Built target pycaffe

root@minsky:/sw/caffe/build# /sw/caffe/build/tools/caffe --version
caffe version 0.15.14

Building DIGITS


# Install requirements


root@minsky:/sw/digits# apt install --no-install-recommends git graphviz python-dev python-flask python-flaskext.wtf python-gevent python-h5py python-numpy python-pil python-pip python-protobuf python-scipy

root@minsky:/sw/digits# apt install python-tk

# Get it from github

root@minsky:/sw# DIGITS_ROOT=/sw/digits
root@minsky:/sw# git clone https://github.com/NVIDIA/DIGITS.git $DIGITS_ROOT
Cloning into '/sw/digits'...
remote: Counting objects: 11496, done.
remote: Total 11496 (delta 0), reused 0 (delta 0), pack-reused 11496
Receiving objects: 100% (11496/11496), 24.37 MiB | 72.00 KiB/s, done.
Resolving deltas: 100% (7571/7571), done.
Checking connectivity... done.

# Now the requirements

root@minsky:/sw#  pip install -r $DIGITS_ROOT/requirements.txt
Requirement already satisfied: Pillow<=3.1.2,>=2.3.0 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 1))
Requirement already satisfied: numpy<=1.11.0,>=1.8.1 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 2))
Requirement already satisfied: scipy<=0.17.0,>=0.13.3 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 3))
Requirement already satisfied: protobuf<=2.6.1,>=2.5.0 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 4))
.
.
.
Requirement already satisfied: Werkzeug>=0.7 in /usr/lib/python2.7/dist-packages (from Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Requirement already satisfied: Jinja2>=2.4 in /usr/lib/python2.7/dist-packages (from Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Requirement already satisfied: itsdangerous>=0.21 in /usr/lib/python2.7/dist-packages (from Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Collecting python-socketio>=1.4 (from Flask-SocketIO==2.6->-r /sw/digits/requirements.txt (line 12))
  Downloading python-socketio-1.7.4.tar.gz
Collecting python-engineio>=0.9.2 (from Flask-SocketIO==2.6->-r /sw/digits/requirements.txt (line 12))
  Downloading python-engineio-1.5.2.tar.gz
Collecting pyparsing==1.5.7 (from pydot<=1.0.29,>=1.0.28->-r /sw/digits/requirements.txt (line 16))
  Downloading pyparsing-1.5.7.zip (965kB)
    100% |████████████████████████████████| 972kB 1.1MB/s 
Requirement already satisfied: MarkupSafe in /usr/lib/python2.7/dist-packages (from Jinja2>=2.4->Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Installing collected packages: requests, gevent-websocket, python-engineio, python-socketio, Flask-SocketIO, lmdb, pyparsing, pydot, psutil, scikit-fmm
  Running setup.py install for python-engineio ... done
  Running setup.py install for python-socketio ... done
  Running setup.py install for Flask-SocketIO ... done
  Running setup.py install for lmdb ... done
  Found existing installation: pyparsing 2.0.3
    Uninstalling pyparsing-2.0.3:
      Successfully uninstalled pyparsing-2.0.3
  Running setup.py install for pyparsing ... done
  Running setup.py install for pydot ... done
  Running setup.py install for psutil ... done
  Running setup.py install for scikit-fmm ... done
Successfully installed Flask-SocketIO-2.6 gevent-websocket-0.9.3 lmdb-0.87 psutil-3.4.2 pydot-1.0.29 pyparsing-1.5.7 python-engineio-1.5.2 python-socketio-1.7.4 requests-2.9.1 scikit-fmm-0.0.9
root@minsky:/sw# 


# RUN it !! :-D

root@minsky:/sw/digits# screen
root@minsky:/sw/digits# ./digits-devserver 
  ___ ___ ___ ___ _____ ___
 |   \_ _/ __|_ _|_   _/ __|
 | |) | | (_ || |  | | \__ \
 |___/___\___|___| |_| |___/ 5.1-dev

2017-05-17 13:30:00 [INFO ] Loaded 0 jobs.


# To be able to access it, open a web browser and point it to the following address:
# URL: http://IPADDRESS:5000 

Deep Learning HOWTO with IBM Minsky Power8 server, Ubuntu 16.04, Caffe-NV and Kitti (Part 3)

Now, let's start installing the real stuff :-)

Download and Install the Nvidia software & drivers

CUDA8


root@minsky:/sw/dw# wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/ppc64el/cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb
--2017-05-17 11:34:13-- http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/ppc64el/cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb
Resolving developer.download.nvidia.com (developer.download.nvidia.com)... 192.229.221.58, 2606:2800:233:ef6:15dd:1ece:1d50:1e1
Connecting to developer.download.nvidia.com (developer.download.nvidia.com)|192.229.221.58|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2700 (2.6K) [application/x-deb]
Saving to: ‘cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb’
cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb 100%[=====================================================================================================>] 2.64K --.-KB/s in 0s
2017-05-17 11:34:13 (287 MB/s) - ‘cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb’ saved [2700/2700]
root@minsky:/sw/dw# apt install ./cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb
Reading package lists... Done
Building dependency tree
Reading state information... Done
Note, selecting 'cuda-repo-ubuntu1604' instead of './cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb'
The following NEW packages will be installed:
cuda-repo-ubuntu1604
0 upgraded, 1 newly installed, 0 to remove and 0 not upgraded.
Need to get 0 B/2,700 B of archives.
After this operation, 37.9 kB of additional disk space will be used.
Get:1 /sw/dw/cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb cuda-repo-ubuntu1604 ppc64el 8.0.61-1 [2,700 B]
Selecting previously unselected package cuda-repo-ubuntu1604.
(Reading database ... 57454 files and directories currently installed.)
Preparing to unpack .../cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb ...
Unpacking cuda-repo-ubuntu1604 (8.0.61-1) ...
Setting up cuda-repo-ubuntu1604 (8.0.61-1) ...
OK
root@minsky:/sw/dw# apt update
Get:1 http://se.ports.ubuntu.com/ubuntu-ports xenial InRelease [247 kB]
Get:2 http://se.ports.ubuntu.com/ubuntu-ports xenial-updates InRelease [102 kB]
Get:3 http://se.ports.ubuntu.com/ubuntu-ports xenial-backports InRelease [102 kB]
Get:4 http://se.ports.ubuntu.com/ubuntu-ports xenial/main ppc64el Packages [1,134 kB]
.
.
.
Get:31 http://se.ports.ubuntu.com/ubuntu-ports xenial-backports/universe ppc64el Packages [1,656 B]
Get:32 http://se.ports.ubuntu.com/ubuntu-ports xenial-backports/universe Translation-en [1,216 B]
Fetched 15.9 MB in 8s (1,807 kB/s)
Reading package lists... Done
Building dependency tree
Reading state information... Done
80 packages can be upgraded. Run 'apt list --upgradable' to see them.
root@minsky:/sw/dw#
root@minsky:/sw/dw# apt install cuda
Reading package lists... Done
Building dependency tree
Reading state information... Done
The following additional packages will be installed:
adwaita-icon-theme at-spi2-core bbswitch-dkms binutils build-essential bumblebee ca-certificates-java colord colord-data cpp cpp-5 cuda-8-0 cuda-command-line-tools-8-0 cuda-core-8-0
cuda-cublas-8-0
.
.
.
Processing triggers for dbus (1.10.6-1ubuntu3.3) ...
Processing triggers for ca-certificates (20160104ubuntu1) ...
Updating certificates in /etc/ssl/certs...
0 added, 0 removed; done.
Running hooks in /etc/ca-certificates/update.d...
done.
done.


REBOOT


Install the cuDNN libs

Download NVIDIA cuDNN 5.1 / 6.0 (select only one version) for CUDA 8.0 Power8 Deb packages from https://developer.nvidia.com/cudnn (Registration in NVIDIA's Accelerated Computing Developer Program is required)

root@minsky:/sw/dw# ll
total 198652
drwxrwxrwx 2 root root 4096 May 17 12:05 ./
drwxr-xr-x 3 root root 4096 May 17 11:34 ../
-rwxrwxrwx 1 root root 2700 Apr 8 06:45 cuda-repo-ubuntu1604_8.0.61-1_ppc64el.deb*
-rw-rw-r-- 1 ibm ibm 41212732 May 17 12:05 libcudnn5_5.1.10-1+cuda8.0_ppc64el.deb
-rw-rw-r-- 1 ibm ibm 33919496 May 17 12:02 libcudnn5-dev_5.1.10-1+cuda8.0_ppc64el.deb
-rw-rw-r-- 1 ibm ibm 68444212 May 17 12:04 libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb
-rw-rw-r-- 1 ibm ibm 59820704 May 17 12:01 libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb

root@minsky:/sw/dw# apt install ./libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb
Reading package lists... Done
Building dependency tree
Reading state information... Done
Note, selecting 'libcudnn6' instead of './libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb'
The following NEW packages will be installed:
libcudnn6
0 upgraded, 1 newly installed, 0 to remove and 79 not upgraded.
Need to get 0 B/68.4 MB of archives.
After this operation, 154 MB of additional disk space will be used.
Get:1 /sw/dw/libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb libcudnn6 ppc64el 6.0.21-1+cuda8.0 [68.4 MB]
Selecting previously unselected package libcudnn6.
(Reading database ... 87755 files and directories currently installed.)
Preparing to unpack .../libcudnn6_6.0.21-1+cuda8.0_ppc64el.deb ...
Unpacking libcudnn6 (6.0.21-1+cuda8.0) ...
Processing triggers for libc-bin (2.23-0ubuntu5) ...
Setting up libcudnn6 (6.0.21-1+cuda8.0) ...
Processing triggers for libc-bin (2.23-0ubuntu5) …


root@minsky:/sw/dw# apt install ./libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb
Reading package lists... Done
Building dependency tree
Reading state information... Done
Note, selecting 'libcudnn6-dev' instead of './libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb'
The following NEW packages will be installed:
libcudnn6-dev
0 upgraded, 1 newly installed, 0 to remove and 79 not upgraded.
Need to get 0 B/59.8 MB of archives.
After this operation, 145 MB of additional disk space will be used.
Get:1 /sw/dw/libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb libcudnn6-dev ppc64el 6.0.21-1+cuda8.0 [59.8 MB]
Selecting previously unselected package libcudnn6-dev.
(Reading database ... 87761 files and directories currently installed.)
Preparing to unpack .../libcudnn6-dev_6.0.21-1+cuda8.0_ppc64el.deb ...
Unpacking libcudnn6-dev (6.0.21-1+cuda8.0) ...
Setting up libcudnn6-dev (6.0.21-1+cuda8.0) ...
update-alternatives: using /usr/include/powerpc64le-linux-gnu/cudnn_v6.h to provide /usr/include/cudnn.h (libcudnn) in auto mode
root@minsky:/sw/dw#

Install the NCCL libraries

(necessary to run on more than one GPU)


root@minsky:/sw/dw# wget https://public.dhe.ibm.com/software/server/POWER/Linux/mldl/ubuntu/mldl-repo-network_3.4.0_ppc64el.deb
--2017-05-17 12:45:01-- https://public.dhe.ibm.com/software/server/POWER/Linux/mldl/ubuntu/mldl-repo-network_3.4.0_ppc64el.deb
Resolving public.dhe.ibm.com (public.dhe.ibm.com)... 9.17.248.112
Connecting to public.dhe.ibm.com (public.dhe.ibm.com)|9.17.248.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 144760 (141K) [text/plain]
Saving to: ‘mldl-repo-network_3.4.0_ppc64el.deb’

mldl-repo-network_3.4.0_ppc64el.deb 100%[=====================================================================================================>] 141.37K 181KB/s in 0.8s

2017-05-17 12:45:08 (181 KB/s) - ‘mldl-repo-network_3.4.0_ppc64el.deb’ saved [144760/144760]

root@minsky:/sw/dw# apt install ./mldl-repo-network_3.4.0_ppc64el.deb
Reading package lists... Done
Building dependency tree
Reading state information... Done
Note, selecting 'mldl-repo-network' instead of './mldl-repo-network_3.4.0_ppc64el.deb'
The following NEW packages will be installed:
mldl-repo-network
0 upgraded, 1 newly installed, 0 to remove and 76 not upgraded.
Need to get 0 B/145 kB of archives.
After this operation, 170 kB of additional disk space will be used.
Get:1 /sw/dw/mldl-repo-network_3.4.0_ppc64el.deb mldl-repo-network ppc64el 3.4.0 [145 kB]
Selecting previously unselected package mldl-repo-network.
(Reading database ... 111286 files and directories currently installed.)
Preparing to unpack .../mldl-repo-network_3.4.0_ppc64el.deb ...
Unpacking mldl-repo-network (3.4.0) ...
Setting up mldl-repo-network (3.4.0) ...
OK

root@minsky:/sw/dw# apt update
Hit:1 http://ports.ubuntu.com/ubuntu-ports xenial-security InRelease
Ign:2 http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/ppc64el InRelease
Hit:3 http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1604/ppc64el Release
Hit:5 http://se.ports.ubuntu.com/ubuntu-ports xenial InRelease
Hit:6 http://se.ports.ubuntu.com/ubuntu-ports xenial-updates InRelease
Hit:7 http://se.ports.ubuntu.com/ubuntu-ports xenial-backports InRelease
Get:8 http://public.dhe.ibm.com/software/server/POWER/Linux/mldl/ubuntu xenial InRelease [1,830 B]
Get:9 http://public.dhe.ibm.com/software/server/POWER/Linux/mldl/ubuntu xenial/main ppc64el Packages [33.8 kB]
Fetched 35.6 kB in 6s (5,700 B/s)
Reading package lists... Done
Building dependency tree
Reading state information... Done
76 packages can be upgraded. Run 'apt list --upgradable' to see them.

root@minsky:/sw/dw# apt search nccl
Sorting... Done
Full Text Search... Done
.

libnccl-dev/unknown 1.3.2-1.cuda8.0 ppc64el
NVIDIA Collectives Communication Library (NCCL) Development Files

libnccl1/unknown 1.3.2-1.cuda8.0 ppc64el
NVIDIA Collectives Communication Library (NCCL) Runtime


root@minsky:/sw/dw# apt install libnccl1 libnccl-dev
Reading package lists... Done
Building dependency tree
Reading state information... Done
.
.
.
Unpacking libnccl-dev (1.3.2-1.cuda8.0) ...
Setting up libnccl-dev (1.3.2-1.cuda8.0) ...
root@minsky:/sw/dw#


export NCCL_ROOT_DIR=/opt/DL/nccl

# You can add this export line in the .profile of your root user.




Tuesday, 19 September 2017

Deep Learning HOWTO with IBM Minsky Power8 server, Ubuntu 16.04, Caffe-NV and Kitti (Part 2)

The Minsky server has some differences compared with a regular one. By far the most time-consuming one (if you are not aware of it) is the BMC port and how to connect to it.
Here is the explanation.

Interfaces in Minsky

IBM Minsky server (model 8335-GTB) has 3 NICs (Ethernet interfaces), however it only has two physical ports.

Looking at the back of the server you will find both ports (Port0 and Port1)



Port 1 is a common port that can be configured as a regular NIC in the OS.
Port 0 is a physical port that has two internal NICs, it shows 2 different MAC addresses to the network.
  1. The first MAC address is from a NIC that can be used in the OS as another regular Ethernet adapter.
  2. The second MAC is from the BMC interface. It is configured as DHCP by default.

Implementing the connection

You can use whatever solution you prefer to connect; just check the interface (see the first commands of our solution) for the acquired IP address. As we do not have a DHCP server, we will configure a static IP address in the same range as NIC1 and ignore the NIC0 interface.

From Ubuntu CLI

  1. Install the necessary packages in the host OS.

root@minsky:/etc/network# apt install ipmitool

  1. Then we have to load the IPMI driver

root@minsky:/etc/network# modprobe ipmi_devintf

  1. Now we will check the actual configuration
root@minsky:/etc/network# ipmitool lan print 1
Set in Progress         : Set Complete
Auth Type Support       : MD5 
Auth Type Enable        : Callback : MD5 
                        : User     : MD5 
                        : Operator : MD5 
                        : Admin    : MD5 
                        : OEM      : MD5 
IP Address Source       : DHCP Address
IP Address              : 0.0.0.0
Subnet Mask             : 0.0.0.0
MAC Address             : 70:e2:84:14:25:fd
SNMP Community String   : AMI
IP Header               : TTL=0x40 Flags=0x40 Precedence=0x00 TOS=0x10
BMC ARP Control         : ARP Responses Enabled, Gratuitous ARP Disabled
Gratituous ARP Intrvl   : 0.0 seconds
Default Gateway IP      : 0.0.0.0
Default Gateway MAC     : 00:00:00:00:00:00
Backup Gateway IP       : 0.0.0.0
Backup Gateway MAC      : 00:00:00:00:00:00
802.1q VLAN ID          : Disabled
802.1q VLAN Priority    : 0
RMCP+ Cipher Suites     : 0,1,2,3,6,7,8,11,12,15,16,17
Cipher Suite Priv Max   : caaaaaaaaaaaXXX
                        :     X=Cipher Suite Unused
                        :     c=CALLBACK
                        :     u=USER
                        :     o=OPERATOR
                        :     a=ADMIN
                        :     O=OEM
Bad Password Threshold  : 0
Invalid password disable: no
Attempt Count Reset Int.: 0
User Lockout Interval   : 0
root@minsky:/etc/network# 


  1. Check the current IP address configuration
root@minsky:/etc/network# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: enP9p7s0f0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
    link/ether 70:e2:84:14:25:fe brd ff:ff:ff:ff:ff:ff
    inet6 fe80::72e2:84ff:fe14:25fe/64 scope link 
       valid_lft forever preferred_lft forever
3: enP9p7s0f1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
    link/ether 70:e2:84:14:25:ff brd ff:ff:ff:ff:ff:ff
    inet 10.250.45.2/24 brd 10.250.45.255 scope global enP9p7s0f1
       valid_lft forever preferred_lft forever
    inet6 fe80::72e2:84ff:fe14:25ff/64 scope link 
       valid_lft forever preferred_lft forever

  1. Configuration of the BMC interface with a static IP
root@minsky:/etc/network# ipmitool lan set 1 ipsrc static
root@minsky:/etc/network# ipmitool lan set 1 ipaddr 10.250.45.3
Setting LAN IP Address to 10.250.45.3
root@minsky:/etc/network# ipmitool lan set 1 netmask 255.255.255.0
Setting LAN Subnet Mask to 255.255.255.0
root@minsky:/etc/network# ipmitool lan set 1 defgw ipaddr 10.250.45.1
Setting LAN Default Gateway IP to 10.250.45.1
root@minsky:/etc/network# ipmitool lan set 1 arp respond on
Enabling BMC-generated ARP responses
root@minsky:/etc/network# ipmitool lan set 1 auth ADMIN MD5
root@minsky:/etc/network# ipmitool lan set 1 access on
Set Channel Access for channel 1 was successful.
root@minsky:/etc/network# 

  1. Check the outcome
root@minsky:~# ipmitool lan print 1
Set in Progress         : Set Complete
Auth Type Support       : MD5 
Auth Type Enable        : Callback : MD5 
                        : User     : MD5 
                        : Operator : MD5 
                        : Admin    : MD5 
                        : OEM      : MD5 
IP Address Source       : Static Address
IP Address              : 10.250.45.3
Subnet Mask             : 255.255.255.0
MAC Address             : 70:e2:84:14:25:fd
SNMP Community String   : AMI
IP Header               : TTL=0x40 Flags=0x40 Precedence=0x00 TOS=0x10
BMC ARP Control         : ARP Responses Enabled, Gratuitous ARP Disabled
Gratituous ARP Intrvl   : 0.0 seconds
Default Gateway IP      : 10.250.45.1
Default Gateway MAC     : 02:e0:52:8c:1f:01
Backup Gateway IP       : 0.0.0.0
Backup Gateway MAC      : 00:00:00:00:00:00
802.1q VLAN ID          : Disabled
802.1q VLAN Priority    : 0
RMCP+ Cipher Suites     : 0,1,2,3,6,7,8,11,12,15,16,17
Cipher Suite Priv Max   : caaaaaaaaaaaXXX
                        :     X=Cipher Suite Unused
                        :     c=CALLBACK
                        :     u=USER
                        :     o=OPERATOR
                        :     a=ADMIN
                        :     O=OEM
Bad Password Threshold  : 0
Invalid password disable: no
Attempt Count Reset Int.: 0
User Lockout Interval   : 0

  1. Now we can connect using a web browser


Credentials: username ADMIN password admin

You are in!!!




Deep Learning HOWTO with IBM Minsky Power8 server, Ubuntu 16.04, Caffe-NV and Kitti (Part 1)

Who should be reading this

This document is suited to anyone who wants to start testing deep learning at supercomputer scale with an IBM Minsky server.
It will help you install all the latest and greatest components of this solution and then run a deep learning example, the KITTI test.

Justification of the document

You can follow the official IBM documentation (located here → https://www-01.ibm.com/common/ssi/cgi-bin/ssialias?htmlfid=POO03514WWEN ) to install the components necessary to run this test.

However, if you want the latest and greatest versions of all SW components, you will find here a quick guide to install all of them, with examples included.

Ubuntu 16.04.02 OS installation


# Download Ubuntu 16.04.02 LTS from this URL (check if there is something newer)

# Burn the ISO into a USB drive and boot the system.

# Select the 4.4 kernel as in the photo. Be aware that you may be presented with the option of installing HWE (shipped with kernel version 4.8); this is not supported at the time of writing this doc.

# Install only ssh and base packages.

# After first boot check the output is kernel 4.4 (not 4.8)

root@minsky:~# uname -a
Linux minsky 4.4.0-62-generic #83-Ubuntu SMP Wed Jan 18 14:09:19 UTC 2017 ppc64le ppc64le ppc64le GNU/Linux
root@minsky:~#


# Note down the IP address(es) so you can access the server later via ssh.

root@minsky:/sw/dw# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1
    link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
    inet 127.0.0.1/8 scope host lo
       valid_lft forever preferred_lft forever
    inet6 ::1/128 scope host 
       valid_lft forever preferred_lft forever
2: enP9p7s0f0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
    link/ether 70:e2:84:14:25:fe brd ff:ff:ff:ff:ff:ff
    inet 9.6.112.113/25 brd 9.6.112.127 scope global enP9p7s0f0
       valid_lft forever preferred_lft forever
    inet6 fe80::72e2:84ff:fe14:25fe/64 scope link 
       valid_lft forever preferred_lft forever
3: enP9p7s0f1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP group default qlen 1000
    link/ether 70:e2:84:14:25:ff brd ff:ff:ff:ff:ff:ff
    inet 9.6.112.89/25 brd 9.6.112.127 scope global enP9p7s0f1
       valid_lft forever preferred_lft forever
    inet6 fe80::72e2:84ff:fe14:25ff/64 scope link 
       valid_lft forever preferred_lft forever
root@minsky:/sw/dw# 



Flying in the AVE