Wednesday, 20 September 2017

Deep Learning HOWTO with IBM Minsky Power8 server, Ubuntu 16.04, Caffe-NV and Kitti (Part 5).

This is the last part: first we will optimize the environment, and then we will finally perform a test run with the Kitti dataset. :-)

Optimize the system

# Optimize the environment

root@minsky:~# apt install linux-tools-common linux-tools-4.4.0-62-generic linux-tools-generic

root@minsky:~# cpupower -c all frequency-set -g performance
Setting cpu: 0
Setting cpu: 1
Setting cpu: 8
Setting cpu: 9
.
.
.
Setting cpu: 113
Setting cpu: 120
Setting cpu: 121

root@minsky:~# ppc64_cpu --smt=2

root@minsky:~# nvidia-smi -pm ENABLED
Enabled persistence mode for GPU 0002:01:00.0.
Enabled persistence mode for GPU 000A:01:00.0.
All done.

root@minsky:~# nvidia-smi -ac 715,1480
Applications clocks set to "(MEM 715, SM 1480)" for GPU 0002:01:00.0
Applications clocks set to "(MEM 715, SM 1480)" for GPU 000A:01:00.0
All done.



# If you want, you can also disable the Nvidia ECC memory capability (this should provide some extra performance)

root@minsky:~# nvidia-smi -e 0 # 0 is a zero
root@minsky:~# reboot

Kitti test run

https://github.com/NVIDIA/DIGITS/blob/v4.0.0/examples/object-detection/README.md

wget http://kitti.is.tue.mpg.de/kitti/data_object_image_2.zip
wget https://fredrikarneving.se/digits/data_object_label_2.zip --no-check-certificate
wget https://fredrikarneving.se/digits/devkit_object.zip --no-check-certificate
wget https://fredrikarneving.se/digits/caffe_nv_model.txt --no-check-certificate
wget https://fredrikarneving.se/digits/bvlc_googlenet.caffemodel --no-check-certificate


root@minsky:/sw/dw/data# cp ./devkit_object.zip $DIGITS_HOME/examples/object-detection/
root@minsky:/sw/dw/data# cp ./data_object_label_2.zip $DIGITS_HOME/examples/object-detection/
root@minsky:/sw/dw/data# cp data_object_image_2.zip $DIGITS_HOME/examples/object-detection/


root@minsky:/sw/dw/data# cd $DIGITS_HOME/examples/object-detection/
root@minsky:/sw/digits/examples/object-detection# ./prepare_kitti_data.py
Extracting zipfiles ...
Unzipping data_object_label_2.zip ...
Unzipping data_object_image_2.zip ...
Unzipping devkit_object.zip ...
Calculating image to video mapping ...
Splitting images by video ...
Creating train/val split ...
Done.
root@minsky:/sw/digits/examples/object-detection#

# Follow the instructions in the URL until you get this web page

# After some minutes it will have finished
# Run the test as the URL suggests, using:
#   Dataset = "Kitti default"
#   Epochs = 100
#   Subtract mean = "None"
#   Batch size = 16
#   Solver type = "Adam"
#   Base Learning rate = 0.0001
#   Custom Network = https://raw.githubusercontent.com/NVIDIA/caffe/caffe-0.15/examples/kitti/detectnet_network.prototxt
#   Pretrained model = https://github.com/BVLC/caffe/tree/rc3/models/bvlc_googlenet
# You should get something similar to this:
# As a reference, a similar Supermicro Intel-based server with 2xP100 GPUs runs this test in 300 minutes.

Interesting links


http://developer.download.nvidia.com/compute/cuda/repos/

Deep Learning HOWTO with IBM Minsky Power8 server, Ubuntu 16.04, Caffe-NV and Kitti (Part 4)

Building Caffe

# Install requirements
root@minsky:/sw/dw# sudo apt-get install --no-install-recommends build-essential cmake git gfortran libatlas-base-dev libboost-filesystem-dev libboost-python-dev libboost-system-dev libboost-thread-dev libgflags-dev libgoogle-glog-dev libhdf5-serial-dev libleveldb-dev liblmdb-dev libopencv-dev libprotobuf-dev libsnappy-dev protobuf-compiler python-all-dev python-dev python-h5py python-matplotlib python-numpy python-opencv python-pil python-pip python-protobuf python-scipy python-skimage python-sklearn

# Get it from github

root@minsky:/sw/dw# export CAFFE_ROOT=/sw/caffe
root@minsky:/sw/dw# cd /sw
root@minsky:/sw# mkdir caffe
root@minsky:/sw# cd caffe
root@minsky:/sw/caffe# git clone https://github.com/NVIDIA/caffe.git $CAFFE_ROOT
Cloning into '/sw/caffe'...
remote: Counting objects: 27834, done.
remote: Compressing objects: 100% (19/19), done.
remote: Total 27834 (delta 23), reused 16 (delta 16), pack-reused 27799
Receiving objects: 100% (27834/27834), 39.83 MiB | 966.00 KiB/s, done.
Resolving deltas: 100% (17887/17887), done.
Checking connectivity... done.
root@minsky:/sw/caffe# 

root@minsky:/sw/caffe# pip install -r $CAFFE_ROOT/python/requirements.txt 
# You may have a lot of things to do in this step. TIP: Upgrade pip to the newer version, as it suggests

ERROR FIX !


Fixed build failure on power arch by replacing pause instruction with "or 27,27,27"
URL: https://github.com/NVIDIA/caffe/pull/247/commits/46f2308c110806b461829c0d14d1d83f6ae8201c

root@minsky:/sw/caffe/build# vi /sw/caffe/3rdparty/cub/host/mutex.cuh

*/
__forceinline__ void YieldProcessor()
{

- #ifndef __arm__
- asm volatile("pause\n": : :"memory");
- #endif // __arm__

+ #if defined(__powerpc64__) || defined(__powerpc__)
+ asm volatile("or 27,27,27\n": : :"memory");
+ #else
+ #ifndef __arm__
+ asm volatile("pause\n": : :"memory");
+ #endif //__arm__
+ #endif //__powerpc64__
}

# Get IBM XLmass libs

root@minsky:/sw/dw# wget https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/dists/xenial/main/binary-ppc64el/libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
--2017-05-17 19:00:37--  https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/dists/xenial/main/binary-ppc64el/libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
Resolving public.dhe.ibm.com (public.dhe.ibm.com)... 9.17.248.112
Connecting to public.dhe.ibm.com (public.dhe.ibm.com)|9.17.248.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 768010 (750K) [text/plain]
Saving to: ‘libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb’

libxlmass-devel.8.1.5_8.1 100%[=====================================>] 750.01K   441KB/s    in 1.7s    

2017-05-17 19:00:45 (441 KB/s) - ‘libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb’ saved [768010/768010]


# To use the Xlmass libs to optimize the code
# Download from here
https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/
root@minsky:/sw/dw# wget https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/dists/xenial/main/binary-ppc64el/libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
root@minsky:/sw/dw# apt install ./libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
# Edit Makefile and add the following
https://github.com/ibmsoe/caffe/commit/f6c258af36127e901a00a1934402498d1ecce3ca
+# MASS configuration.
+ifeq ($(USE_MASS), 1)
+       LIBRARIES += mass massvp8 mass_simdp8
+       COMMON_FLAGS += -DUSE_MASS
+       CXXFLAGS += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations
+       LIBRARY_DIRS += $(MASS_LIB)
+endif
+
# NCCL acceleration configuration
# Configure the compilation
root@minsky:/sw/caffe# cd $CAFFE_ROOT
root@minsky:/sw/caffe# mkdir build
root@minsky:/sw/caffe# cd build
root@minsky:/sw/caffe/build# cmake .. -DUSE_NCCL=ON -DNCCL_ROOT_DIR=/opt/DL/nccl -DCMAKE_BUILD_TYPE=Release

-- Boost version: 1.58.0
-- Found the following Boost libraries:
-- system
-- thread
-- filesystem
-- chrono
-- date_time
-- atomic
-- Found gflags (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libgflags.so)
-- Found glog (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libglog.so)
-- Found PROTOBUF Compiler: /usr/bin/protoc
-- Found lmdb (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/liblmdb.so)
-- Found LevelDB (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libleveldb.so)
-- Found Snappy (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libsnappy.so)
-- CUDA detected: 8.0
-- Found cuDNN: ver. 5.1.10 found (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libcudnn.so)
-- Added CUDA NVCC flags for: sm_60
-- OpenCV found (/usr/share/OpenCV)
-- Found Atlas (include: /usr/include, library: /usr/lib/libatlas.so)
-- NumPy ver. 1.11.0 found (include: /usr/lib/python2.7/dist-packages/numpy/core/include)
-- Boost version: 1.58.0
-- Found the following Boost libraries:
-- python
-- Could NOT find Doxygen (missing: DOXYGEN_EXECUTABLE)
-- Found NCCL (include: /opt/DL/nccl/include, library: /opt/DL/nccl/lib/libnccl.so)
--
-- ******************* Caffe Configuration Summary *******************
-- General:
-- Version : 0.15.14
-- Git : v0.15.14-10-g6d72336-dirty
-- System : Linux
-- C++ compiler : /usr/bin/c++
-- Release CXX flags : -O3 -DNDEBUG -fPIC -Wall -Wno-sign-compare -Wno-uninitialized
-- Debug CXX flags : -g -fPIC -Wall -Wno-sign-compare -Wno-uninitialized
-- Build type : Release
--
-- BUILD_SHARED_LIBS : ON
-- BUILD_python : ON
-- BUILD_matlab : OFF
-- BUILD_docs : ON
-- CPU_ONLY : OFF
-- USE_OPENCV : ON
-- USE_LEVELDB : ON
-- USE_LMDB : ON
-- ALLOW_LMDB_NOLOCK : OFF
--
-- Dependencies:
-- BLAS : Yes (Atlas)
-- Boost : Yes (ver. 1.58)
-- glog : Yes
-- gflags : Yes
-- protobuf : Yes (ver. 2.6.1)
-- lmdb : Yes (ver. 0.9.17)
-- LevelDB : Yes (ver. 1.18)
-- Snappy : Yes (ver. 1.1.3)
-- OpenCV : Yes (ver. 2.4.9.1)
-- CUDA : Yes (ver. 8.0)
--
-- NVIDIA CUDA:
-- Target GPU(s) : Auto
-- GPU arch(s) : sm_60
-- cuDNN : Yes (ver. 5.1.10)
-- NCCL : Yes
--
-- Python:
-- Interpreter : /usr/bin/python2.7 (ver. 2.7.12)
-- Libraries : /usr/lib/powerpc64le-linux-gnu/libpython2.7.so (ver 2.7.12)
-- NumPy : /usr/lib/python2.7/dist-packages/numpy/core/include (ver 1.11.0)
--
-- Documentaion:
-- Doxygen : No
-- config_file :
--
-- Install:
-- Install path : /sw/caffe/build/install
--
-- Configuring done
-- Generating done
-- Build files have been written to: /sw/caffe/build

ERROR FIX !

# Fixed the make failure by adding the full path in the include

#ERROR shown:
/sw/caffe/include/caffe/util/nccl.hpp:5:18: fatal error: nccl.h: No such file or directory

root@minsky:/sw/caffe/build# vi /sw/caffe/include/caffe/util/nccl.hpp

# ADD the full path in the include line
#include </opt/DL/nccl/include/nccl.h>


# Now compile caffe-nv
root@minsky:/sw/caffe/build# make --jobs=16 # Just use the max number of threads
[ 1%] Built target proto
Scanning dependencies of target caffe
[ 2%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/window_data_layer.cpp.o
[ 2%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/reshape_layer.cpp.o
[ 2%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/lrn_layer.cpp.o
[ 4%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/base_conv_layer.cpp.o
.
.
.
[100%] Linking CXX executable caffe
[100%] Built target caffe.bin
[100%] Linking CXX shared library ../lib/_caffe.so
Creating symlink /sw/caffe/python/caffe/_caffe.so -> /sw/caffe/build/lib/_caffe.so
[100%] Built target pycaffe

root@minsky:/sw/caffe/build# /sw/caffe/build/tools/caffe --version
caffe version 0.15.14

Building DIGITS


# Install requirements


root@minsky:/sw/digits# apt install --no-install-recommends git graphviz python-dev python-flask python-flaskext.wtf python-gevent python-h5py python-numpy python-pil python-pip python-protobuf python-scipy

root@minsky:/sw/digits# apt install python-tk

# Get it from github

root@minsky:/sw# DIGITS_ROOT=/sw/digits
root@minsky:/sw# git clone https://github.com/NVIDIA/DIGITS.git $DIGITS_ROOT
Cloning into '/sw/digits'...
remote: Counting objects: 11496, done.
remote: Total 11496 (delta 0), reused 0 (delta 0), pack-reused 11496
Receiving objects: 100% (11496/11496), 24.37 MiB | 72.00 KiB/s, done.
Resolving deltas: 100% (7571/7571), done.
Checking connectivity... done.

# Now the requirements

root@minsky:/sw#  pip install -r $DIGITS_ROOT/requirements.txt
Requirement already satisfied: Pillow<=3.1.2,>=2.3.0 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 1))
Requirement already satisfied: numpy<=1.11.0,>=1.8.1 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 2))
Requirement already satisfied: scipy<=0.17.0,>=0.13.3 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 3))
Requirement already satisfied: protobuf<=2.6.1,>=2.5.0 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 4))
.
.
.
Requirement already satisfied: Werkzeug>=0.7 in /usr/lib/python2.7/dist-packages (from Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Requirement already satisfied: Jinja2>=2.4 in /usr/lib/python2.7/dist-packages (from Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Requirement already satisfied: itsdangerous>=0.21 in /usr/lib/python2.7/dist-packages (from Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Collecting python-socketio>=1.4 (from Flask-SocketIO==2.6->-r /sw/digits/requirements.txt (line 12))
  Downloading python-socketio-1.7.4.tar.gz
Collecting python-engineio>=0.9.2 (from Flask-SocketIO==2.6->-r /sw/digits/requirements.txt (line 12))
  Downloading python-engineio-1.5.2.tar.gz
Collecting pyparsing==1.5.7 (from pydot<=1.0.29,>=1.0.28->-r /sw/digits/requirements.txt (line 16))
  Downloading pyparsing-1.5.7.zip (965kB)
    100% |████████████████████████████████| 972kB 1.1MB/s 
Requirement already satisfied: MarkupSafe in /usr/lib/python2.7/dist-packages (from Jinja2>=2.4->Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Installing collected packages: requests, gevent-websocket, python-engineio, python-socketio, Flask-SocketIO, lmdb, pyparsing, pydot, psutil, scikit-fmm
  Running setup.py install for python-engineio ... done
  Running setup.py install for python-socketio ... done
  Running setup.py install for Flask-SocketIO ... done
  Running setup.py install for lmdb ... done
  Found existing installation: pyparsing 2.0.3
    Uninstalling pyparsing-2.0.3:
      Successfully uninstalled pyparsing-2.0.3
  Running setup.py install for pyparsing ... done
  Running setup.py install for pydot ... done
  Running setup.py install for psutil ... done
  Running setup.py install for scikit-fmm ... done
Successfully installed Flask-SocketIO-2.6 gevent-websocket-0.9.3 lmdb-0.87 psutil-3.4.2 pydot-1.0.29 pyparsing-1.5.7 python-engineio-1.5.2 python-socketio-1.7.4 requests-2.9.1 scikit-fmm-0.0.9
root@minsky:/sw# 


# RUN it !! :-D

root@minsky:/sw/digits# screen
root@minsky:/sw/digits# ./digits-devserver 
  ___ ___ ___ ___ _____ ___
 |   \_ _/ __|_ _|_   _/ __|
 | |) | | (_ || |  | | \__ \
 |___/___\___|___| |_| |___/ 5.1-dev

2017-05-17 13:30:00 [INFO ] Loaded 0 jobs.


# To be able to access it, open a browser and point it to the following URL:
# URL: http://IPADDRESS:5000 

Flying in the AVE