Wednesday, 20 September 2017

Deep Learning HOWTO with IBM Minsky Power8 server, Ubuntu 16.04, Caffe-NV and Kitti (Part 4)

Building Caffe

# Install requirements
root@minsky:/sw/dw# sudo apt-get install --no-install-recommends build-essential cmake git gfortran libatlas-base-dev libboost-filesystem-dev libboost-python-dev libboost-system-dev libboost-thread-dev libgflags-dev libgoogle-glog-dev libhdf5-serial-dev libleveldb-dev liblmdb-dev libopencv-dev libprotobuf-dev libsnappy-dev protobuf-compiler python-all-dev python-dev python-h5py python-matplotlib python-numpy python-opencv python-pil python-pip python-protobuf python-scipy python-skimage python-sklearn

# Get it from github

root@minsky:/sw/dw# export CAFFE_ROOT=/sw/caffe
root@minsky:/sw/dw# cd /sw
root@minsky:/sw# mkdir caffe
root@minsky:/sw# cd caffe
root@minsky:/sw/caffe# git clone https://github.com/NVIDIA/caffe.git $CAFFE_ROOT
Cloning into '/sw/caffe'...
remote: Counting objects: 27834, done.
remote: Compressing objects: 100% (19/19), done.
remote: Total 27834 (delta 23), reused 16 (delta 16), pack-reused 27799
Receiving objects: 100% (27834/27834), 39.83 MiB | 966.00 KiB/s, done.
Resolving deltas: 100% (17887/17887), done.
Checking connectivity... done.
root@minsky:/sw/caffe# 

root@minsky:/sw/caffe# pip install -r $CAFFE_ROOT/python/requirements.txt 
# You may have a lot of things to do in this step. TIP: upgrade pip to the newer version, as it suggests.

ERROR FIX !


Fixed build failure on power arch by replacing pause instruction with…
URL: https://github.com/NVIDIA/caffe/pull/247/commits/46f2308c110806b461829c0d14d1d83f6ae8201c

root@minsky:/sw/caffe/build# vi /sw/caffe/3rdparty/cub/host/mutex.cuh

*/
__forceinline__ void YieldProcessor()
{

- #ifndef __arm__
- asm volatile("pause\n": : :"memory");
- #endif // __arm__

+ #if defined(__powerpc64__) || defined(__powerpc__)
+ asm volatile("or 27,27,27\n": : :"memory");
+ #else
+ #ifndef __arm__
+ asm volatile("pause\n": : :"memory");
+ #endif //__arm__
+ #endif //__powerpc64__
}

# Get IBM XLmass libs

root@minsky:/sw/dw# wget https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/dists/xenial/main/binary-ppc64el/libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
--2017-05-17 19:00:37--  https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/dists/xenial/main/binary-ppc64el/libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
Resolving public.dhe.ibm.com (public.dhe.ibm.com)... 9.17.248.112
Connecting to public.dhe.ibm.com (public.dhe.ibm.com)|9.17.248.112|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 768010 (750K) [text/plain]
Saving to: ‘libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb’

libxlmass-devel.8.1.5_8.1 100%[=====================================>] 750.01K   441KB/s    in 1.7s    

2017-05-17 19:00:45 (441 KB/s) - ‘libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb’ saved [768010/768010]


# To use the Xlmass libs to optimize the code
# Download from here
https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/
root@minsky:/sw/dw# wget https://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/dists/xenial/main/binary-ppc64el/libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
root@minsky:/sw/dw# apt install ./libxlmass-devel.8.1.5_8.1.5.1-161129_ppc64el.deb
# Edit Makefile and add the following
https://github.com/ibmsoe/caffe/commit/f6c258af36127e901a00a1934402498d1ecce3ca
+# MASS configuration.
+ifeq ($(USE_MASS), 1)
+       LIBRARIES += mass massvp8 mass_simdp8
+       COMMON_FLAGS += -DUSE_MASS
+       CXXFLAGS += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations
+       LIBRARY_DIRS += $(MASS_LIB)
+endif
+
# NCCL acceleration configuration
# Configure the compilation
root@minsky:/sw/caffe# cd $CAFFE_ROOT
root@minsky:/sw/caffe# mkdir build
root@minsky:/sw/caffe# cd build
root@minsky:/sw/caffe/build# cmake .. -DUSE_NCCL=ON -DNCCL_ROOT_DIR=/opt/DL/nccl -DCMAKE_BUILD_TYPE=Release

-- Boost version: 1.58.0
-- Found the following Boost libraries:
-- system
-- thread
-- filesystem
-- chrono
-- date_time
-- atomic
-- Found gflags (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libgflags.so)
-- Found glog (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libglog.so)
-- Found PROTOBUF Compiler: /usr/bin/protoc
-- Found lmdb (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/liblmdb.so)
-- Found LevelDB (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libleveldb.so)
-- Found Snappy (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libsnappy.so)
-- CUDA detected: 8.0
-- Found cuDNN: ver. 5.1.10 found (include: /usr/include, library: /usr/lib/powerpc64le-linux-gnu/libcudnn.so)
-- Added CUDA NVCC flags for: sm_60
-- OpenCV found (/usr/share/OpenCV)
-- Found Atlas (include: /usr/include, library: /usr/lib/libatlas.so)
-- NumPy ver. 1.11.0 found (include: /usr/lib/python2.7/dist-packages/numpy/core/include)
-- Boost version: 1.58.0
-- Found the following Boost libraries:
-- python
-- Could NOT find Doxygen (missing: DOXYGEN_EXECUTABLE)
-- Found NCCL (include: /opt/DL/nccl/include, library: /opt/DL/nccl/lib/libnccl.so)
--
-- ******************* Caffe Configuration Summary *******************
-- General:
-- Version : 0.15.14
-- Git : v0.15.14-10-g6d72336-dirty
-- System : Linux
-- C++ compiler : /usr/bin/c++
-- Release CXX flags : -O3 -DNDEBUG -fPIC -Wall -Wno-sign-compare -Wno-uninitialized
-- Debug CXX flags : -g -fPIC -Wall -Wno-sign-compare -Wno-uninitialized
-- Build type : Release
--
-- BUILD_SHARED_LIBS : ON
-- BUILD_python : ON
-- BUILD_matlab : OFF
-- BUILD_docs : ON
-- CPU_ONLY : OFF
-- USE_OPENCV : ON
-- USE_LEVELDB : ON
-- USE_LMDB : ON
-- ALLOW_LMDB_NOLOCK : OFF
--
-- Dependencies:
-- BLAS : Yes (Atlas)
-- Boost : Yes (ver. 1.58)
-- glog : Yes
-- gflags : Yes
-- protobuf : Yes (ver. 2.6.1)
-- lmdb : Yes (ver. 0.9.17)
-- LevelDB : Yes (ver. 1.18)
-- Snappy : Yes (ver. 1.1.3)
-- OpenCV : Yes (ver. 2.4.9.1)
-- CUDA : Yes (ver. 8.0)
--
-- NVIDIA CUDA:
-- Target GPU(s) : Auto
-- GPU arch(s) : sm_60
-- cuDNN : Yes (ver. 5.1.10)
-- NCCL : Yes
--
-- Python:
-- Interpreter : /usr/bin/python2.7 (ver. 2.7.12)
-- Libraries : /usr/lib/powerpc64le-linux-gnu/libpython2.7.so (ver 2.7.12)
-- NumPy : /usr/lib/python2.7/dist-packages/numpy/core/include (ver 1.11.0)
--
-- Documentaion:
-- Doxygen : No
-- config_file :
--
-- Install:
-- Install path : /sw/caffe/build/install
--
-- Configuring done
-- Generating done
-- Build files have been written to: /sw/caffe/build

ERROR FIX !

# Fixed the make failure by adding the full path in the include

#ERROR shown:
/sw/caffe/include/caffe/util/nccl.hpp:5:18: fatal error: nccl.h: No such file or directory

root@minsky:/sw/caffe/build# vi /sw/caffe/include/caffe/util/nccl.hpp

# ADD the full path in the include line
#include </opt/DL/nccl/include/nccl.h>


# Compile now caffe-nv
root@minsky:/sw/caffe/build# make --jobs=16 # Just use the max number of threads
[ 1%] Built target proto
Scanning dependencies of target caffe
[ 2%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/window_data_layer.cpp.o
[ 2%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/reshape_layer.cpp.o
[ 2%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/lrn_layer.cpp.o
[ 4%] Building CXX object src/caffe/CMakeFiles/caffe.dir/layers/base_conv_layer.cpp.o
.
.
.
[100%] Linking CXX executable caffe
[100%] Built target caffe.bin
[100%] Linking CXX shared library ../lib/_caffe.so
Creating symlink /sw/caffe/python/caffe/_caffe.so -> /sw/caffe/build/lib/_caffe.so
[100%] Built target pycaffe

root@minsky:/sw/caffe/build# /sw/caffe/build/tools/caffe --version
caffe version 0.15.14

Building DIGITS


# Install requirements


root@minsky:/sw/digits# apt install --no-install-recommends git graphviz python-dev python-flask python-flaskext.wtf python-gevent python-h5py python-numpy python-pil python-pip python-protobuf python-scipy

root@minsky:/sw/digits# apt install python-tk

# Get it from github

root@minsky:/sw# DIGITS_ROOT=/sw/digits
root@minsky:/sw# git clone https://github.com/NVIDIA/DIGITS.git $DIGITS_ROOT
Cloning into '/sw/digits'...
remote: Counting objects: 11496, done.
remote: Total 11496 (delta 0), reused 0 (delta 0), pack-reused 11496
Receiving objects: 100% (11496/11496), 24.37 MiB | 72.00 KiB/s, done.
Resolving deltas: 100% (7571/7571), done.
Checking connectivity... done.

# Now the requirements

root@minsky:/sw#  pip install -r $DIGITS_ROOT/requirements.txt
Requirement already satisfied: Pillow<=3.1.2,>=2.3.0 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 1))
Requirement already satisfied: numpy<=1.11.0,>=1.8.1 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 2))
Requirement already satisfied: scipy<=0.17.0,>=0.13.3 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 3))
Requirement already satisfied: protobuf<=2.6.1,>=2.5.0 in /usr/lib/python2.7/dist-packages (from -r /sw/digits/requirements.txt (line 4))
.
.
.
Requirement already satisfied: Werkzeug>=0.7 in /usr/lib/python2.7/dist-packages (from Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Requirement already satisfied: Jinja2>=2.4 in /usr/lib/python2.7/dist-packages (from Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Requirement already satisfied: itsdangerous>=0.21 in /usr/lib/python2.7/dist-packages (from Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Collecting python-socketio>=1.4 (from Flask-SocketIO==2.6->-r /sw/digits/requirements.txt (line 12))
  Downloading python-socketio-1.7.4.tar.gz
Collecting python-engineio>=0.9.2 (from Flask-SocketIO==2.6->-r /sw/digits/requirements.txt (line 12))
  Downloading python-engineio-1.5.2.tar.gz
Collecting pyparsing==1.5.7 (from pydot<=1.0.29,>=1.0.28->-r /sw/digits/requirements.txt (line 16))
  Downloading pyparsing-1.5.7.zip (965kB)
    100% |████████████████████████████████| 972kB 1.1MB/s 
Requirement already satisfied: MarkupSafe in /usr/lib/python2.7/dist-packages (from Jinja2>=2.4->Flask==0.10.1->-r /sw/digits/requirements.txt (line 9))
Installing collected packages: requests, gevent-websocket, python-engineio, python-socketio, Flask-SocketIO, lmdb, pyparsing, pydot, psutil, scikit-fmm
  Running setup.py install for python-engineio ... done
  Running setup.py install for python-socketio ... done
  Running setup.py install for Flask-SocketIO ... done
  Running setup.py install for lmdb ... done
  Found existing installation: pyparsing 2.0.3
    Uninstalling pyparsing-2.0.3:
      Successfully uninstalled pyparsing-2.0.3
  Running setup.py install for pyparsing ... done
  Running setup.py install for pydot ... done
  Running setup.py install for psutil ... done
  Running setup.py install for scikit-fmm ... done
Successfully installed Flask-SocketIO-2.6 gevent-websocket-0.9.3 lmdb-0.87 psutil-3.4.2 pydot-1.0.29 pyparsing-1.5.7 python-engineio-1.5.2 python-socketio-1.7.4 requests-2.9.1 scikit-fmm-0.0.9
root@minsky:/sw# 


# RUN it !! :-D

root@minsky:/sw/digits# screen
root@minsky:/sw/digits# ./digits-devserver 
  ___ ___ ___ ___ _____ ___
 |   \_ _/ __|_ _|_   _/ __|
 | |) | | (_ || |  | | \__ \
 |___/___\___|___| |_| |___/ 5.1-dev

2017-05-17 13:30:00 [INFO ] Loaded 0 jobs.


# To access it, open a browser and point it to the following address:
# URL: http://IPADDRESS:5000

No comments:

Flying in the AVE