// modules_2mtrnn_2include_2mtrnn_8h_source.html (Doxygen page name for modules/mtrnn/include/mtrnn.h)

//##############################################################################################################################################################################################################//

//Aquila - An Open-Source GPU-Accelerated Toolkit for Cognitive and Neuro-Robotics Research                                                                                                                     //

//                                                                                                                                                                                                              //

//Copyright (c) <2012>, <Martin Peniak - www.martinpeniak.com>                                                                                                                                                  //

//All rights reserved.                                                                                                                                                                                          //

//                                                                                                                                                                                                              //

//Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:                                                                //

//                                                                                                                                                                                                              //

// - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.                                                                               //

// - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. //

//                                                                                                                                                                                                              //

//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR   //

//A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT  //

//LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR    //

//TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.                                                               //

//                                                                                                                                                                                                              //

//The views and conclusions contained in the software and documentation are those of the authors and should not be interpreted                                                                                  //

//as representing official policies,either expressed or implied, of the FreeBSD Project.                                                                                                                        //

//##############################################################################################################################################################################################################//


#ifndef MTRNN_H

#define MTRNN_H

// Upper bound on the number of GPUs; sizes the per-device arrays declared in class MTRNN in this header.
#define MAX_GPU_DEVICES 4

// Not referenced anywhere in this header; presumably caps the number of training sequences — confirm against the implementation files.
#define MAX_SEQUENCES 100


#include <vector>

#include <cuda.h>

#include <cuda_runtime.h>

#include <omp.h>

#include "interface.h"


// NOTE(review): a using-directive in a header applies to every file that includes it.
// The unqualified `string` and `vector` names used in this header depend on it, so it cannot be removed in isolation.
using namespace std;


// MTRNN component of Aquila, declared as a yarp::os::Thread subclass.
// (MTRNN presumably stands for "Multiple Timescales Recurrent Neural Network" — confirm;
// the IO/fast/slow neuron counts and delta-t members below are consistent with that reading.)
// This header only declares the class; every member function is defined elsewhere.
// NOTE(review): the class holds many raw pointers and declares no destructor or copy control;
// ownership and release are presumably handled by allocateMemory()/deinitialise() — confirm.
class MTRNN : public yarp::os::Thread

{

public:

    // Constructs the object from an Interface pointer (presumably stored in `intrfc` — confirm in the definition).
    MTRNN(Interface *pInterface);


private:

    // Interface object supplied by the owner; type is declared in "interface.h".
    Interface *intrfc;


    // One CUDA device-properties record per possible GPU slot.
    cudaDeviceProp deviceInfo[MAX_GPU_DEVICES];

    string trainingFileName;

    string networkFileName;

    // Training data held as a Bottle (type not declared in this header; presumably yarp::os::Bottle — confirm).
    Bottle trainingData;

    size_t totalMemory;


    // Host memory pointers ("_h" suffix). Buffer sizes are not visible in this header.

    int *deltaT_h;

    float *input_h;

    float *activity_h;

    float *error_h;

    float *zeroError_h;

    float *delta_h;

    float *previousDelta_h;

    float *individualError_h;

    float *mse_h;

    float *previousDeltaWeight_h;

    // Unlike the other host buffers, this one is an array with one pointer per GPU slot.
    float *deltaWeight_h[MAX_GPU_DEVICES];

    float *weight_h;

    float *previousPotential_h;

    float *potential_h;

    float *zeroWeight_h;

    float *zeroNeuron_h;


    // Device memory pointers ("_d" suffix), one pointer per GPU slot (index < MAX_GPU_DEVICES).

    int *deltaT_d[MAX_GPU_DEVICES];

    float *mse_d[MAX_GPU_DEVICES];

    float *activity_d[MAX_GPU_DEVICES];

    float *input_d[MAX_GPU_DEVICES];

    float *error_d[MAX_GPU_DEVICES];

    float *delta_d[MAX_GPU_DEVICES];

    float *buffer_d[MAX_GPU_DEVICES];

    float *previousDelta_d[MAX_GPU_DEVICES];

    float *individualError_d[MAX_GPU_DEVICES];

    float *deltaWeight_d[MAX_GPU_DEVICES];

    float *previousDeltaWeight_d[MAX_GPU_DEVICES];

    float *weight_d[MAX_GPU_DEVICES];

    float *potential_d[MAX_GPU_DEVICES];

    float *previousPotential_d[MAX_GPU_DEVICES];


    // Scalar configuration and run-time state. Most of these have matching public set*/get* accessors below.
    // Default values are not visible in this header.

    int debug;

    int feedbackInterval;

    int P2P;

    int availableGPUs;

    int requestedGPUs;

    // Stored as int although setShowProgress() takes a bool.
    int showProgress;

    int maxThreads;

    int seed;

    int ioDeltaT;

    int fastDeltaT;

    int slowDeltaT;

    int iteration;

    int maxIterations;

    int numSequences;

    int sequenceWidth;

    int totalSequenceSteps;

    int maxSequenceSteps;

    int numIONeurons;

    int numFastNeurons;

    int numSlowNeurons;

    int numControlNeurons;

    int numLinguisticNeurons;

    int numVisionNeurons;

    int numActionNeurons;

    int numNeurons;

    int numWeights;

    // Dynamically sized int arrays; presumably one entry per training sequence — confirm in loadTrainingData().
    int *sequenceSteps;

    int *sequenceOffsets;

    float minValue;

    float maxValue;

    float *errors;

    // Set through setGPUMode(); presumably selects the GPU code path over the CPU one — confirm.
    bool GPU;


    // Grid/block sizes and shared-memory sizes; presumably the kernel launch configuration filled in by setGridBlock() — confirm.

    int neuronThreads;

    int neuronBlocks;

    int seqNeuronBlocks;

    int numIoBlocks;

    int threads2D;

    int numFThreads;

    int numHBlocks;

    int numFBlocks;

    int numEThreads;

    int numEBlocks;

    int smemSize;

    int smemESize;

    dim3 dim2DBlock;

    dim3 dim2DGrid;

    dim3 dim2DWBlock;

    dim3 dim2DWGrid;


    // Per-slot int values; presumably the CUDA device IDs chosen via setDevice()/setDevices() — confirm.
    int gpuDevice[MAX_GPU_DEVICES];

    // Training hyper-parameters with public set*/get* accessors below.
    float initWeightRange;

    float threshold;

    float learningRate;

    float momentum;


    // Presumably the yarp::os::Thread entry point override — the base class is not visible here; confirm.
    void run();

    void allocateMemory();

    void copyMemoryToDevice();

    int nextPow2(int x);

    float scaleRange(float in, float oldMin, float oldMax, float newMin, float newMax);


    // The routines below take their buffers and sizes as explicit arguments rather than only using the members.
    // They have no grid/block/stream parameters, unlike the *OnDevice wrappers declared after this class;
    // presumably these are the host (CPU) implementations — confirm.

    void forwardPass(int step, int sequenceOffset, float *activity, float *input, float *weight, float *previousPotential, float *potential, float *error, int *deltaT, int numNeurons, int numIONeurons);

    void backwardPass(int step, int sequenceOffset, int numNeurons, int numIONeurons, float *input, float *activity, float *delta, float *previousDelta, float *error, float *weight, float *deltaWeight, float *mse, int *deltaT);

    void updateWeights(float learningRate, float momentum, float *weight, float *deltaWeight, float *previousDeltaWeight, int numWeights);

    void setInitStates(float initState, float *activity, float *zeroActivity, int numNeurons, int numIONeurons, int numFastNeurons);

    void resetParameters(int numNeurons, int maxsequenceSteps, float *delta, float *previousDelta, float *potential, float *previousPotential, float *error, float *zeroNeuron, float *zeroError);

    void resetDeltaWeights(int numWeights, float *deltaWeight, float *zeroWeight);


public:

    // Public data member, readable and writable directly by callers.
    bool terminalMode;


    // Lifecycle and top-level operations.

    void initialise();

    void deinitialise();

    void saveNetwork();

    void testNetwork();

    void randomiseWeights();

    // Returns bool; presumably true on success — confirm in the definition.
    bool loadTrainingData();

    vector<string> queryGPU();


    // Diagnostic printing.

    void printGPUProperties(int deviceID);

    void printOptions();

    void printConfiguration();

    void printKernelConfiguration();

    void printNetworkConfiguration();


    // Setters.

    void setGPUMode(bool gpuMode);

    void setDebuggingLevel(int level);

    void setTrainingFile(string fileName);

    void setTrainingData(Bottle data);

    void setNetworkFile(string fileName);

    void setShowProgress(bool show);

    void setMaxThreads(int threads);

    void setMaxIterations(int iterations);

    void setSeed(int value);

    void setIODeltaT(int value);

    void setFastDeltaT(int value);

    void setSlowDeltaT(int value);

    void setNumFastNeurons(int fastNeurons);

    void setNumSlowNeurons(int slowNeurons);

    void setInitWeightRange(float value);

    void setThreshold(float value);

    void setLearningRate(float value);

    void setMomentum(float value);

    void setDevice(int deviceID);

    // The array bound in the parameter is documentation only: the parameter decays to `int *`,
    // so the compiler does not enforce that callers pass MAX_GPU_DEVICES entries.
    void setDevices(int deviceID[MAX_GPU_DEVICES]);

    void setDeltaT();

    void setGridBlock();

    void setFeedbackInterval(int interval);


    // Getters. The two *AsBottle functions write their result through the Bottle pointer argument.

    void getWeightsAsBottle(Bottle *bottle);

    void getErrorsAsBottle(Bottle *bottle);

    int getDevice();

    int getNumDevices();

    int getProgress();

    int getMaxIterations();

    int getSeed();

    int getNumFastNeurons();

    int getNumSlowNeurons();

    int getIODeltaT();

    int getFastDeltaT();

    int getSlowDeltaT();

    int getDebuggingLevel();

    int getFeedbackInterval();

    float getLearningRate();

    float getMomentum();

    float getWeightRange();

    float getThreshold();

    string getNetworkFile();

    string getTrainingFile();

};


// GPU kernel wrappers.
// Free functions declared here and defined elsewhere (presumably in the module's .cu files — confirm).
// Each takes the launch geometry as `dim3 grid, dim3 block`; those with a `cudaStream_t stream`
// parameter presumably launch on that stream, and those with `smemSize` presumably pass it as the
// dynamic shared-memory size — confirm in the definitions.
// Pointer arguments are presumably device pointers (the class keeps matching "_d" buffers) — confirm.

void resetDeltaWeightsOnDevice(dim3 grid, dim3 block, cudaStream_t stream, int numWeights, int numIONeurons, float *deltaWeight, float *individualError);

void setInitStatesOnDevice(dim3 grid, dim3 block, cudaStream_t stream, float initState, float *activity, int numNeurons, int numIONeurons, int numFastNeurons);

void resetParametersOnDevice(dim3 grid, dim3 block, cudaStream_t stream, int numNeurons, int maxSequenceSteps, float *delta, float *previousDelta, float *potential, float *previousPotential, float *error);

// No stream parameter on this wrapper.
void updateWeightsOnDevice(dim3 grid, dim3 block, float learningRate, float momentum, float *weight, float *deltaWeight, float *previousDeltaWeight, int numWeights);

// Forward-pass variants (V1, V2, V21); how they differ is not visible from the declarations.

void forwardPassV1onDevice(dim3 grid, dim3 block, cudaStream_t stream, int step, int sequenceOffset, float *activity, float *input, float *weight, float *previousPotential, float *error, float *potential, int *deltaT, int numNeurons, int numIONeurons);

void forwardPassV2onDevice(dim3 grid, dim3 block, cudaStream_t stream, int step, int sequenceOffset, float *activity, float *input, float *weight, int numNeurons, int numIONeurons, float *buffer);

void forwardPassV21onDevice(dim3 grid, dim3 block, int smemSize, cudaStream_t stream, int step, int sequenceOffset, float *activity, float *input, float *buffer, float *potential, float *weight, float *previousPotential, float *error, int *deltaT, int numNeurons, int numIONeurons);

// Backward-pass variants (V1, V11, V2, V21, V3); how they differ is not visible from the declarations.

void backwardPassV1onDevice(dim3 grid, dim3 block, cudaStream_t stream, int step, int sequenceOffset, int numNeurons, int numIONeurons, float *input, float *activity, float *delta, float *deltaWeight, float *previousDelta, float *error, float *individualError, int *deltaT, float *weight);

void backwardPassV11onDevice(dim3 grid, dim3 block, cudaStream_t stream, int step,  int numNeurons, int numIONeurons,  float *activity, float *delta, float *previousDelta, int *deltaT, float *weight);

void backwardPassV2onDevice(dim3 grid, dim3 block, cudaStream_t stream, int step, int sequenceOffset, int numNeurons, int numIONeurons, float *input, float *activity, float *delta, float *deltaWeight, float *previousDelta, float *error, float *individualError, int *deltaT, float *weight, float *buffer);

void backwardPassV21onDevice(dim3 grid, dim3 block, int smemSize, cudaStream_t stream, float *input, float *output, int numNeurons, int numIONeurons);

void backwardPassV3onDevice(dim3 grid, dim3 block, cudaStream_t stream, int step, int numNeurons, int numIONeurons, float *activity, float *delta, float *previousDelta, float *deltaWeight, int *deltaT, float *weight);

// Peer-to-peer (multi-GPU) helpers taking a master buffer and a peer buffer. None of the three takes a stream parameter.

void sumDeltaWeightsP2PonDevice(dim3 grid, dim3 block, int numWeights, float *masterDeltaWeight, float *peerDeltaWeight);

void updateWeightsP2PonDevice(dim3 grid, dim3 block, int numWeights, float learningRate, float momentum, float *masterWeight, float *peerWeight, float *deltaWeight, float *previousDeltaWeight);

void sumErrorP2PonDevice(dim3 grid, dim3 block, float *masterError, float *peerError);

// Reduction from `input` into `output`; `nIsPow2` presumably states whether `n` is a power of two — confirm.
// The roles of `size` versus `n` are not visible from the declaration.
void reduceOnDevice(int size, dim3 grid, dim3 block, int smemSize, cudaStream_t stream, float *input, float *output, unsigned int n, bool nIsPow2);


#endif//MTRNN_H