Aquila  2.0 prealpha
Cognitive Robotics Architecture
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
mtrnn.h
Go to the documentation of this file.
1 //##############################################################################################################################################################################################################//
2 //Aquila - An Open-Source GPU-Accelerated Toolkit for Cognitive and Neuro-Robotics Research //
3 // //
4 //Copyright (c) <2012>, <Martin Peniak - www.martinpeniak.com> //
5 //All rights reserved. //
6 // //
7 //Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: //
8 // //
9 // - Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. //
10 // - Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. //
11 // //
12 //THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR //
13 //A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT //
14 //LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR //
15 //TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. //
16 // //
17 //The views and conclusions contained in the software and documentation are those of the authors and should not be interpreted //
18 //as representing official policies, either expressed or implied, of the FreeBSD Project. //
19 //##############################################################################################################################################################################################################//
20 
21 #ifndef MTRNN_H
22 #define MTRNN_H
23 #define MAX_GPU_DEVICES 4
24 #define MAX_SEQUENCES 100
25 
26 #include <vector>
27 #include <cuda.h>
28 #include <cuda_runtime.h>
29 #include <omp.h>
30 #include "interface.h"
31 
32 using namespace std;
33 
34 class MTRNN : public yarp::os::Thread
35 {
36 public:
37  MTRNN(Interface *pInterface);
38 
39 private:
40  Interface *intrfc;
41 
42  cudaDeviceProp deviceInfo[MAX_GPU_DEVICES];
43  string trainingFileName;
44  string networkFileName;
45  Bottle trainingData;
46  size_t totalMemory;
47 
48  //host memory pointers
49  int *deltaT_h;
50  float *input_h;
51  float *activity_h;
52  float *error_h;
53  float *zeroError_h;
54  float *delta_h;
55  float *previousDelta_h;
56  float *individualError_h;
57  float *mse_h;
58  float *previousDeltaWeight_h;
59  float *deltaWeight_h[MAX_GPU_DEVICES];
60  float *weight_h;
61  float *previousPotential_h;
62  float *potential_h;
63  float *zeroWeight_h;
64  float *zeroNeuron_h;
65 
66  //device memory pointers
67  int *deltaT_d[MAX_GPU_DEVICES];
68  float *mse_d[MAX_GPU_DEVICES];
69  float *activity_d[MAX_GPU_DEVICES];
70  float *input_d[MAX_GPU_DEVICES];
71  float *error_d[MAX_GPU_DEVICES];
72  float *delta_d[MAX_GPU_DEVICES];
73  float *buffer_d[MAX_GPU_DEVICES];
74  float *previousDelta_d[MAX_GPU_DEVICES];
75  float *individualError_d[MAX_GPU_DEVICES];
76  float *deltaWeight_d[MAX_GPU_DEVICES];
77  float *previousDeltaWeight_d[MAX_GPU_DEVICES];
78  float *weight_d[MAX_GPU_DEVICES];
79  float *potential_d[MAX_GPU_DEVICES];
80  float *previousPotential_d[MAX_GPU_DEVICES];
81 
82  int debug;
83  int feedbackInterval;
84  int P2P;
85  int availableGPUs;
86  int requestedGPUs;
87  int showProgress;
88  int maxThreads;
89  int seed;
90  int ioDeltaT;
91  int fastDeltaT;
92  int slowDeltaT;
93  int iteration;
94  int maxIterations;
95  int numSequences;
96  int sequenceWidth;
97  int totalSequenceSteps;
98  int maxSequenceSteps;
99  int numIONeurons;
100  int numFastNeurons;
101  int numSlowNeurons;
102  int numControlNeurons;
103  int numLinguisticNeurons;
104  int numVisionNeurons;
105  int numActionNeurons;
106  int numNeurons;
107  int numWeights;
108  int *sequenceSteps;
109  int *sequenceOffsets;
110  float minValue;
111  float maxValue;
112  float *errors;
113  bool GPU;
114 
115  //grid block
116  int neuronThreads;
117  int neuronBlocks;
118  int seqNeuronBlocks;
119  int numIoBlocks;
120  int threads2D;
121  int numFThreads;
122  int numHBlocks;
123  int numFBlocks;
124  int numEThreads;
125  int numEBlocks;
126  int smemSize;
127  int smemESize;
128  dim3 dim2DBlock;
129  dim3 dim2DGrid;
130  dim3 dim2DWBlock;
131  dim3 dim2DWGrid;
132 
133  int gpuDevice[MAX_GPU_DEVICES];
134  float initWeightRange;
135  float threshold;
136  float learningRate;
137  float momentum;
138 
139  void run();
140  void allocateMemory();
141  void copyMemoryToDevice();
142  int nextPow2(int x);
143  float scaleRange(float in, float oldMin, float oldMax, float newMin, float newMax);
144 
145  void forwardPass(int step, int sequenceOffset, float *activity, float *input, float *weight, float *previousPotential, float *potential, float *error, int *deltaT, int numNeurons, int numIONeurons);
146  void backwardPass(int step, int sequenceOffset, int numNeurons, int numIONeurons, float *input, float *activity, float *delta, float *previousDelta, float *error, float *weight, float *deltaWeight, float *mse, int *deltaT);
147  void updateWeights(float learningRate, float momentum, float *weight, float *deltaWeight, float *previousDeltaWeight, int numWeights);
148  void setInitStates(float initState, float *activity, float *zeroActivity, int numNeurons, int numIONeurons, int numFastNeurons);
149  void resetParameters(int numNeurons, int maxsequenceSteps, float *delta, float *previousDelta, float *potential, float *previousPotential, float *error, float *zeroNeuron, float *zeroError);
150  void resetDeltaWeights(int numWeights, float *deltaWeight, float *zeroWeight);
151 
152 public:
154 
155  void initialise();
156  void deinitialise();
157  void saveNetwork();
158  void testNetwork();
159  void randomiseWeights();
160  bool loadTrainingData();
161  vector<string> queryGPU();
162 
163  void printGPUProperties(int deviceID);
164  void printOptions();
165  void printConfiguration();
166  void printKernelConfiguration();
167  void printNetworkConfiguration();
168 
169  void setGPUMode(bool gpuMode);
170  void setDebuggingLevel(int level);
171  void setTrainingFile(string fileName);
172  void setTrainingData(Bottle data);
173  void setNetworkFile(string fileName);
174  void setShowProgress(bool show);
175  void setMaxThreads(int threads);
176  void setMaxIterations(int iterations);
177  void setSeed(int value);
178  void setIODeltaT(int value);
179  void setFastDeltaT(int value);
180  void setSlowDeltaT(int value);
181  void setNumFastNeurons(int fastNeurons);
182  void setNumSlowNeurons(int slowNeurons);
183  void setInitWeightRange(float value);
184  void setThreshold(float value);
185  void setLearningRate(float value);
186  void setMomentum(float value);
187  void setDevice(int deviceID);
188  void setDevices(int deviceID[MAX_GPU_DEVICES]);
189  void setDeltaT();
190  void setGridBlock();
191  void setFeedbackInterval(int interval);
192 
193  void getWeightsAsBottle(Bottle *bottle);
194  void getErrorsAsBottle(Bottle *bottle);
195  int getDevice();
196  int getNumDevices();
197  int getProgress();
198  int getMaxIterations();
199  int getSeed();
200  int getNumFastNeurons();
201  int getNumSlowNeurons();
202  int getIODeltaT();
203  int getFastDeltaT();
204  int getSlowDeltaT();
205  int getDebuggingLevel();
206  int getFeedbackInterval();
207  float getLearningRate();
208  float getMomentum();
209  float getWeightRange();
210  float getThreshold();
211  string getNetworkFile();
212  string getTrainingFile();
213 };
214 
215 //GPU kernel wrappers
216 void resetDeltaWeightsOnDevice(dim3 grid, dim3 block, cudaStream_t stream, int numWeights, int numIONeurons, float *deltaWeight, float *individualError);
217 void setInitStatesOnDevice(dim3 grid, dim3 block, cudaStream_t stream, float initState, float *activity, int numNeurons, int numIONeurons, int numFastNeurons);
218 void resetParametersOnDevice(dim3 grid, dim3 block, cudaStream_t stream, int numNeurons, int maxSequenceSteps, float *delta, float *previousDelta, float *potential, float *previousPotential, float *error);
219 void updateWeightsOnDevice(dim3 grid, dim3 block, float learningRate, float momentum, float *weight, float *deltaWeight, float *previousDeltaWeight, int numWeights);
220 void forwardPassV1onDevice(dim3 grid, dim3 block, cudaStream_t stream, int step, int sequenceOffset, float *activity, float *input, float *weight, float *previousPotential, float *error, float *potential, int *deltaT, int numNeurons, int numIONeurons);
221 void forwardPassV2onDevice(dim3 grid, dim3 block, cudaStream_t stream, int step, int sequenceOffset, float *activity, float *input, float *weight, int numNeurons, int numIONeurons, float *buffer);
222 void forwardPassV21onDevice(dim3 grid, dim3 block, int smemSize, cudaStream_t stream, int step, int sequenceOffset, float *activity, float *input, float *buffer, float *potential, float *weight, float *previousPotential, float *error, int *deltaT, int numNeurons, int numIONeurons);
223 void backwardPassV1onDevice(dim3 grid, dim3 block, cudaStream_t stream, int step, int sequenceOffset, int numNeurons, int numIONeurons, float *input, float *activity, float *delta, float *deltaWeight, float *previousDelta, float *error, float *individualError, int *deltaT, float *weight);
224 void backwardPassV11onDevice(dim3 grid, dim3 block, cudaStream_t stream, int step, int numNeurons, int numIONeurons, float *activity, float *delta, float *previousDelta, int *deltaT, float *weight);
225 void backwardPassV2onDevice(dim3 grid, dim3 block, cudaStream_t stream, int step, int sequenceOffset, int numNeurons, int numIONeurons, float *input, float *activity, float *delta, float *deltaWeight, float *previousDelta, float *error, float *individualError, int *deltaT, float *weight, float *buffer);
226 void backwardPassV21onDevice(dim3 grid, dim3 block, int smemSize, cudaStream_t stream, float *input, float *output, int numNeurons, int numIONeurons);
227 void backwardPassV3onDevice(dim3 grid, dim3 block, cudaStream_t stream, int step, int numNeurons, int numIONeurons, float *activity, float *delta, float *previousDelta, float *deltaWeight, int *deltaT, float *weight);
228 void sumDeltaWeightsP2PonDevice(dim3 grid, dim3 block, int numWeights, float *masterDeltaWeight, float *peerDeltaWeight);
229 void updateWeightsP2PonDevice(dim3 grid, dim3 block, int numWeights, float learningRate, float momentum, float *masterWeight, float *peerWeight, float *deltaWeight, float *previousDeltaWeight);
230 void sumErrorP2PonDevice(dim3 grid, dim3 block, float *masterError, float *peerError);
231 void reduceOnDevice(int size, dim3 grid, dim3 block, int smemSize, cudaStream_t stream, float *input, float *output, unsigned int n, bool nIsPow2);
232 
233 #endif//MTRNN_H