#include <vector>
#include <cuda.h>
#include <cuda_runtime.h>
#include <omp.h>
#include "interface.h"

Classes
class	MTRNN

Macros
#define	MAX_GPU_DEVICES 4

#define	MAX_SEQUENCES 100

Functions
void	resetDeltaWeightsOnDevice (dim3 grid, dim3 block, cudaStream_t stream, int numWeights, int numIONeurons, float *deltaWeight, float *individualError)
	Wrapper for resetDeltaWeightsKernel.

void	setInitStatesOnDevice (dim3 grid, dim3 block, cudaStream_t stream, float initState, float *activity, int numNeurons, int numIONeurons, int numFastNeurons)
	Wrapper for setInitStatesKernel.

void	resetParametersOnDevice (dim3 grid, dim3 block, cudaStream_t stream, int numNeurons, int maxSequenceSteps, float *delta, float *previousDelta, float *potential, float *previousPotential, float *error)
	Wrapper for resetParametersKernel.

void	updateWeightsOnDevice (dim3 grid, dim3 block, float learningRate, float momentum, float *weight, float *deltaWeight, float *previousDeltaWeight, int numWeights)
	Wrapper for updateWeightsKernel.

void	forwardPassV1onDevice (dim3 grid, dim3 block, cudaStream_t stream, int step, int sequenceOffset, float *activity, float *input, float *weight, float *previousPotential, float *error, float *potential, int *deltaT, int numNeurons, int numIONeurons)
	Wrapper for forwardPassV1Kernel.

void	forwardPassV2onDevice (dim3 grid, dim3 block, cudaStream_t stream, int step, int sequenceOffset, float *activity, float *input, float *weight, int numNeurons, int numIONeurons, float *buffer)
	Wrapper for forwardPassV2Kernel.

void	forwardPassV21onDevice (dim3 grid, dim3 block, int smemSize, cudaStream_t stream, int step, int sequenceOffset, float *activity, float *input, float *buffer, float *potential, float *weight, float *previousPotential, float *error, int *deltaT, int numNeurons, int numIONeurons)
	Wrapper for forwardPassV21Kernel.

void	backwardPassV1onDevice (dim3 grid, dim3 block, cudaStream_t stream, int step, int sequenceOffset, int numNeurons, int numIONeurons, float *input, float *activity, float *delta, float *deltaWeight, float *previousDelta, float *error, float *individualError, int *deltaT, float *weight)
	Wrapper for backwardPassV1Kernel.

void	backwardPassV11onDevice (dim3 grid, dim3 block, cudaStream_t stream, int step, int numNeurons, int numIONeurons, float *activity, float *delta, float *previousDelta, int *deltaT, float *weight)
	Wrapper for backwardPassV11Kernel.

void	backwardPassV2onDevice (dim3 grid, dim3 block, cudaStream_t stream, int step, int sequenceOffset, int numNeurons, int numIONeurons, float *input, float *activity, float *delta, float *deltaWeight, float *previousDelta, float *error, float *individualError, int *deltaT, float *weight, float *buffer)
	Wrapper for backwardPassV2Kernel.

void	backwardPassV21onDevice (dim3 grid, dim3 block, int smemSize, cudaStream_t stream, float *input, float *output, int numNeurons, int numIONeurons)
	Wrapper for backwardPassV21Kernel.

void	backwardPassV3onDevice (dim3 grid, dim3 block, cudaStream_t stream, int step, int numNeurons, int numIONeurons, float *activity, float *delta, float *previousDelta, float *deltaWeight, int *deltaT, float *weight)
	Wrapper for backwardPassV3Kernel.

void	sumDeltaWeightsP2PonDevice (dim3 grid, dim3 block, int numWeights, float *masterDeltaWeight, float *peerDeltaWeight)
	Wrapper for sumDeltaWeightsP2PKernel.

void	updateWeightsP2PonDevice (dim3 grid, dim3 block, int numWeights, float learningRate, float momentum, float *masterWeight, float *peerWeight, float *deltaWeight, float *previousDeltaWeight)
	Wrapper for updateWeightsP2PKernel.

void	sumErrorP2PonDevice (dim3 grid, dim3 block, float *masterError, float *peerError)
	Wrapper for sumErrorP2PKernel.

void	reduceOnDevice (int size, dim3 grid, dim3 block, int smemSize, cudaStream_t stream, float *input, float *output, unsigned int n, bool nIsPow2)
	Wrapper for reduceKernel.

Macro Definition Documentation

#define MAX_GPU_DEVICES 4

#define MAX_SEQUENCES 100

Function Documentation

void backwardPassV11onDevice	(	dim3	grid,
		dim3	block,
		cudaStream_t	stream,
		int	step,
		int	numNeurons,
		int	numIONeurons,
		float *	activity,
		float *	delta,
		float *	previousDelta,
		int *	deltaT,
		float *	weight
	)

Wrapper for backwardPassV11Kernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	stream	- CUDA stream
[in]	step	- current step
[in]	numNeurons	- number of neurons
[in]	numIONeurons	- number of input-output neurons
[in]	activity	- activities
[in]	deltaT	- delta-t values
[in]	weight	- weights
[in]	previousDelta	- previous deltas
[out]	delta	- deltas

void backwardPassV1onDevice	(	dim3	grid,
		dim3	block,
		cudaStream_t	stream,
		int	step,
		int	sequenceOffset,
		int	numNeurons,
		int	numIONeurons,
		float *	input,
		float *	activity,
		float *	delta,
		float *	deltaWeight,
		float *	previousDelta,
		float *	error,
		float *	individualError,
		int *	deltaT,
		float *	weight
	)

Wrapper for backwardPassV1Kernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	stream	- CUDA stream
[in]	step	- current step
[in]	sequenceOffset	- sequence offsets
[in]	numNeurons	- number of neurons
[in]	numIONeurons	- number of input-output neurons
[in]	input	- input
[in]	activity	- activities
[in]	error	- errors
[in]	individualError	- error buffer
[in]	deltaT	- delta-t values
[in]	weight	- weights
[in]	previousDelta	- previous deltas
[out]	delta	- deltas
[out]	deltaWeight	- delta weights

void backwardPassV21onDevice	(	dim3	grid,
		dim3	block,
		int	smemSize,
		cudaStream_t	stream,
		float *	input,
		float *	output,
		int	numNeurons,
		int	numIONeurons
	)

Wrapper for backwardPassV21Kernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	smemSize	- CUDA shared memory size
[in]	stream	- CUDA stream
[in]	input	- input
[in]	numNeurons	- number of neurons
[in]	numIONeurons	- number of input-output neurons
[out]	output	- output

void backwardPassV2onDevice	(	dim3	grid,
		dim3	block,
		cudaStream_t	stream,
		int	step,
		int	sequenceOffset,
		int	numNeurons,
		int	numIONeurons,
		float *	input,
		float *	activity,
		float *	delta,
		float *	deltaWeight,
		float *	previousDelta,
		float *	error,
		float *	individualError,
		int *	deltaT,
		float *	weight,
		float *	buffer
	)

Wrapper for backwardPassV2Kernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	stream	- CUDA stream
[in]	step	- current step
[in]	sequenceOffset	- sequence offsets
[in]	numNeurons	- number of neurons
[in]	numIONeurons	- number of input-output neurons
[in]	input	- input
[in]	activity	- activities
[in]	previousDelta	- previous deltas
[in]	error	- errors
[in]	individualError	- error buffer
[in]	deltaT	- delta-t values
[in]	weight	- weights
[out]	delta	- deltas
[out]	deltaWeight	- delta weights
[out]	buffer	- buffer used for storing delta weights

void backwardPassV3onDevice	(	dim3	grid,
		dim3	block,
		cudaStream_t	stream,
		int	step,
		int	numNeurons,
		int	numIONeurons,
		float *	activity,
		float *	delta,
		float *	previousDelta,
		float *	deltaWeight,
		int *	deltaT,
		float *	weight
	)

Wrapper for backwardPassV3Kernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	stream	- CUDA stream
[in]	step	- current step
[in]	numNeurons	- number of neurons
[in]	numIONeurons	- number of input-output neurons
[in]	activity	- activities
[in]	deltaT	- delta-t values
[in]	weight	- weights
[in]	delta	- deltas
[out]	previousDelta	- previous deltas
[out]	deltaWeight	- delta weights

void forwardPassV1onDevice	(	dim3	grid,
		dim3	block,
		cudaStream_t	stream,
		int	step,
		int	sequenceOffset,
		float *	activity,
		float *	input,
		float *	weight,
		float *	previousPotential,
		float *	error,
		float *	potential,
		int *	deltaT,
		int	numNeurons,
		int	numIONeurons
	)

Wrapper for forwardPassV1Kernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	stream	- CUDA stream
[in]	step	- current step
[in]	sequenceOffset	- sequence offsets
[in]	input	- input
[in]	weight	- weights
[in]	deltaT	- delta-t values
[in]	numNeurons	- number of neurons
[in]	numIONeurons	- number of input-output neurons
[out]	potential	- potentials
[out]	previousPotential	- previous potentials
[out]	activity	- activities
[out]	error	- errors

void forwardPassV21onDevice	(	dim3	grid,
		dim3	block,
		int	smemSize,
		cudaStream_t	stream,
		int	step,
		int	sequenceOffset,
		float *	activity,
		float *	input,
		float *	buffer,
		float *	potential,
		float *	weight,
		float *	previousPotential,
		float *	error,
		int *	deltaT,
		int	numNeurons,
		int	numIONeurons
	)

Wrapper for forwardPassV21Kernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	smemSize	- CUDA shared memory size
[in]	stream	- CUDA stream
[in]	step	- current step
[in]	sequenceOffset	- sequence offsets
[in]	activity	- activations
[in]	input	- input
[in]	weight	- weights
[in]	numNeurons	- number of neurons
[in]	numIONeurons	- number of input-output neurons
[out]	buffer	- buffer used for storing new activations

void forwardPassV2onDevice	(	dim3	grid,
		dim3	block,
		cudaStream_t	stream,
		int	step,
		int	sequenceOffset,
		float *	activity,
		float *	input,
		float *	weight,
		int	numNeurons,
		int	numIONeurons,
		float *	buffer
	)

Wrapper for forwardPassV2Kernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	stream	- CUDA stream
[in]	step	- current step
[in]	sequenceOffset	- sequence offsets
[in]	activity	- activations
[in]	input	- input
[in]	weight	- weights
[in]	numNeurons	- number of neurons
[in]	numIONeurons	- number of input-output neurons
[out]	buffer	- buffer used for storing new activations

void reduceOnDevice	(	int	size,
		dim3	grid,
		dim3	block,
		int	smemSize,
		cudaStream_t	stream,
		float *	input,
		float *	output,
		unsigned int	n,
		bool	nIsPow2
	)

Wrapper for reduceKernel.

Parameters

[in]	size	- number of elements to sum
[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	smemSize	- CUDA shared memory size
[in]	stream	- CUDA stream
[in]	input	- input
[in]	n	- number of elements to sum
[in]	nIsPow2	- whether n is a power of two
[out]	output	- output

void resetDeltaWeightsOnDevice	(	dim3	grid,
		dim3	block,
		cudaStream_t	stream,
		int	numWeights,
		int	numIONeurons,
		float *	deltaWeight,
		float *	individualError
	)

Wrapper for resetDeltaWeightsKernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	stream	- CUDA stream
[in]	numWeights	- number of weights
[in]	numIONeurons	- number of input-output neurons
[out]	deltaWeight	- delta weights
[out]	individualError	- error buffer

void resetParametersOnDevice	(	dim3	grid,
		dim3	block,
		cudaStream_t	stream,
		int	numNeurons,
		int	maxSequenceSteps,
		float *	delta,
		float *	previousDelta,
		float *	potential,
		float *	previousPotential,
		float *	error
	)

Wrapper for resetParametersKernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	stream	- CUDA stream
[in]	numNeurons	- number of neurons
[in]	maxSequenceSteps	- maximum number of sequence steps
[out]	delta	- deltas
[out]	previousDelta	- previous deltas
[out]	potential	- potentials
[out]	previousPotential	- previous potentials
[out]	error	- errors

void setInitStatesOnDevice	(	dim3	grid,
		dim3	block,
		cudaStream_t	stream,
		float	initState,
		float *	activity,
		int	numNeurons,
		int	numIONeurons,
		int	numFastNeurons
	)

Wrapper for setInitStatesKernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	stream	- CUDA stream
[in]	initState	- initial state
[in]	numNeurons	- number of neurons
[in]	numIONeurons	- number of input-output neurons
[in]	numFastNeurons	- number of fast neurons
[out]	activity	- activities

void sumDeltaWeightsP2PonDevice	(	dim3	grid,
		dim3	block,
		int	numWeights,
		float *	masterDeltaWeight,
		float *	peerDeltaWeight
	)

Wrapper for sumDeltaWeightsP2PKernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	numWeights	- number of weights
[in]	peerDeltaWeight	- delta weights from peer device
[out]	masterDeltaWeight	- delta weights from master device

void sumErrorP2PonDevice	(	dim3	grid,
		dim3	block,
		float *	masterError,
		float *	peerError
	)

Wrapper for sumErrorP2PKernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	peerError	- error from peer device
[out]	masterError	- error from master device

void updateWeightsOnDevice	(	dim3	grid,
		dim3	block,
		float	learningRate,
		float	momentum,
		float *	weight,
		float *	deltaWeight,
		float *	previousDeltaWeight,
		int	numWeights
	)

Wrapper for updateWeightsKernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	learningRate	- learning rate
[in]	momentum	- momentum
[in]	numWeights	- number of weights
[in]	deltaWeight	- delta weights
[out]	previousDeltaWeight	- previous delta weights
[out]	weight	- weights

void updateWeightsP2PonDevice	(	dim3	grid,
		dim3	block,
		int	numWeights,
		float	learningRate,
		float	momentum,
		float *	masterWeight,
		float *	peerWeight,
		float *	deltaWeight,
		float *	previousDeltaWeight
	)

Wrapper for updateWeightsP2PKernel.

Parameters

[in]	grid	- CUDA grid size
[in]	block	- CUDA block size
[in]	numWeights	- number of weights
[in]	learningRate	- learning rate
[in]	momentum	- momentum
[in]	deltaWeight	- delta weights
[out]	previousDeltaWeight	- previous delta weights
[out]	masterWeight	- weights from master device
[out]	peerWeight	- weights from peer device

Classes

Macros

Functions

Macro Definition Documentation

Function Documentation