HoviTron Video Pipeline: KernelCuda.cuh Source File

/* ----------------------
* Copyright 2023 Universit� Libre de Bruxelles(ULB), Universidad Polit�cnica de Madrid(UPM), CREAL, Deutsches Zentrum f�r Luft - und Raumfahrt(DLR)
 
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at < http://www.apache.org/licenses/LICENSE-2.0%3E
 
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissionsand
* limitations under the License.
---------------------- */
 
 
#ifndef KERNEL_CUDA_H
#define KERNEL_CUDA_H
 
#include <cuda.h>
#include <cuda_runtime.h>
#include "device_launch_parameters.h"
#include "surface_functions.h" 
#include "types.h"
#include "HvtStreamingAPI.h"
#include "math.h"
 
#define unreachable() (unreachable_kernel())
 
/*------------ CUDA kernel params*/
const dim3 threadsperblock(512);
const dim3 blockspergrid(30 * 16);
/*-------------------------------*/
 
/*------------ CUDA KERNEL*/
/// <summary>
/// Copy RGB data from CUDA array into a cudaSurfaceObject_t. IT IS A CUDA KERNEL.
/// </summary>
/// <param name="RGB_vulkan"> The cudaSurfaceObject_t receiving the data.</param>
/// <param name="RGB_cuda"> The RGB CUDA array that will give data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
__global__ void copy_color(cudaSurfaceObject_t RGB_vulkan, uchar * RGB_cuda, size_t baseWidth, size_t baseHeight);
/// <summary>
/// Copy the depth from a CUDA array into a cudaSurfaceObject_t. IT IS A CUDA KERNEL.
/// </summary>
/// <param name="D_vulkan"> The cudaSurfaceObject_t that will receive data.</param>
/// <param name="D_cuda"> The depth CUDA array that will give data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
__global__ void copy_depth(cudaSurfaceObject_t D_vulkan, float* D_cuda, size_t baseWidth, size_t baseHeight);
/// <summary>
/// Remove the pitch added by another instance by copying the data in a new array. IT IS A CUDA KERNEL.
/// </summary>
/// <typeparam name="T"> The type of the array data.</typeparam> 
/// <param name="data_in"> The data to remove the pitch.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
template<typename T> __global__ void remove_pitch(T* data_in, T* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels);
/// <summary>
/// Convert a CUDA array of type USHORT into a CUDA array of type UCHAR. IT IS A CUDA KERNEL.
/// </summary>
/// <param name="data_in"> The data to convert.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
__global__ void uShort_2_uChar(unsigned short* data_in, unsigned char* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels);
/// <summary>
/// Convert a CUDA array of type USHORT into a CUDA array of type FLOAT. IT IS A CUDA KERNEL.
/// </summary>
/// <param name="data_in"> The data to convert.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
__global__ void uShort_2_Float(unsigned short* data_in, float* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels);
/// <summary>
/// Copy and scale a float CUDA array into another one. IT IS A CUDA KERNEL.
/// </summary>
/// <typeparam name="T"> The type of the array data.</typeparam>
/// <param name="data_in"> The data to scale.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
/// <param name="scale"> The scaling factor. </param>
template<typename T> __global__ void scale_data_array(T* data_in, T* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels, float scale);
/// <summary>
/// Copy a float CUDA array into another one and add an offset to the second one. IT IS A CUDA KERNEL.
/// </summary>
/// <typeparam name="T"> The type of the array data.</typeparam>
/// <param name="data_in"> The data to add an offset.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
/// <param name="offset"> The offset to add. </param>
template<typename T> __global__ void add_offset_data_array(T* data_in, T* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels, float offset);
/// <summary>
/// Copy, add an offset, and scale one channel of a float CUDA array into another float cuda array. IT IS A CUDA KERNEL.
/// </summary>
/// <typeparam name="T"> The type of the array data.</typeparam>
/// <param name="data_in"> The data to scale.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
/// <param name="nbOfChannel"> The index of the channel that will be copied.</param>
/// <param name="scale"> The scaling factor. </param>
/// <param name="offset"> The offset to add.</param>
template<typename T> __global__ void scale_add_data_array_uchannel(T* data_in, T* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels, size_t nbOfChannel, float scale, float offset);
/// <summary>
/// Copy and scale one channel of a float CUDA array into another float cuda array. IT IS A CUDA KERNEL.
/// </summary>
/// <typeparam name="T"> The type of the array data.</typeparam>
/// <param name="data_in"> The data to scale.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
/// <param name="nbOfChannel"> The index of the channel that will be copied.</param>
/// <param name="scale"> The scaling factor. </param>
template<typename T> __global__ void scale_data_array_uchannel(T* data_in, T* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels, size_t nbOfChannel, float scale);
/// <summary>
/// Compute the average L1 norm in a patch for all pixels between the current depth and the previous depth. Then correct the current depth if the average error is lower than the treshold taking into account the adjsutment factor. 
/// </summary>
/// <typeparam name="T"> The type of the array data.</typeparam>
/// <param name="prev_depth"> The previous depth map to compare with.</param>
/// <param name="curr_depth"> The current depth map that will be adjusted.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="sizePatch"> The size of the patch where the comparison around the pixels will be done.</param>
/// <param name="treshold"> The treshold value in meters.</param>
/// <param name="adjustementFactor"> The factor where 1.0 will correct the current depth with only the previous one, 0.5 will correct equally the current depth with both the current and the previous depth, and 0.0 will apply no ccorection. </param>
template<typename T> __global__ void temporal_consistency_adjustement(T* prev_depth, T* curr_depth, size_t baseWidth, size_t baseHeight, unsigned int sizePatch,float treshold, float adjustementFactor);
 
/*------------------------*/
 
/*---------- C++ launching CUDA kernel functions*/
/// <summary>
/// Launch the CUDA kernel "copy_color" on a specific CUDA stream.
/// </summary>
/// <param name="RGB_vulkan"> The cudaSurfaceObject_t receiving the data.</param>
/// <param name="RGB_cuda"> The RGB CUDA array that will give data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="stream"> The stream where the kernel will execute.</param>
void copyColor(cudaSurfaceObject_t RGB_vulkan, uchar* RGB_cuda, size_t baseWidth, size_t baseHeight, cudaStream_t& stream);
/// <summary>
/// Launch the CUDA kernel "copy_depth" on a specific CUDA stream.
/// </summary>
/// <param name="D_vulkan"> The cudaSurfaceObject_t that will receive data.</param>
/// <param name="D_cuda"> The depth CUDA array that will give data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="stream"> The stream where the kernel will execute.</param>
void copyDepth(cudaSurfaceObject_t D_vulkan, float* D_cuda, size_t baseWidth, size_t baseHeight, cudaStream_t& stream);
/// <summary>
/// Launch the CUDA kernel "remove_pitch".
/// </summary>
/// <typeparam name="T"> The type of the array data.</typeparam>
/// <param name="data_in"> The data to remove the pitch.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
/// <param name="stream"> The stream where the kernel will be executed.</param>
template<typename T> void removePitch(T* data_in, T* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels, cudaStream_t& stream);
/// <summary>
/// Launch the CUDA kernel "uShort_2_uChar".
/// </summary>
/// <param name="data_in"> The data to convert.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
/// <param name="stream"> The stream where the kernel will be executed.</param>
void uShort2uChar(unsigned short* data_in, unsigned char* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels,cudaStream_t& stream);
/// <summary>
/// Launch the CUDA kernel "uShort_2_Float".
/// </summary>
/// <param name="data_in"> The data to convert.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
/// <param name="stream"> The stream where the kernel will be executed.</param>
void uShort2Float(unsigned short* data_in, float* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels, cudaStream_t& stream);
/// <summary>
/// Launch the CUDA kernel "scale_data_array".
/// </summary>
/// <typeparam name="T"> The type of the array data.</typeparam>
/// <param name="data_in"> The data to scale.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
/// <param name="scale"> The scaling factor. </param>
/// <param name="stream"> The stream where the kernel will be executed. </param>
template<typename T> void scaleDataArray(T* data_in, T* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels, float scale, cudaStream_t& stream);
/// <summary>
/// Launch the CUDA kernel "add_offset_data_array".
/// </summary>
/// <typeparam name="T"> The type of the array data.</typeparam>
/// <param name="data_in"> The data to add an offset.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
/// <param name="offset"> The offset to add. </param>
/// <param name="stream"> The stream where the kernel will be executed. </param>
template<typename T> void addOffsetDataArray(T* data_in, T* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels, float offset, cudaStream_t& stream);
 
/// <summary>
/// Launch the CUDA kernel "scale_data_array_uchannel".
/// </summary>
/// <typeparam name="T"> The type of the array data.</typeparam>
/// <param name="data_in"> The data to scale.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
/// <param name="nbOfChannel"> The index of the channel that will be copied.</param>
/// <param name="scale"> The scaling factor. </param>
/// <param name="stream"> The stream where the kernel will be executed.</param>
template<typename T> void scaleDataArrayUChannel(T* data_in, T* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels, size_t nbOfChannel, float scale, cudaStream_t& stream);
/// <summary>
/// Launch the CUDA kernel "scale_add_data_array_uchannel".
/// </summary>
/// <typeparam name="T"> The type of the array data.</typeparam>
/// <param name="data_in"> The data to scale.</param>
/// <param name="data_out"> The CUDA array that is receiving data.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="nbInRow"> Number of elements in one row.</param>
/// <param name="channels"> Number of channels. (Ex: RGBA = 4).</param>
/// <param name="nbOfChannel"> The index of the channel that will be copied.</param>
/// <param name="scale"> The scaling factor. </param>
/// <param name="offset"> The offset to add.</param>
/// <param name="stream"> The stream where the kernel will be executed.</param>
template<typename T> void scaleAddDataArrayUChannel(T* data_in, T* data_out, size_t baseWidth, size_t baseHeight, size_t nbInRow, size_t channels, size_t nbOfChannel, float scale, float offset, cudaStream_t& stream);
/// <summary>
/// Launch the kernel temporal_consistency_adjustement then copy the current depth into the previous depth memory.
/// </summary>
/// <typeparam name="T"> The type of the array data.</typeparam>
/// <param name="prev_depth"> The previous depth map to compare with.</param>
/// <param name="curr_depth"> The current depth map that will be adjusted.</param>
/// <param name="baseWidth"> The width of the CUDA array.</param>
/// <param name="baseHeight"> The height of the CUDA array.</param>
/// <param name="sizePatch"> The size of the patch where the comparison around the pixels will be done.</param>
/// <param name="treshold"> The treshold value in meters.</param>
/// <param name="adjustementFactor"> The factor where 1.0 will correct the current depth with only the previous one, 0.5 will correct equally the current depth with both the current and the previous depth, and 0.0 will apply no ccorection. </param>
/// <param name="stream"> The stream where the kernel will be executed. </param>
template<typename T> void temporalConsistencyAdjustement(T* prev_depth, T* curr_depth, size_t baseWidth, size_t baseHeight, unsigned int sizePatch, float treshold, float adjustementFactor, cudaStream_t& stream);
/*----------------------------------------------*/
 
 
[[noreturn]] void unreachable_kernel();
 
#endif // !KERNEL_CUDA_H