/** 
 * @file    kCudaDef.h
 * @brief   Central Cuda-related definitions.
 *
 * @internal
 * Copyright (C) 2020-2022 by LMI Technologies Inc.  All rights reserved.
 */
#ifndef K_FIRESYNC_CUDA_DEF_H
#define K_FIRESYNC_CUDA_DEF_H

#include <kFireSync/kFsDef.h>
#include <kFireSync/Cuda/kCudaDef.x.h>

/**
 * @struct  kCudaEnvironmentStatus
 * @extends kValue
 * @ingroup kFireSync-Cuda
 * @brief   Represents the overall state of the Cuda processing environment.
 *
 * Values of this type are given by the kCUDA_ENVIRONMENT_STATUS_x constants. Of those constants, 
 * only kCUDA_ENVIRONMENT_STATUS_READY indicates that Cuda execution is supported; each of the other 
 * values identifies a specific reason for which Cuda cannot be used in the current environment.
 */
typedef k32s kCudaEnvironmentStatus; 

/** @relates kCudaEnvironmentStatus @{ */
#define kCUDA_ENVIRONMENT_STATUS_READY                  (0)         ///< Cuda execution supported in current environment.
#define kCUDA_ENVIRONMENT_STATUS_NOT_INTEGRATED         (1)         ///< FireSync platform built without Cuda integration enabled.
#define kCUDA_ENVIRONMENT_STATUS_NO_DEVICE              (2)         ///< No Cuda device found.
#define kCUDA_ENVIRONMENT_STATUS_INCOMPATIBLE_RUNTIME   (3)         ///< Cuda runtime incompatible with FireSync platform requirements.
#define kCUDA_ENVIRONMENT_STATUS_INCOMPATIBLE_DRIVER    (4)         ///< Cuda driver incompatible with Cuda runtime requirements.
#define kCUDA_ENVIRONMENT_STATUS_INCOMPATIBLE_DEVICE    (5)         ///< Cuda device incompatible with FireSync platform requirements.
/** @} */

/**
 * @struct  kCudaSync
 * @extends kValue
 * @ingroup kFireSync-Cuda
 * @brief   Represents the synchronization mode for a Cuda operation.
 *
 * Values of this type are given by the kCUDA_SYNC_x constants.
 */
typedef k32s kCudaSync; 

/** @relates kCudaSync @{ */
#define kCUDA_SYNC_DEFAULT          (0)     ///< Use default synchronization mode of the associated stream. 
#define kCUDA_SYNC_WAIT             (1)     ///< Block until operation is synchronously completed. 
#define kCUDA_SYNC_WAIT_HOST        (2)     ///< Block until operation is synchronously completed, if failing to do so might affect host memory access. 
#define kCUDA_SYNC_DEFER            (3)     ///< Allow operation to execute asynchronously, if supported.
/** @} */

/**
 * @struct  kCudaProperties
 * @ingroup kFireSync-Cuda
 * @brief   Collection of Cuda device properties. 
 *
 * NOTE(review): the field name 'multiprocesorCount' is misspelled (single 's'); it is left unchanged 
 * here because renaming a public structure field would break code that references it.
 */
typedef struct kCudaProperties
{
    kVersion computeCapability;             ///< Overall level of capabilities supported by hardware.
    k64s clockRate;                         ///< Device clock rate (Hz). 
    k64s memoryClockRate;                   ///< Memory bus clock rate (Hz). 
    k32s memoryWidth;                       ///< Memory bus width, in bits. 
    k64s totalGlobalMem;                    ///< Global memory available on device, in bytes.
    kBool isIntegrated;                     ///< Is the device integrated (as opposed to discrete)?
    kBool concurrentManagedAccess;          ///< Does the device support concurrent access by host and device to managed memory?
    kBool canMapHostMemory;                 ///< Can the device map page-locked host memory into the device address space?
    k32s multiprocesorCount;                ///< Streaming multiprocessor count.
    k32s asyncEngineCount;                  ///< Asynchronous processing (copy) engine count.
    k32s maxThreadsPerMultiprocessor;       ///< Maximum number of threads per multiprocessor.
    k32s maxThreadsPerBlock;                ///< Maximum number of threads per block.
    k32s maxThreadsDim[3];                  ///< Maximum size of each dimension of a block.
    k32s maxGridSize[3];                    ///< Maximum size of each dimension of a grid.
    k32s warpSize;                          ///< Warp size, in threads. 
} kCudaProperties;

/** 
 * Executes a <em>return</em> statement if the given expression does not evaluate to cudaSuccess. 
 * 
 * This macro provides the equivalent of kCheck for Cuda runtime methods. The expression result is 
 * passed through kCudaStatus_Filter and then kCudaStatus_Convert before being handed to kCheck.
 * 
 * If the expression result is not cudaSuccess, the current function will return a value that 
 * represents the expression result as a kStatus value.
 * 
 * The expansion deliberately omits a trailing semicolon, so that the macro can be used as the body 
 * of an unbraced if/else without terminating the if statement early. Terminate each use with a 
 * semicolon, as with an ordinary function call (e.g., kCheckCuda(cudaFree(pointer));).
 * 
 * @param   EXPRESSION  Expression that evaluates to a cuda runtime status value. 
 */
#define kCheckCuda(EXPRESSION)     kCheck(kCudaStatus_Convert(kCudaStatus_Filter(EXPRESSION)))

/** 
 * Used within a kTry block to conditionally jump to the first error handling block (e.g. kCatch). 
 * 
 * This macro provides the equivalent of kTest for Cuda runtime methods. The expression result is 
 * passed through kCudaStatus_Filter and then kCudaStatus_Convert before being handed to kTest.
 * 
 * If the expression result is not cudaSuccess, the result of the expression is converted to a 
 * kStatus value and passed to an error-handling block. Otherwise, execution continues at the next statement. 
 * 
 * The expansion deliberately omits a trailing semicolon, so that the macro can be used as the body 
 * of an unbraced if/else without terminating the if statement early. Terminate each use with a 
 * semicolon, as with an ordinary function call (e.g., kTestCuda(cudaFree(pointer));).
 * 
 * @param   EXPRESSION   Expression that evaluates to a cuda runtime status value. 
 */
#define kTestCuda(EXPRESSION)   kTest(kCudaStatus_Convert(kCudaStatus_Filter(EXPRESSION)))

/**
 * @class   kCudaStatus
 * @ingroup kFireSync-Cuda
 * @brief   Collection of static methods for Cuda status management. 
 */

/**
 * Converts a Cuda status code to a kStatus code.
 * 
 * At the time of this writing, this method does not attempt to recognize all of Cuda's individual 
 * error codes. Any unrecognized code will be reported as kERROR.
 * 
 * This method is used by the kCheckCuda and kTestCuda macros to translate Cuda runtime results.
 * 
 * @public                  @memberof kCudaStatus
 * @param   cudaStatus      Cuda runtime status code. 
 * @return                  kApi status code; kERROR for any Cuda code that is not individually recognized.
 * @see                     kCheckCuda, kTestCuda
 */
kFsFx(kStatus) kCudaStatus_Convert(k32s cudaStatus);

/**
 * Ensures that Cuda error state is cleared after making a Cuda runtime call.
 * 
 * The Cuda runtime accumulates all past errors until they are explicitly cleared. This approach 
 * conflicts with FireSync platform conventions, which recommend that errors should be reported via status
 * codes returned by functions, but not otherwise maintained.
 * 
 * As such, this method can be used to wrap calls to the Cuda runtime. The status code returned by this 
 * method is the same as the status code passed to this method. However, if the status code is not equal to 
 * cudaSuccess, then the cudaGetLastError method will be called, clearing Cuda's error state for the
 * current thread. 
 *
 * This method is used by the kCheckCuda and kTestCuda macros.
 *
 * @public                  @memberof kCudaStatus
 * @param   cudaStatus      Cuda runtime status code. 
 * @return                  Cuda runtime status code (same as cudaStatus argument).
 * @see                     kCheckCuda, kTestCuda, kCudaStatus_Verify
 */
kFsFx(k32s) kCudaStatus_Filter(k32s cudaStatus);

/**
 * Converts a Cuda status code to a boolean value and clears Cuda error state.
 * 
 * The Cuda runtime accumulates all past errors until they are explicitly cleared. This approach 
 * conflicts with FireSync platform conventions, which recommend that errors should be reported via status
 * codes returned by functions, but not otherwise maintained.
 * 
 * As such, this method can be used to wrap calls to the Cuda runtime. The method is equivalent to calling
 * kCudaStatus_Filter and comparing the result with cudaSuccess.
 * 
 * @public                  @memberof kCudaStatus
 * @param   cudaStatus      Cuda runtime status code. 
 * @return                  kTRUE if operation was successful (i.e., cudaStatus is equal to cudaSuccess).
 * @see                     kCudaStatus_Filter
 */
kFsFx(kBool) kCudaStatus_Verify(k32s cudaStatus);

/**
 * Returns a kStatus code that represents any accumulated cuda runtime errors. 
 * 
 * The Cuda runtime accumulates all past errors (on a per-thread basis) until they are explicitly 
 * cleared. This method can be used to get error state without clearing it. It is equivalent to 
 * passing the result of cudaPeekAtLastError to kCudaStatus_Convert. 
 *
 * The method accepts no arguments; it is declared with an explicit void parameter list so that 
 * C compilers treat the declaration as a full prototype and reject calls that pass arguments.
 *
 * @public      @memberof kCudaStatus
 * @return      kStatus value representing any accumulated Cuda error state.
 * @see         kCudaStatus_GetAndClear, kCudaStatus_Convert
 */
kFsFx(kStatus) kCudaStatus_Peek(void);

/**
 * Returns a kStatus code that represents any accumulated cuda runtime errors and clears error state.
 * 
 * The Cuda runtime accumulates all past errors (on a per-thread basis) until they are explicitly 
 * cleared. This method can be used to get and clear error state. It is equivalent to passing the 
 * result of cudaGetLastError to kCudaStatus_Convert. 
 *
 * The method accepts no arguments; it is declared with an explicit void parameter list so that 
 * C compilers treat the declaration as a full prototype and reject calls that pass arguments.
 *
 * @public      @memberof kCudaStatus
 * @return      kStatus value representing any accumulated Cuda error state.
 * @see         kCudaStatus_Peek, kCudaStatus_Convert
 */
kFsFx(kStatus) kCudaStatus_GetAndClear(void);

#endif
