Skip to content

Commit

Permalink
Adl changes (#3)
Browse files Browse the repository at this point in the history
* Add some more APIs and CUDA runtime stuff

* Add missing hipMemcpy define

* hipMemcpy fixes

* Update cuew for cudart calls

* Implement ppGetDeviceProperties for CUDA

* Add getCurAPI

* [RPRNEXT-0] Fix a build error on linux.

* [POP-0] Fix a build error on linux.

Co-authored-by: Aaryaman Vasishta <[email protected]>
Co-authored-by: Sho Ikeda <[email protected]>
  • Loading branch information
3 people authored Feb 1, 2022
1 parent 187fbe3 commit e191b73
Show file tree
Hide file tree
Showing 6 changed files with 369 additions and 22 deletions.
58 changes: 51 additions & 7 deletions Pop/Pop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ int ppInitialize( Api api, ppU32 flags )
return hipewInit( HIPEW_INIT_HIP );
return PP_ERROR_OPEN_FAILED;
}
// Reports the backend API this wrapper currently dispatches to, i.e. the
// value of the file-level `s_api` selector that the __PP_FUNC* macros test.
//
// flags: accepted for signature compatibility; it is not consulted.
// Returns the current Api selector value.
Api ppGetCurAPI( ppU32 flags )
{
	(void)flags; // intentionally unused
	const Api current = s_api;
	return current;
}


//=================================
Expand All @@ -53,6 +57,11 @@ ppError cu2pp( CUresult a )
return (ppError)a;
}
// Converts a CUDA runtime status code into the wrapper's ppError.
// The numeric value is carried over unchanged; no remapping table is applied.
inline
ppError cuda2pp( cudaError_t a )
{
	const ppError converted = static_cast<ppError>( a );
	return converted;
}
inline
CUcontext* ppCtx2cu( ppCtx* a )
{
return (CUcontext*)a;
Expand All @@ -74,12 +83,18 @@ pprtcResult nvrtc2pp( nvrtcResult a )
}

// Dispatch helpers used inside the pp* wrapper functions.
// Each macro expands to two consecutive `if( ... ) return ...;` statements keyed on
// the file-level `s_api` selector. When `s_api` is neither API_CUDA nor API_HIP,
// neither statement returns and control falls through to whatever follows the macro
// in the calling function (the wrappers in this file follow it with
// `return ppErrorUnknown;`).
// NOTE: the expansion is not wrapped in braces or do/while, so it must be used as a
// standalone statement, never as the body of an un-braced if/else.

// Driver-style call: pastes `cu` / `hip` onto the given call expressions and converts
// the result with cu2pp / hip2pp. Use when the CUDA and HIP names or arguments differ.
#define __PP_FUNC1( cuname, hipname ) if( s_api == API_CUDA ) return cu2pp( cu##cuname ); if( s_api == API_HIP ) return hip2pp( hip##hipname );
// Runtime-style call: pastes `cuda` / `hip` onto the given call expressions and converts
// the result with cuda2pp / hip2pp.
#define __PP_FUNC2( cudaname, hipname ) if( s_api == API_CUDA ) return cuda2pp( cuda##cudaname ); if( s_api == API_HIP ) return hip2pp( hip##hipname );
//#define __PP_FUNC1( cuname, hipname ) if( s_api == API_CUDA || API == API_CUDA ) return cu2pp( cu##cuname ); if( s_api == API_HIP || API == API_HIP ) return hip2pp( hip##hipname );
// Shorthand for __PP_FUNC1 when the same call expression is valid after both the `cu`
// and the `hip` prefix.
#define __PP_FUNC( name ) if( s_api == API_CUDA ) return cu2pp( cu##name ); if( s_api == API_HIP ) return hip2pp( hip##name );
// Context calls: the CUDA side gets an extra `Ctx` infix (cuCtx<name>), the HIP side
// uses hip<name> directly.
#define __PP_CTXT_FUNC( name ) __PP_FUNC1(Ctx##name, name)
//#define __PP_CTXT_FUNC( name ) if( s_api == API_CUDA ) return cu2pp( cuCtx##name ); if( s_api == API_HIP ) return hip2pp( hip##name );
// Runtime-compilation calls: pastes `nvrtc` / `hiprtc` and converts the result with
// nvrtc2pp / hiprtc2pp.
#define __PPRTC_FUNC1( cuname, hipname ) if( s_api == API_CUDA ) return nvrtc2pp( nvrtc##cuname ); if( s_api == API_HIP ) return hiprtc2pp( hiprtc##hipname );

// Emits explicit template instantiations of `funcName` for each Api selector value
// (API_AUTOMATIC, API_CUDA, API_HIP). `args` is the parenthesised parameter list,
// e.g. `(unsigned int Flags)`.
// NOTE(review): this requires `funcName` to be a function template parameterised on
// Api with a visible definition — confirm for each function it is applied to.
#define __PP_FUNC_INSTANCE( funcName, args ) \
template ppError PPAPI funcName <API_AUTOMATIC> args;\
template ppError PPAPI funcName <API_CUDA> args;\
template ppError PPAPI funcName <API_HIP> args;


ppError PPAPI ppGetErrorName(ppError error, const char** pStr)
{
Expand All @@ -106,6 +121,9 @@ ppError PPAPI ppInit(unsigned int Flags)
__PP_FUNC( Init(Flags) );
return ppErrorUnknown;
}

__PP_FUNC_INSTANCE( ppInit, (unsigned int Flags) );

ppError PPAPI ppDriverGetVersion(int* driverVersion)
{
__PP_FUNC( DriverGetVersion(driverVersion) );
Expand All @@ -125,12 +143,14 @@ ppError PPAPI ppGetDeviceProperties(ppDeviceProp* props, int deviceId)
{
if( s_api == API_CUDA )
{
CUdevprop p;
cuDeviceGetProperties( &p, deviceId );
cudaDeviceProp p;
cudaError_t e = cudaGetDeviceProperties( &p, deviceId );
if (e != CUDA_SUCCESS)
return ppErrorUnknown;
char name[128];
cuDeviceGetName( name, 128, deviceId );
strcpy( props->name, name );
strcpy( props->name, p.name );
strcpy( props->gcnArchName, "" );
props->totalGlobalMem = p.totalGlobalMem;
printf("todo. implement me\n");
return ppSuccess;
}
Expand Down Expand Up @@ -268,6 +288,11 @@ ppError PPAPI ppMalloc(ppDeviceptr* dptr, size_t bytesize)
__PP_FUNC1( MemAlloc(dptr, bytesize), Malloc( dptr, bytesize ) );
return ppErrorUnknown;
}
// Runtime-API counterpart of ppMalloc: forwards to cudaMalloc / hipMalloc through
// __PP_FUNC2 instead of cuMemAlloc / hipMalloc.
//
// dptr:     receives the allocation handle; on the CUDA path it is reinterpreted as
//           CUdeviceptr* before the call.
// bytesize: requested size, passed through unchanged.
// Returns the backend status converted to ppError, or ppErrorUnknown when the active
// API is neither CUDA nor HIP.
ppError PPAPI ppMalloc2(ppDeviceptr* dptr, size_t bytesize)
{
__PP_FUNC2( Malloc((CUdeviceptr*)dptr, bytesize), Malloc(dptr, bytesize) );
return ppErrorUnknown;
}
ppError PPAPI ppMemAllocPitch(ppDeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes)
{
return ppErrorUnknown;
Expand All @@ -277,8 +302,20 @@ ppError PPAPI ppFree(ppDeviceptr dptr)
__PP_FUNC1( MemFree( dptr ), Free( dptr ) );
return ppErrorUnknown;
}
// Runtime-API counterpart of ppFree: forwards to cudaFree / hipFree through
// __PP_FUNC2 instead of cuMemFree / hipFree.
//
// dptr: allocation handle to release; on the CUDA path it is cast to CUdeviceptr.
// Returns the backend status converted to ppError, or ppErrorUnknown when the active
// API is neither CUDA nor HIP.
// NOTE(review): presumably meant to pair with ppMalloc2 — confirm with callers.
ppError PPAPI ppFree2(ppDeviceptr dptr)
{
__PP_FUNC2( Free((CUdeviceptr)dptr), Free(dptr) );
return ppErrorUnknown;
}

//-------------------
// Generic copy that forwards to cudaMemcpy / hipMemcpy through __PP_FUNC2.
//
// dstDevice: destination pointer, passed through unchanged.
// srcHost:   source pointer, passed through unchanged.
// ByteCount: number of bytes, passed through unchanged.
// kind:      cast numerically to cudaMemcpyKind / hipMemcpyKind, so the ppMemcpyKind
//            enumerator values must match the backend enums.
// Returns the backend status converted to ppError, or ppErrorUnknown when the active
// API is neither CUDA nor HIP.
// NOTE(review): the parameter names suggest host-to-device only, but the direction is
// taken from `kind`; the header declares them as plain `dst` / `src`.
ppError PPAPI ppMemcpy(void *dstDevice, void* srcHost, size_t ByteCount, ppMemcpyKind kind)
{
__PP_FUNC2( Memcpy(dstDevice, srcHost, ByteCount, (cudaMemcpyKind)kind),
Memcpy(dstDevice, srcHost, ByteCount, (hipMemcpyKind)kind) );
return ppErrorUnknown;
}

ppError PPAPI ppMemcpyHtoD(ppDeviceptr dstDevice, void* srcHost, size_t ByteCount)
{
__PP_FUNC1( MemcpyHtoD( dstDevice, srcHost, ByteCount ),
Expand All @@ -299,7 +336,7 @@ ppError PPAPI ppMemcpyDtoD(ppDeviceptr dstDevice, ppDeviceptr srcDevice, size_t

// Fills device memory starting at dstDevice with a repeated value.
//
// dstDevice: destination handle; cast to CUdeviceptr for cuMemsetD8 and to void* for
//            hipMemset.
// ui:        fill value, handed to the backend call as-is.
//            NOTE(review): cuMemsetD8 takes an 8-bit value, so only the low byte of
//            `ui` is expected to be used on the CUDA path — confirm against callers.
// N:         element count handed to the backend call (bytes for the 8-bit fill).
// Returns the backend status converted to ppError, or ppErrorUnknown when the active
// API is neither CUDA nor HIP.
ppError PPAPI ppMemset(ppDeviceptr dstDevice, unsigned int ui, size_t N)
{
	// Exactly one dispatch statement: a second dispatch stacked in front of this one
	// would return for both backends and leave this call unreachable.
	__PP_FUNC1( MemsetD8( (CUdeviceptr)dstDevice, ui, N ), Memset((void*)dstDevice, ui, N));
	return ppErrorUnknown;
}

Expand All @@ -326,6 +363,12 @@ ppError PPAPI ppModuleLaunchKernel(ppFunction f, unsigned int gridDimX, unsigned
ModuleLaunchKernel( (hipFunction_t)f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, (hipStream_t)hStream, kernelParams, extra ) );
return ppErrorUnknown;
}
// Forwards to cudaGetLastError / hipGetLastError through __PP_FUNC2.
//
// pp_error: cast to the backend error type and passed as the call argument.
// Returns the backend status converted to ppError, or ppErrorUnknown when the active
// API is neither CUDA nor HIP.
// NOTE(review): the stock cudaGetLastError / hipGetLastError take no parameters;
// confirm that the loader headers used here declare them with an argument.
ppError PPAPI ppGetLastError(ppError pp_error)
{
__PP_FUNC2(GetLastError((cudaError_t)pp_error),
GetLastError((hipError_t)pp_error));
return ppErrorUnknown;
}
//-------------------
pprtcResult PPAPI pprtcGetErrorString(pprtcResult result)
{
Expand Down Expand Up @@ -401,8 +444,9 @@ ppError PPAPI ppPointerGetAttributes(ppPointerAttribute* attr, ppDeviceptr dptr)
//-----------------
// Creates a stream on the active backend via the runtime-style entry points
// (cudaStreamCreate / hipStreamCreate).
//
// stream: receives the new stream handle; reinterpreted as cudaStream_t* or
//         hipStream_t* depending on the backend.
// Returns the backend status converted to ppError, or ppErrorUnknown when the active
// API is neither CUDA nor HIP.
ppError PPAPI ppStreamCreate(ppStream* stream)
{
	// Exactly one dispatch statement: a driver-API dispatch stacked in front of this
	// one would return for both backends and leave this call unreachable.
	__PP_FUNC2(StreamCreate((cudaStream_t*)stream),
		StreamCreate((hipStream_t*)stream));

	return ppErrorUnknown;
}

Expand Down
38 changes: 25 additions & 13 deletions Pop/Pop.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
//
#pragma once

#include <cstddef>

enum Api
{
API_AUTOMATIC,
Expand All @@ -34,6 +36,15 @@ enum ppError
ppErrorUnknown = 999,
};

// Copy direction selector for ppMemcpy.
// ppMemcpy casts these values directly to cudaMemcpyKind / hipMemcpyKind, so the
// numeric values below must stay identical to the backend enumerations.
enum ppMemcpyKind
{
ppMemcpyHostToHost = 0,     // host   -> host
ppMemcpyHostToDevice = 1,   // host   -> device
ppMemcpyDeviceToHost = 2,   // device -> host
ppMemcpyDeviceToDevice = 3, // device -> device
ppMemcpyDefault = 4         // NOTE(review): presumably lets the backend infer the direction from the pointers — confirm
};

// Unsigned integer type used for flag parameters (e.g. ppInitialize, ppGetCurAPI).
typedef unsigned int ppU32;
// Opaque device-memory handle; the implementation casts it to CUdeviceptr or void*
// when forwarding to the backend.
typedef unsigned long long ppDeviceptr;

Expand Down Expand Up @@ -543,10 +554,7 @@ typedef enum hipError_t {
* Stream CallBack struct
*/

// Declares `funcName` as a function template parameterised on the Api selector
// (defaulting to API_AUTOMATIC). `args` is the parenthesised parameter list,
// e.g. `(unsigned int Flags)`. The caller supplies the trailing semicolon.
//
// This macro only declares. Explicit instantiations belong in the implementation
// file (see __PP_FUNC_INSTANCE in Pop.cpp), where the template definition is visible.
// No `##` is used: pasting `funcName` onto `<` or `(` does not form a single valid
// preprocessing token and is rejected by GCC/Clang.
#define __PP_FUNC_DEC( funcName, args ) template<Api API=API_AUTOMATIC> ppError PPAPI funcName args


ppError PPAPI ppGetErrorName(ppError error, const char** pStr);
Expand Down Expand Up @@ -589,8 +597,10 @@ ppError PPAPI ppModuleGetGlobal(ppDeviceptr* dptr, size_t* bytes, ppModule hmod,
//ppError PPAPI ppModuleGetTexRef(textureReference** pTexRef, ppModule hmod, const char* name);
ppError PPAPI ppMemGetInfo(size_t* free, size_t* total);
ppError PPAPI ppMalloc(ppDeviceptr* dptr, size_t bytesize);
ppError PPAPI ppMalloc2(ppDeviceptr* dptr, size_t bytesize);
ppError PPAPI ppMemAllocPitch(ppDeviceptr* dptr, size_t* pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
ppError PPAPI ppFree(ppDeviceptr dptr);
ppError PPAPI ppFree2(ppDeviceptr dptr);
//ppError PPAPI ppMemGetAddressRange(ppDeviceptr* pbase, size_t* psize, ppDeviceptr dptr);
//ppError PPAPI ppHostMalloc(void** pp, size_t bytesize, unsigned int flags);
//ppError PPAPI ppHostFree(void* p);
Expand All @@ -601,7 +611,7 @@ ppError PPAPI ppFree(ppDeviceptr dptr);
//ppError PPAPI ppDeviceGetByPCIBusId(hipDevice_t* dev, const char* pciBusId);
//ppError PPAPI ppDeviceGetPCIBusId(char* pciBusId, int len, hipDevice_t dev);
//ppError PPAPI ppMemHostUnregister(void* p);
//ppError PPAPI ppMemcpy(ppDeviceptr dst, ppDeviceptr src, size_t ByteCount);
ppError PPAPI ppMemcpy(void *dst, void *src, size_t ByteCount, ppMemcpyKind kind);
//ppError PPAPI ppMemcpyPeer(ppDeviceptr dstDevice, hipCtx_t dstContext, ppDeviceptr srcDevice, hipCtx_t srcContext, size_t ByteCount);
ppError PPAPI ppMemcpyHtoD(ppDeviceptr dstDevice, void* srcHost, size_t ByteCount);
ppError PPAPI ppMemcpyDtoH(void* dstHost, ppDeviceptr srcDevice, size_t ByteCount);
Expand Down Expand Up @@ -672,6 +682,7 @@ ppError PPAPI ppModuleLaunchKernel(ppFunction f, unsigned int gridDimX, unsigned
//ppError PPAPI ppGraphicsUnmapResources(unsigned int count, hipGraphicsResource* resources, ppStream hStream);
//ppError PPAPI ppGraphicsGLRegisterBuffer(hipGraphicsResource* pCudaResource, GLuint buffer, unsigned int Flags);
//ppError PPAPI ppGLGetDevices(unsigned int* pHipDeviceCount, int* pHipDevices, unsigned int hipDeviceCount, hipGLDeviceList deviceList);
ppError PPAPI ppGetLastError(ppError pp_error);
pprtcResult PPAPI pprtcGetErrorString(pprtcResult result);
pprtcResult PPAPI pprtcAddNameExpression(pprtcProgram prog, const char* name_expression);
pprtcResult PPAPI pprtcCompileProgram(pprtcProgram prog, int numOptions, const char** options);
Expand All @@ -693,16 +704,17 @@ enum {


// Loads/initialises the backend loader for the requested API.
// NOTE(review): only the tail of the implementation is visible here (it returns the
// hipewInit result or PP_ERROR_OPEN_FAILED); confirm the full contract in Pop.cpp.
int ppInitialize( Api api, ppU32 flags );
// Returns the Api value the wrapper is currently dispatching to; `flags` is unused by
// the implementation.
Api ppGetCurAPI( ppU32 flags );


#include <stdint.h>

// Three-component unsigned 32-bit triple.
// When compiled as C++, a constexpr constructor is provided in which every
// component defaults to 1, so `dim3()`, `dim3(a)` and `dim3(a, b)` are all valid.
typedef struct dim3 {
	uint32_t x;  ///< x
	uint32_t y;  ///< y
	uint32_t z;  ///< z
#ifdef __cplusplus
	constexpr dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1)
		: x(_x)
		, y(_y)
		, z(_z)
	{
	}
#endif
} dim3;
//typedef struct dim3 {
// uint32_t x; ///< x
// uint32_t y; ///< y
// uint32_t z; ///< z
//#ifdef __cplusplus
// constexpr dim3(uint32_t _x = 1, uint32_t _y = 1, uint32_t _z = 1) : x(_x), y(_y), z(_z){};
//#endif
//} dim3;

Loading

0 comments on commit e191b73

Please sign in to comment.