diff --git a/src/main/java/com/sheepit/client/hardware/gpu/nvidia/Nvidia.java b/src/main/java/com/sheepit/client/hardware/gpu/nvidia/Nvidia.java index 68d0c00..b888940 100644 --- a/src/main/java/com/sheepit/client/hardware/gpu/nvidia/Nvidia.java +++ b/src/main/java/com/sheepit/client/hardware/gpu/nvidia/Nvidia.java @@ -16,7 +16,142 @@ public class Nvidia implements GPULister { //https://docs.blender.org/manual/en/3.3/render/cycles/gpu_rendering.html#optix-nvidia private static final String MINIMUM_DRIVER_VERSION = "470"; - + + private String getCudaErrorString(int errornum) { + String[] cudaerror = {}; + cudaerror[0] = "cudaSuccess"; + cudaerror[1] = "cudaErrorInvalidValue"; + cudaerror[2] = "cudaErrorMemoryAllocation"; + cudaerror[3] = "cudaErrorInitializationError"; + cudaerror[4] = "cudaErrorCudartUnloading"; + cudaerror[5] = "cudaErrorProfilerDisabled"; + cudaerror[6] = "cudaErrorProfilerNotInitialized"; + cudaerror[7] = "cudaErrorProfilerAlreadyStarted"; + cudaerror[8] = "cudaErrorProfilerAlreadyStopped"; + cudaerror[9] = "cudaErrorInvalidConfiguration"; + cudaerror[12] = "cudaErrorInvalidPitchValue"; + cudaerror[13] = "cudaErrorInvalidSymbol"; + cudaerror[16] = "cudaErrorInvalidHostPointer"; + cudaerror[17] = "cudaErrorInvalidDevicePointer"; + cudaerror[18] = "cudaErrorInvalidTexture"; + cudaerror[19] = "cudaErrorInvalidTextureBinding"; + cudaerror[20] = "cudaErrorInvalidChannelDescriptor"; + cudaerror[21] = "cudaErrorInvalidMemcpyDirection"; + cudaerror[22] = "cudaErrorAddressOfConstant"; + cudaerror[23] = "cudaErrorTextureFetchFailed"; + cudaerror[24] = "cudaErrorTextureNotBound"; + cudaerror[25] = "cudaErrorSynchronizationError"; + cudaerror[26] = "cudaErrorInvalidFilterSetting"; + cudaerror[27] = "cudaErrorInvalidNormSetting"; + cudaerror[28] = "cudaErrorMixedDeviceExecution"; + cudaerror[31] = "cudaErrorNotYetImplemented"; + cudaerror[32] = "cudaErrorMemoryValueTooLarge"; + cudaerror[34] = "cudaErrorStubLibrary"; + cudaerror[35] = "cudaErrorInsufficientDriver"; + cudaerror[36] = "cudaErrorCallRequiresNewerDriver"; + cudaerror[37] = "cudaErrorInvalidSurface"; + cudaerror[43] = "cudaErrorDuplicateVariableName"; + cudaerror[44] = "cudaErrorDuplicateTextureName"; + cudaerror[45] = "cudaErrorDuplicateSurfaceName"; + cudaerror[46] = "cudaErrorDevicesUnavailable"; + cudaerror[49] = "cudaErrorIncompatibleDriverContext"; + cudaerror[52] = "cudaErrorMissingConfiguration"; + cudaerror[53] = "cudaErrorPriorLaunchFailure"; + cudaerror[65] = "cudaErrorLaunchMaxDepthExceeded"; + cudaerror[66] = "cudaErrorLaunchFileScopedTex"; + cudaerror[67] = "cudaErrorLaunchFileScopedSurf"; + cudaerror[68] = "cudaErrorSyncDepthExceeded"; + cudaerror[69] = "cudaErrorLaunchPendingCountExceeded"; + cudaerror[98] = "cudaErrorInvalidDeviceFunction"; + cudaerror[100] = "cudaErrorNoDevice"; + cudaerror[101] = "cudaErrorInvalidDevice"; + cudaerror[102] = "cudaErrorDeviceNotLicensed"; + cudaerror[103] = "cudaErrorSoftwareValidityNotEstablished"; + cudaerror[127] = "cudaErrorStartupFailure"; + cudaerror[200] = "cudaErrorInvalidKernelImage"; + cudaerror[201] = "cudaErrorDeviceUninitialized"; + cudaerror[205] = "cudaErrorMapBufferObjectFailed"; + cudaerror[206] = "cudaErrorUnmapBufferObjectFailed"; + cudaerror[207] = "cudaErrorArrayIsMapped"; + cudaerror[208] = "cudaErrorAlreadyMapped"; + cudaerror[209] = "cudaErrorNoKernelImageForDevice"; + cudaerror[210] = "cudaErrorAlreadyAcquired"; + cudaerror[211] = "cudaErrorNotMapped"; + cudaerror[212] = "cudaErrorNotMappedAsArray"; + cudaerror[213] = "cudaErrorNotMappedAsPointer"; + cudaerror[214] = "cudaErrorECCUncorrectable"; + cudaerror[215] = "cudaErrorUnsupportedLimit"; + cudaerror[216] = "cudaErrorDeviceAlreadyInUse"; + cudaerror[217] = "cudaErrorPeerAccessUnsupported"; + cudaerror[218] = "cudaErrorInvalidPtx"; + cudaerror[219] = "cudaErrorInvalidGraphicsContext"; + cudaerror[220] = "cudaErrorNvlinkUncorrectable"; + cudaerror[221] = "cudaErrorJitCompilerNotFound"; + cudaerror[222] = "cudaErrorUnsupportedPtxVersion"; + cudaerror[223] = "cudaErrorJitCompilationDisabled"; + cudaerror[224] = "cudaErrorUnsupportedExecAffinity"; + cudaerror[225] = "cudaErrorUnsupportedDevSideSync"; + cudaerror[300] = "cudaErrorInvalidSource"; + cudaerror[301] = "cudaErrorFileNotFound"; + cudaerror[302] = "cudaErrorSharedObjectSymbolNotFound"; + cudaerror[303] = "cudaErrorSharedObjectInitFailed"; + cudaerror[304] = "cudaErrorOperatingSystem"; + cudaerror[400] = "cudaErrorInvalidResourceHandle"; + cudaerror[401] = "cudaErrorIllegalState"; + cudaerror[402] = "cudaErrorLossyQuery"; + cudaerror[500] = "cudaErrorSymbolNotFound"; + cudaerror[600] = "cudaErrorNotReady"; + cudaerror[700] = "cudaErrorIllegalAddress"; + cudaerror[701] = "cudaErrorLaunchOutOfResources"; + cudaerror[702] = "cudaErrorLaunchTimeout"; + cudaerror[703] = "cudaErrorLaunchIncompatibleTexturing"; + cudaerror[704] = "cudaErrorPeerAccessAlreadyEnabled"; + cudaerror[705] = "cudaErrorPeerAccessNotEnabled"; + cudaerror[708] = "cudaErrorSetOnActiveProcess"; + cudaerror[709] = "cudaErrorContextIsDestroyed"; + cudaerror[710] = "cudaErrorAssert"; + cudaerror[711] = "cudaErrorTooManyPeers"; + cudaerror[712] = "cudaErrorHostMemoryAlreadyRegistered"; + cudaerror[713] = "cudaErrorHostMemoryNotRegistered"; + cudaerror[714] = "cudaErrorHardwareStackError"; + cudaerror[715] = "cudaErrorIllegalInstruction"; + cudaerror[716] = "cudaErrorMisalignedAddress"; + cudaerror[717] = "cudaErrorInvalidAddressSpace"; + cudaerror[718] = "cudaErrorInvalidPc"; + cudaerror[719] = "cudaErrorLaunchFailure"; + cudaerror[720] = "cudaErrorCooperativeLaunchTooLarge"; + cudaerror[800] = "cudaErrorNotPermitted"; + cudaerror[801] = "cudaErrorNotSupported"; + cudaerror[802] = "cudaErrorSystemNotReady"; + cudaerror[803] = "cudaErrorSystemDriverMismatch"; + cudaerror[804] = "cudaErrorCompatNotSupportedOnDevice"; + cudaerror[805] = "cudaErrorMpsConnectionFailed"; + cudaerror[806] = "cudaErrorMpsRpcFailure"; + cudaerror[807] = "cudaErrorMpsServerNotReady"; + cudaerror[808] = "cudaErrorMpsMaxClientsReached"; + cudaerror[809] = "cudaErrorMpsMaxConnectionsReached"; + cudaerror[810] = "cudaErrorMpsClientTerminated"; + cudaerror[811] = "cudaErrorCdpNotSupported"; + cudaerror[812] = "cudaErrorCdpVersionMismatch"; + cudaerror[900] = "cudaErrorStreamCaptureUnsupported"; + cudaerror[901] = "cudaErrorStreamCaptureInvalidated"; + cudaerror[902] = "cudaErrorStreamCaptureMerge"; + cudaerror[903] = "cudaErrorStreamCaptureUnmatched"; + cudaerror[904] = "cudaErrorStreamCaptureUnjoined"; + cudaerror[905] = "cudaErrorStreamCaptureIsolation"; + cudaerror[906] = "cudaErrorStreamCaptureImplicit"; + cudaerror[907] = "cudaErrorCapturedEvent"; + cudaerror[908] = "cudaErrorStreamCaptureWrongThread"; + cudaerror[909] = "cudaErrorTimeout"; + cudaerror[910] = "cudaErrorGraphExecUpdateFailure"; + cudaerror[911] = "cudaErrorExternalDevice"; + cudaerror[912] = "cudaErrorInvalidClusterSize"; + cudaerror[999] = "cudaErrorUnknown"; + cudaerror[10000] = "cudaErrorApiFailureBase"; + + return cudaerror[errornum]; + } + @Override public List getGpus() { OS os = OS.getOS(); String path = os.getCUDALib(); @@ -43,7 +178,7 @@ public class Nvidia implements GPULister { result = cudalib.cuInit(0); if (result != CUresult.CUDA_SUCCESS) { - System.out.println("Nvidia::getGpus cuInit failed (ret: " + result + ")"); + System.out.println("Nvidia::getGpus cuInit failed: " + getCudaErrorString(result) + " (" + result + ")"); if (result == CUresult.CUDA_ERROR_UNKNOWN) { System.out.println("If you are running Linux, this error is usually due to nvidia kernel module 'nvidia_uvm' not loaded."); System.out.println("Relaunch the application as root or load the module.");