Merge branch 'feature/human-readable-cuda-error' into 'master'

Print human-readable CUDA error

See merge request sheepitrenderfarm/client!284
This commit is contained in:
harlekin
2024-02-04 15:21:00 +00:00

View File

@@ -16,7 +16,142 @@ public class Nvidia implements GPULister {
//https://docs.blender.org/manual/en/3.3/render/cycles/gpu_rendering.html#optix-nvidia //https://docs.blender.org/manual/en/3.3/render/cycles/gpu_rendering.html#optix-nvidia
private static final String MINIMUM_DRIVER_VERSION = "470"; private static final String MINIMUM_DRIVER_VERSION = "470";
/**
 * Translates a numeric CUDA error code into its symbolic name so that log
 * messages are readable without looking the number up.
 *
 * The names are the CUDA runtime API (cudaError) identifiers.
 * NOTE(review): the caller in this file passes the result of cuInit, which is
 * a driver API call; presumably the numeric values coincide with the runtime
 * ones for the codes of interest - confirm against the CUDA documentation.
 *
 * @param errornum numeric error code returned by a CUDA call
 * @return the symbolic name for the code, or a generic placeholder when the
 *         code is not in the table; never null and never throws
 */
private String getCudaErrorString(int errornum) {
    // A switch is used rather than a sparse array: the previous version
    // indexed into a zero-length array and threw
    // ArrayIndexOutOfBoundsException on the very first assignment.
    switch (errornum) {
        case 0: return "cudaSuccess";
        case 1: return "cudaErrorInvalidValue";
        case 2: return "cudaErrorMemoryAllocation";
        case 3: return "cudaErrorInitializationError";
        case 4: return "cudaErrorCudartUnloading";
        case 5: return "cudaErrorProfilerDisabled";
        case 6: return "cudaErrorProfilerNotInitialized";
        case 7: return "cudaErrorProfilerAlreadyStarted";
        case 8: return "cudaErrorProfilerAlreadyStopped";
        case 9: return "cudaErrorInvalidConfiguration";
        case 12: return "cudaErrorInvalidPitchValue";
        case 13: return "cudaErrorInvalidSymbol";
        case 16: return "cudaErrorInvalidHostPointer";
        case 17: return "cudaErrorInvalidDevicePointer";
        case 18: return "cudaErrorInvalidTexture";
        case 19: return "cudaErrorInvalidTextureBinding";
        case 20: return "cudaErrorInvalidChannelDescriptor";
        case 21: return "cudaErrorInvalidMemcpyDirection";
        case 22: return "cudaErrorAddressOfConstant";
        case 23: return "cudaErrorTextureFetchFailed";
        case 24: return "cudaErrorTextureNotBound";
        case 25: return "cudaErrorSynchronizationError";
        case 26: return "cudaErrorInvalidFilterSetting";
        case 27: return "cudaErrorInvalidNormSetting";
        case 28: return "cudaErrorMixedDeviceExecution";
        case 31: return "cudaErrorNotYetImplemented";
        case 32: return "cudaErrorMemoryValueTooLarge";
        case 34: return "cudaErrorStubLibrary";
        case 35: return "cudaErrorInsufficientDriver";
        case 36: return "cudaErrorCallRequiresNewerDriver";
        case 37: return "cudaErrorInvalidSurface";
        case 43: return "cudaErrorDuplicateVariableName";
        case 44: return "cudaErrorDuplicateTextureName";
        case 45: return "cudaErrorDuplicateSurfaceName";
        case 46: return "cudaErrorDevicesUnavailable";
        case 49: return "cudaErrorIncompatibleDriverContext";
        case 52: return "cudaErrorMissingConfiguration";
        case 53: return "cudaErrorPriorLaunchFailure";
        case 65: return "cudaErrorLaunchMaxDepthExceeded";
        case 66: return "cudaErrorLaunchFileScopedTex";
        case 67: return "cudaErrorLaunchFileScopedSurf";
        case 68: return "cudaErrorSyncDepthExceeded";
        case 69: return "cudaErrorLaunchPendingCountExceeded";
        case 98: return "cudaErrorInvalidDeviceFunction";
        case 100: return "cudaErrorNoDevice";
        case 101: return "cudaErrorInvalidDevice";
        case 102: return "cudaErrorDeviceNotLicensed";
        case 103: return "cudaErrorSoftwareValidityNotEstablished";
        case 127: return "cudaErrorStartupFailure";
        case 200: return "cudaErrorInvalidKernelImage";
        case 201: return "cudaErrorDeviceUninitialized";
        case 205: return "cudaErrorMapBufferObjectFailed";
        case 206: return "cudaErrorUnmapBufferObjectFailed";
        case 207: return "cudaErrorArrayIsMapped";
        case 208: return "cudaErrorAlreadyMapped";
        case 209: return "cudaErrorNoKernelImageForDevice";
        case 210: return "cudaErrorAlreadyAcquired";
        case 211: return "cudaErrorNotMapped";
        case 212: return "cudaErrorNotMappedAsArray";
        case 213: return "cudaErrorNotMappedAsPointer";
        case 214: return "cudaErrorECCUncorrectable";
        case 215: return "cudaErrorUnsupportedLimit";
        case 216: return "cudaErrorDeviceAlreadyInUse";
        case 217: return "cudaErrorPeerAccessUnsupported";
        case 218: return "cudaErrorInvalidPtx";
        case 219: return "cudaErrorInvalidGraphicsContext";
        case 220: return "cudaErrorNvlinkUncorrectable";
        case 221: return "cudaErrorJitCompilerNotFound";
        case 222: return "cudaErrorUnsupportedPtxVersion";
        case 223: return "cudaErrorJitCompilationDisabled";
        case 224: return "cudaErrorUnsupportedExecAffinity";
        case 225: return "cudaErrorUnsupportedDevSideSync";
        case 300: return "cudaErrorInvalidSource";
        case 301: return "cudaErrorFileNotFound";
        case 302: return "cudaErrorSharedObjectSymbolNotFound";
        case 303: return "cudaErrorSharedObjectInitFailed";
        case 304: return "cudaErrorOperatingSystem";
        case 400: return "cudaErrorInvalidResourceHandle";
        case 401: return "cudaErrorIllegalState";
        case 402: return "cudaErrorLossyQuery";
        case 500: return "cudaErrorSymbolNotFound";
        case 600: return "cudaErrorNotReady";
        case 700: return "cudaErrorIllegalAddress";
        case 701: return "cudaErrorLaunchOutOfResources";
        case 702: return "cudaErrorLaunchTimeout";
        case 703: return "cudaErrorLaunchIncompatibleTexturing";
        case 704: return "cudaErrorPeerAccessAlreadyEnabled";
        case 705: return "cudaErrorPeerAccessNotEnabled";
        case 708: return "cudaErrorSetOnActiveProcess";
        case 709: return "cudaErrorContextIsDestroyed";
        case 710: return "cudaErrorAssert";
        case 711: return "cudaErrorTooManyPeers";
        case 712: return "cudaErrorHostMemoryAlreadyRegistered";
        case 713: return "cudaErrorHostMemoryNotRegistered";
        case 714: return "cudaErrorHardwareStackError";
        case 715: return "cudaErrorIllegalInstruction";
        case 716: return "cudaErrorMisalignedAddress";
        case 717: return "cudaErrorInvalidAddressSpace";
        case 718: return "cudaErrorInvalidPc";
        case 719: return "cudaErrorLaunchFailure";
        case 720: return "cudaErrorCooperativeLaunchTooLarge";
        case 800: return "cudaErrorNotPermitted";
        case 801: return "cudaErrorNotSupported";
        case 802: return "cudaErrorSystemNotReady";
        case 803: return "cudaErrorSystemDriverMismatch";
        case 804: return "cudaErrorCompatNotSupportedOnDevice";
        case 805: return "cudaErrorMpsConnectionFailed";
        case 806: return "cudaErrorMpsRpcFailure";
        case 807: return "cudaErrorMpsServerNotReady";
        case 808: return "cudaErrorMpsMaxClientsReached";
        case 809: return "cudaErrorMpsMaxConnectionsReached";
        case 810: return "cudaErrorMpsClientTerminated";
        case 811: return "cudaErrorCdpNotSupported";
        case 812: return "cudaErrorCdpVersionMismatch";
        case 900: return "cudaErrorStreamCaptureUnsupported";
        case 901: return "cudaErrorStreamCaptureInvalidated";
        case 902: return "cudaErrorStreamCaptureMerge";
        case 903: return "cudaErrorStreamCaptureUnmatched";
        case 904: return "cudaErrorStreamCaptureUnjoined";
        case 905: return "cudaErrorStreamCaptureIsolation";
        case 906: return "cudaErrorStreamCaptureImplicit";
        case 907: return "cudaErrorCapturedEvent";
        case 908: return "cudaErrorStreamCaptureWrongThread";
        case 909: return "cudaErrorTimeout";
        case 910: return "cudaErrorGraphExecUpdateFailure";
        case 911: return "cudaErrorExternalDevice";
        case 912: return "cudaErrorInvalidClusterSize";
        case 999: return "cudaErrorUnknown";
        case 10000: return "cudaErrorApiFailureBase";
        default:
            // Codes missing from the table (gaps, negatives, values added by
            // newer toolkits) must not break the caller's log line.
            return "unrecognized CUDA error code";
    }
}
@Override public List<GPUDevice> getGpus() { @Override public List<GPUDevice> getGpus() {
OS os = OS.getOS(); OS os = OS.getOS();
String path = os.getCUDALib(); String path = os.getCUDALib();
@@ -43,7 +178,7 @@ public class Nvidia implements GPULister {
result = cudalib.cuInit(0); result = cudalib.cuInit(0);
if (result != CUresult.CUDA_SUCCESS) { if (result != CUresult.CUDA_SUCCESS) {
System.out.println("Nvidia::getGpus cuInit failed (ret: " + result + ")"); System.out.println("Nvidia::getGpus cuInit failed: " + getCudaErrorString(result) + " (" + result + ")");
if (result == CUresult.CUDA_ERROR_UNKNOWN) { if (result == CUresult.CUDA_ERROR_UNKNOWN) {
System.out.println("If you are running Linux, this error is usually due to nvidia kernel module 'nvidia_uvm' not loaded."); System.out.println("If you are running Linux, this error is usually due to nvidia kernel module 'nvidia_uvm' not loaded.");
System.out.println("Relaunch the application as root or load the module."); System.out.println("Relaunch the application as root or load the module.");