diff --git a/House3D/common.py b/House3D/common.py
index b9e2210..25f4b35 100644
--- a/House3D/common.py
+++ b/House3D/common.py
@@ -80,7 +80,7 @@ def detect_nvidia_devices():
             ret.append(k)
         else:
             break
-    return ret
+    return list(range(len(ret)))
 
 if __name__ == '__main__':
     print(detect_nvidia_devices())
diff --git a/renderer/README.md b/renderer/README.md
index 82971e2..18bdaf7 100644
--- a/renderer/README.md
+++ b/renderer/README.md
@@ -65,11 +65,11 @@ the following two options on Linux:
     very low framerate.
   + Certain types of X session (e.g. a ssh-forwarded X session, a VNC session)
     may not support the necessary render features needed.
-    If you are under a SSH session with X forwarding, make sure to 
+    If you are under a SSH session with X forwarding, make sure to
     `unset DISPLAY` to disable the GLX backend.
 2. Otherwise, it will use the __EGL rendering backend__, which requires a decent
    Nvidia GPU.
-   It also has the option to choose which GPU to use, therefore you can run 
+   It also has the option to choose which GPU to use, therefore you can run
    multiple rendering instances on __multiple GPUs__.
 
 On Mac, it will always use the CGL backend.
@@ -110,5 +110,3 @@ Please tell us the following if you encounter any build issues or the code fails
    + GPU or driver does not support EGL.
    + Running inside container (e.g. docker) with an old driver may also result
      in such error.
-8. EGL detected >0 devices but says "Cannot access /dev/nvidiaX":
-   If you're inside cgroup/container, initialize the renderer with a device id from `detect_nvidia_devices()` in `common.py`
diff --git a/renderer/gl/glContext.cc b/renderer/gl/glContext.cc
index 969306e..468a34b 100644
--- a/renderer/gl/glContext.cc
+++ b/renderer/gl/glContext.cc
@@ -14,6 +14,7 @@
 #ifdef __linux__
 #include <GL/glx.h>
 #include <GL/glxext.h>
+#include <fcntl.h>
 #endif
 
 #include "lib/debugutils.hh"
@@ -40,6 +41,16 @@ const EGLint EGLpbufferAttribs[] = {
   EGL_NONE,
 };
 
+
+// Returns true iff this process can actually open /dev/nvidia{device}.
+bool check_nvidia_readable(int device) {
+  string dev = ssprintf("/dev/nvidia%d", device);
+  int ret = open(dev.c_str(), O_RDONLY);
+  if (ret == -1)
+    return false;
+  close(ret);
+  return true;
+}
+
 const int GLXcontextAttribs[] = {
   GLX_CONTEXT_MAJOR_VERSION_ARB, 3,
   GLX_CONTEXT_MINOR_VERSION_ARB, 3,
@@ -138,16 +149,32 @@ EGLContext::EGLContext(Geometry win_size, int device): GLContext{win_size} {
     }
     eglQueryDevicesEXT(MAX_DEVICES, eglDevs, &numDevices);
-    cerr << "[EGL] Detected " << numDevices << " devices. Using device " << device << endl;
-    m_assert(device < numDevices);
-    eglDpy_ = eglGetPlatformDisplayEXT(EGL_PLATFORM_DEVICE_EXT, eglDevs[device], 0);
-
-    // cgroup may block our access to /dev/nvidiaX, but eglQueryDevices can still see them.
-    string dev = ssprintf("/dev/nvidia%d", device);
-    int ret = open(dev.c_str(), O_RDONLY);
-    if (ret == -1) {
-      error_exit(ssprintf("Cannot access %s! See README if you're inside cgroup/container.", dev.c_str()));
+    std::vector<int> visible_devices;
+    if (numDevices > 1) {  // we must be using nvidia GPUs
+      // cgroup may block our access to /dev/nvidiaX, but eglQueryDevices can still see them.
+      for (int i = 0; i < numDevices; ++i) {
+        if (check_nvidia_readable(i))
+          visible_devices.push_back(i);
+      }
+    } else {
+      // TODO we may still be using nvidia GPUs, but there is no way to tell.
+      // But it's very rare that you'll start a docker and hide the only GPU from it.
+      visible_devices.push_back(0);
     }
+
+    if (device >= visible_devices.size()) {
+      error_exit(ssprintf("[EGL] Requested device %d but only found %lu devices", device, visible_devices.size()));
+    }
+
+    if (visible_devices.size() == numDevices) {
+      cerr << "[EGL] Detected " << numDevices << " devices. Using device " << device << endl;
+    } else {
+      cerr << "[EGL] " << visible_devices.size() << " out of " << numDevices <<
+        " devices are accessible. Using device " << device << " whose physical id is " << visible_devices[device] << "." << endl;
+      device = visible_devices[device];
+    }
+    eglDpy_ = eglGetPlatformDisplayEXT(EGL_PLATFORM_DEVICE_EXT, eglDevs[device], 0);
   }
 
   EGLint major, minor;
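
Note on usage after this patch: device ids become logical. Both `detect_nvidia_devices()` and the EGL backend count only the GPUs the current process can actually access, and the backend remaps a logical id to its physical /dev/nvidiaX internally. Below is a minimal sketch of the intended caller-side pattern, assuming the `objrender.RenderAPI(w, h, device)` binding used in this repo's examples (adjust names if your build differs):

    from House3D import objrender
    from House3D.common import detect_nvidia_devices

    # Logical ids 0..N-1, where N is the number of accessible GPUs; valid to
    # pass as `device` even inside a cgroup/container that hides some GPUs.
    devices = detect_nvidia_devices()

    # One rendering instance per accessible GPU; the EGL backend maps each
    # logical id back to the correct /dev/nvidiaX.
    apis = [objrender.RenderAPI(600, 450, device=d) for d in devices]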