99 "testing"
1010 "time"
1111
12+ "github.com/stretchr/testify/assert"
1213 "github.com/stretchr/testify/require"
1314
1415 "github.com/coder/envbox/integration/integrationtest"
@@ -41,8 +42,7 @@ func TestDocker_Nvidia(t *testing.T) {
 		)
 
 		// Assert that we can run nvidia-smi in the inner container.
-		_, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "nvidia-smi")
-		require.NoError(t, err, "failed to run nvidia-smi in the inner container")
+		assertInnerNvidiaSMI(ctx, t, ctID)
 	})
 
 	t.Run("Redhat", func(t *testing.T) {
@@ -52,16 +52,29 @@ func TestDocker_Nvidia(t *testing.T) {
 
 		// Start the envbox container.
 		ctID := startEnvboxCmd(ctx, t, integrationtest.RedhatImage, "root",
-			"-v", "/usr/lib/x86_64-linux-gnu:/var/coder/usr/lib64",
+			"-v", "/usr/lib/x86_64-linux-gnu:/var/coder/usr/lib",
 			"--env", "CODER_ADD_GPU=true",
-			"--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib64",
+			"--env", "CODER_USR_LIB_DIR=/var/coder/usr/lib",
 			"--runtime=nvidia",
 			"--gpus=all",
 		)
 
 		// Assert that we can run nvidia-smi in the inner container.
-		_, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "nvidia-smi")
-		require.NoError(t, err, "failed to run nvidia-smi in the inner container")
+		assertInnerNvidiaSMI(ctx, t, ctID)
+
+		// Make sure dnf still works. This checks for a regression due to
+		// gpuExtraRegex matching `libglib.so` in the outer container, which
+		// had a dependency on `libpcre.so.3` and would cause dnf to fail.
+		out, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "dnf")
+		if !assert.NoError(t, err, "failed to run dnf in the inner container") {
+			t.Logf("dnf output:\n%s", strings.TrimSpace(out))
+		}
+
+		// Make sure libglib.so is not present in the inner container.
+		out, err = execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "/bin/sh", "-c", "ls -1 /usr/lib/x86_64-linux-gnu/libglib*")
+		// An error is expected here; sh passes an unmatched glob to ls literally.
+		assert.Error(t, err, "libglib should not be present in the inner container")
+		assert.Contains(t, out, "No such file or directory", "libglib should not be present in the inner container")
 	})
 
 	t.Run("InnerUsrLibDirOverride", func(t *testing.T) {
@@ -79,11 +92,58 @@ func TestDocker_Nvidia(t *testing.T) {
7992 "--gpus=all" ,
8093 )
8194
82- // Assert that the libraries end up in the expected location in the inner container.
83- out , err := execContainerCmd (ctx , t , ctID , "docker" , "exec" , "workspace_cvm" , "ls" , "-l" , "/usr/lib/coder" )
95+ // Assert that the libraries end up in the expected location in the inner
96+ // container.
97+ out , err := execContainerCmd (ctx , t , ctID , "docker" , "exec" , "workspace_cvm" , "ls" , "-1" , "/usr/lib/coder" )
8498 require .NoError (t , err , "inner usr lib dir override failed" )
8599 require .Regexp (t , `(?i)(libgl|nvidia|vulkan|cuda)` , out )
86100 })
101+
102+ t .Run ("EmptyHostUsrLibDir" , func (t * testing.T ) {
103+ t .Parallel ()
104+ ctx , cancel := context .WithCancel (context .Background ())
105+ t .Cleanup (cancel )
106+ emptyUsrLibDir := t .TempDir ()
107+
108+ // Start the envbox container.
109+ ctID := startEnvboxCmd (ctx , t , integrationtest .UbuntuImage , "root" ,
110+ "-v" , emptyUsrLibDir + ":/var/coder/usr/lib" ,
111+ "--env" , "CODER_ADD_GPU=true" ,
112+ "--env" , "CODER_USR_LIB_DIR=/var/coder/usr/lib" ,
113+ "--runtime=nvidia" ,
114+ "--gpus=all" ,
115+ )
116+
117+ ofs := outerFiles (ctx , t , ctID , "/usr/lib/x86_64-linux-gnu/libnv*" )
118+ // Assert invariant: the outer container has the files we expect.
119+ require .NotEmpty (t , ofs , "failed to list outer container files" )
120+ // Assert that expected files are available in the inner container.
121+ assertInnerFiles (ctx , t , ctID , "/usr/lib/x86_64-linux-gnu/libnv*" , ofs ... )
122+ assertInnerNvidiaSMI (ctx , t , ctID )
123+ })
124+
125+ t .Run ("CUDASample" , func (t * testing.T ) {
126+ t .Parallel ()
127+
128+ ctx , cancel := context .WithCancel (context .Background ())
129+ t .Cleanup (cancel )
130+
131+ // Start the envbox container.
132+ ctID := startEnvboxCmd (ctx , t , integrationtest .CUDASampleImage , "root" ,
133+ "-v" , "/usr/lib/x86_64-linux-gnu:/var/coder/usr/lib" ,
134+ "--env" , "CODER_ADD_GPU=true" ,
135+ "--env" , "CODER_USR_LIB_DIR=/var/coder/usr/lib" ,
136+ "--runtime=nvidia" ,
137+ "--gpus=all" ,
138+ )
139+
140+ // Assert that we can run nvidia-smi in the inner container.
141+ assertInnerNvidiaSMI (ctx , t , ctID )
142+
143+ // Assert that /tmp/vectorAdd runs successfully in the inner container.
144+ _ , err := execContainerCmd (ctx , t , ctID , "docker" , "exec" , "workspace_cvm" , "/tmp/vectorAdd" )
145+ require .NoError (t , err , "failed to run /tmp/vectorAdd in the inner container" )
146+ })
87147}
88148
89149// dockerRuntimes returns the list of container runtimes available on the host.
@@ -101,6 +161,49 @@ func dockerRuntimes(t *testing.T) []string {
 	return strings.Split(raw, "\n")
 }
 
+// outerFiles returns the list of files in the outer container matching the
+// given pattern. It does this by running `ls -1` in the outer container.
+func outerFiles(ctx context.Context, t *testing.T, containerID, pattern string) []string {
+	t.Helper()
+	// We need to use /bin/sh -c so the glob is expanded inside the container.
+	out, err := execContainerCmd(ctx, t, containerID, "/bin/sh", "-c", "ls -1 "+pattern)
+	require.NoError(t, err, "failed to list outer container files")
+	files := strings.Split(strings.TrimSpace(out), "\n")
+	slices.Sort(files)
+	return files
+}
+
+// assertInnerFiles checks that all the files matching the given pattern exist
+// in the inner container.
+func assertInnerFiles(ctx context.Context, t *testing.T, containerID, pattern string, expected ...string) {
+	t.Helper()
+
+	// Get the list of files in the inner container.
+	// We need to use /bin/sh -c so the glob is expanded inside the container.
+	out, err := execContainerCmd(ctx, t, containerID, "docker", "exec", "workspace_cvm", "/bin/sh", "-c", "ls -1 "+pattern)
+	require.NoError(t, err, "failed to list inner container files")
+	innerFiles := strings.Split(strings.TrimSpace(out), "\n")
+
+	// Check that the expected files exist in the inner container.
+	missingFiles := make([]string, 0)
+	for _, expectedFile := range expected {
+		if !slices.Contains(innerFiles, expectedFile) {
+			missingFiles = append(missingFiles, expectedFile)
+		}
+	}
+	require.Empty(t, missingFiles, "missing files in inner container: %s", strings.Join(missingFiles, ", "))
+}
+
+// assertInnerNvidiaSMI checks that nvidia-smi runs successfully in the inner
+// container.
+func assertInnerNvidiaSMI(ctx context.Context, t *testing.T, containerID string) {
+	t.Helper()
+	// Assert that we can run nvidia-smi in the inner container.
+	out, err := execContainerCmd(ctx, t, containerID, "docker", "exec", "workspace_cvm", "nvidia-smi")
+	require.NoError(t, err, "failed to run nvidia-smi in the inner container")
+	require.Contains(t, out, "NVIDIA-SMI", "nvidia-smi output does not contain NVIDIA-SMI")
+}
+
 // startEnvboxCmd starts the envbox container with the given arguments.
 // Ideally we would use ory/dockertest for this, but it doesn't support
 // specifying the runtime. We have alternatively used the docker client library,