JuliaWeb · JamesWrigley · Dec 20, 2024 · Dec 20, 2024
diff --git a/docs/src/changelog.md b/docs/src/changelog.md
@@ -7,6 +7,14 @@ CurrentModule = LibSSH
 This documents notable changes in LibSSH.jl. The format is based on [Keep a
 Changelog](https://keepachangelog.com).
 
+## Unreleased
+
+### Changed
+- Made the finalizers for [`Session`](@ref), [`SshChannel`](@ref), and
+  [`SftpSession`](@ref) slightly more robust. It's still not recommended to rely
+  on them to clean up all resources but in most cases they should be able to do
+  so ([#31]).
+
 ## [v0.7.1] - 2024-12-06
 
 ### Added

diff --git a/src/channel.jl b/src/channel.jl
@@ -13,6 +13,13 @@
 
 The type is named `SshChannel` to avoid confusion with Julia's own `Channel`
 type.
+
+!!! warning
+    If callbacks are set on an `SshChannel` (e.g. to implement a server) it
+    *must* be closed explicitly with [`Base.close(::SshChannel)`](@ref). It is
+    not safe to allow the finalizer to clean up the resources since callback
+    functions may be called while the `SshChannel` is closed and they may yield
+    to allow task switching, which is forbidden inside finalizers.
 """
 mutable struct SshChannel
     ptr::Union{lib.ssh_channel, Nothing}
@@ -51,12 +58,18 @@
 # close(SshChannel) can throw, which we don't want to happen in a finalizer so
 # we wrap it in a try-catch.
 function _finalizer(sshchan::SshChannel)
-    try
-        close(sshchan)
-    catch ex
-        # Note the use of @spawn to avoid a task switch, which is forbidden in a
-        # finalizer.
-        Threads.@spawn @error "Caught exception while finalizing SshChannel" exception=(ex, catch_backtrace())
+    if trylock(sshchan.close_lock)
+        try
+            close(sshchan)
+        catch ex
+            # Note the use of @spawn to avoid a task switch, which is forbidden in a
+            # finalizer.
+            Threads.@spawn @error "Caught exception while finalizing SshChannel" exception=(ex, catch_backtrace())
+        finally
+            unlock(sshchan.close_lock)
+        end
+    else
+        finalizer(_finalizer, sshchan)
     end
 end
 
@@ -158,14 +171,17 @@
         throw(ArgumentError("Calling close() on a non-owning SshChannel is not allowed to avoid accidental double-frees, see the docs for more information."))
     end
 
-    # Even though we hold a lock in this section it's still possible for the
-    # function to be called recursively and enter it again anyway. The reason is
-    # because lib.ssh_channel_send_eof() and lib.ssh_channel_close() both flush
-    # the channel, which will trigger any callbacks. And if the callbacks happen
-    # to call close(), then the lock will be taken anyway (because it's a
-    # reentrant lock). There's not much we can do about this apart from making
-    # close() as robust as possible, which is why there are so many checks.
+    # Note that the finalizer will take the lock before calling close(), so
+    # we can safely call @lock here.
     @lock sshchan.close_lock begin
+        # Even though we hold a lock in this section it's still possible for the
+        # function to be called recursively and enter it again anyway. The reason is
+        # because lib.ssh_channel_send_eof() and lib.ssh_channel_close() both flush
+        # the channel, which will trigger any callbacks. And if the callbacks happen
+        # to call close(), then the lock will be taken anyway (because it's a
+        # reentrant lock). There's not much we can do about this apart from making
+        # close() as robust as possible, which is why there are so many checks.
+
         if isassigned(sshchan)
             # Remove from the sessions list of active channels. findfirst()
             # should only return nothing if the function is being called

diff --git a/src/session.jl b/src/session.jl
@@ -13,8 +13,11 @@
 the `ssh_session`, i.e. they aren't simply fields of the struct. A `Session` may
 be owning or non-owning of its internal pointer to a `lib.ssh_session`.
 
-It must be closed explicitly with [`Base.close(::Session)`](@ref) or it will
-leak memory.
+!!! warning
+    It's *highly recommended* to close `Session`'s explicitly with
+    [`Base.close(::Session)`](@ref). Finalizers do not allow task switches so
+    the finalizer for `Session` will abruptly kill the session and free its
+    memory, which may cause problems in other code relying on the `Session`.
 """
 mutable struct Session
     ptr::Union{lib.ssh_session, Nothing}
@@ -123,11 +126,22 @@
 
 Base.lock(session::Session) = lock(session._lock)
 Base.unlock(session::Session) = unlock(session._lock)
+Base.islocked(session::Session) = islocked(session._lock)
+Base.trylock(session::Session) = trylock(session._lock)
 
 # Non-throwing finalizer for Session objects
 function _finalizer(session::Session)
     if isopen(session)
-        Threads.@spawn @error "$(session) has not been explicitly closed, this is a memory leak!"
+        if !trylock(session)
+            finalizer(_finalizer, session)
+        else
+            try
+                lib.ssh_free(session)
+                session.ptr = nothing
+            finally
+                unlock(session)
+            end
+        end
     end
 end
 

diff --git a/src/sftp.jl b/src/sftp.jl
@@ -121,9 +121,11 @@
 $(TYPEDEF)
 
 This represents a SFTP session, through which one can do SFTP operations. It is
-only usable while its parent [`Session`](@ref) is still open, and it must be
-closed explicitly with [`Base.close(::SftpSession)`](@ref) or it will leak
-memory.
+only usable while its parent [`Session`](@ref) is still open.
+
+!!! warning
+    `SftpSession`'s with open [`SftpFile`](@ref)'s *must* be closed explicitly
+    with [`Base.close(::SftpSession)`](@ref) or they will leak memory.
 """
 mutable struct SftpSession
     ptr::Union{lib.sftp_session, Nothing}
@@ -168,8 +170,24 @@
 end
 
 function _finalizer(sftp::SftpSession)
-    if isopen(sftp)
-        Threads.@spawn @error "$(sftp) has not been closed, this is a memory leak!"
+    # Closing an SftpSession requires locking at least SftpSession and its
+    # parent Session.
+    if trylock(sftp)
+        try
+            if trylock(sftp.session)
+                try
+                    close(sftp; finalizing=true)
+                finally
+                    unlock(sftp.session)
+                end
+            else
+                finalizer(_finalizer, sftp)
+            end
+        finally
+            unlock(sftp)
+        end
+    else
+        finalizer(_finalizer, sftp)
     end
 end
 
@@ -210,6 +228,9 @@
 """
 Base.unlock(sftp::SftpSession) = unlock(sftp._lock)
 
+Base.islocked(sftp::SftpSession) = islocked(sftp._lock)
+Base.trylock(sftp::SftpSession) = trylock(sftp._lock)
+
 Base.isassigned(sftp::SftpSession) = !isnothing(sftp.ptr)
 
 """
@@ -232,11 +253,17 @@
 
 Close an `SftpSession`. This will also close any open files.
 """
-function Base.close(sftp::SftpSession)
-    if isassigned(sftp)
-        # Close all open files
-        for i in reverse(eachindex(sftp.files))
-            close(sftp.files[i])
+function Base.close(sftp::SftpSession; finalizing=false)
+    @lock sftp if isassigned(sftp)
+        if !finalizing
+            # Close all open files
+            for i in reverse(eachindex(sftp.files))
+                close(sftp.files[i])
+            end
+        elseif !isempty(sftp.files)
+            # We cannot close files in a finalizer because that requires a
+            # network call.
+            Threads.@spawn @error "$(sftp) has open files that couldn't be closed in the finalizer. This is a memory leak! You must close() the SftpSession explicitly."
         end
 
         # Remove from the parent session

diff --git a/test/LibSSHTests.jl b/test/LibSSHTests.jl
@@ -396,6 +396,15 @@ end
     # And we shouldn't be able to wait on a closed session
     @test_throws InvalidStateException wait(session)
 
+    # Test the finalizer
+    ssh.Session("localhost"; auto_connect=false) do session2
+        finalize(session2)
+        @test !isopen(session2)
+
+        # Finalizing a closed session should do nothing
+        finalize(session2)
+    end
+
     # Test initializing with a socket instead of a port. We do this by setting
     # up two dummy servers, the one on port 2222 is what we want to connect to
     # and the one on port 2223 is the simulated jump host we have to go
@@ -544,6 +553,7 @@ end
         demo_server_with_session(2222) do session
             # Create a channel
             sshchan = ssh.SshChannel(session)
+            @test only(session.closeables) === sshchan
 
             # Create a non-owning channel and make sure that we can't close it
             non_owning_sshchan = ssh.SshChannel(sshchan.ptr; own=false)
@@ -557,6 +567,17 @@ end
             @test isnothing(sshchan.ptr)
             @test isempty(session.closeables)
         end
+
+        # Test the channel finalizer
+        demo_server_with_session(2222) do session
+            sshchan = ssh.SshChannel(session)
+            finalize(sshchan)
+            @test !isassigned(sshchan)
+            @test isempty(session.closeables)
+
+            # Finalizing a closed channel should do nothing
+            finalize(sshchan)
+        end
     end
 
     @testset "Command execution" begin
@@ -663,9 +684,21 @@ end
 
             # Test the finalizer
             ssh.SftpSession(session) do sftp
-                # We yield() to allow the spawned task from the finalizer to
-                # print its message.
-                @test_logs (:error, r".+memory leak.+") (finalize(sftp); yield())
+                finalize(sftp)
+                @test !isassigned(sftp)
+                @test isempty(session.closeables)
+
+                # Finalizing twice shouldn't do anything
+                finalize(sftp)
+            end
+
+            ssh.SftpSession(session) do sftp
+                # With a file open the finalizer should warn us of a memory
+                # leak. We yield() to allow the spawned task from the finalizer
+                # to print its message.
+                open(tempdir(), sftp) do file
+                    @test_logs (:error, r".+memory leak.+") (finalize(sftp); yield())
+                end
             end
 
             # Test the do-constructor