Skip to content

net: TCPConn.ReadFrom hangs when io.Reader is TCPConn or UnixConn, Linux kernel < 5.1 #63795

Closed
@sebastien-rosset

Description

@sebastien-rosset

What version of Go are you using (go version)?

$ go version 1.21.3

Does this issue reproduce with the latest release?

  1. Yes, the issue can be reproduced with go 1.21.3.
  2. The issue has been fixed in commit internal/poll/splice_linux.go. I.e. the same program that fails in 1.21.3 works with go master. The commit has not been back-ported to the 1.21 branch.

This is a regression from 1.20.x.

What operating system and processor architecture are you using (go env)?

The problem can be reproduced when running the program on Linux with amd64 cpu architecture and kernel version < 5.1. We have reproduced the problem on the following systems:

  1. 4.1.21-WR8.0.0.28-standard x86_64 x86_64 x86_64 GNU/Linux
  2. CentOS 7 3.10.0-1160.95.1.el7.x86_64

For our tests, we did a cross-compilation so there was no go tool chain on the target system. But as a follow-up we should be able to reproduce the test without cross-compilation.

go env Output

What did you do?

This is similar of issue #59041, but @panjf2000 asked to file a new issue.

We compiled and executed the following program with CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build .
It's not possible to reproduce the problem in go playground because the playground has Linux kernel > 5.1.

package main

import (
	"flag"
	"net/http"
	"net/url"
	"io"
	"os"
	"os/signal"
	"runtime"
	"time"
	"fmt"
	"strings"
	"sync"

	"github.com/elazarl/goproxy"
	"golang.org/x/sys/unix"
	"github.com/dustin/go-humanize"
	"golang.org/x/net/proxy"

)

var wg sync.WaitGroup

func main() {
	flag.Parse()
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, unix.SIGUSR1, unix.SIGTERM, unix.SIGINT, unix.SIGABRT)
	go func() {
		// Wait for signal
		for sig := range sigs {
			switch sig {
			case unix.SIGUSR1:
				bt := make([]byte, 250*1024)
				size := runtime.Stack(bt, true)

				fmt.Printf("Stack dump requested\n%s", bt[:size])
			case unix.SIGTERM:
				os.Exit(0)
			case unix.SIGINT:
				os.Exit(0)
			}
		}
	}()

	go startGoProxy(`127.0.0.1:9090`)
	time.Sleep(2 * time.Second)
	go DownloadFile("/tmp/tmp.x","https://testfileorg.netwet.net/testfile.org-1GB- Corrupt.zip")
	wg.Add(1)
	wg.Wait()

}

func startGoProxy(aInAddr string) {
	addr := aInAddr

	proxy := goproxy.NewProxyHttpServer()
	proxyServ := &http.Server{
		Addr:           addr,
		Handler:        proxy,
		MaxHeaderBytes: 1 << 20,
	}
	for retryCount := 0; retryCount < 10; retryCount++ {
		fmt.Println("Starting GoProxy ")
		err := proxyServ.ListenAndServe()
		if err != nil {
			if err == http.ErrServerClosed {
				fmt.Println("Proxy has closed connections")
				break
			} else if retryCount == 10-1 {
				fmt.Println("Proxy has quit " + err.Error())
			}
			time.Sleep(1 * time.Minute)
		}
	}
}

// WriteCounter counts the number of bytes written to it. It implements to the io.Writer interface
// and we can pass this into io.TeeReader() which will report progress on each write cycle.
type WriteCounter struct {
	Total uint64
}

func (wc *WriteCounter) Write(p []byte) (int, error) {
	n := len(p)
	wc.Total += uint64(n)
	wc.PrintProgress()
	return n, nil
}

func (wc WriteCounter) PrintProgress() {
	// Clear the line by using a character return to go back to the start and remove
	// the remaining characters by filling it with spaces
	fmt.Printf("\r%s", strings.Repeat(" ", 35))

	// Return again and print current status of download
	// We use the humanize package to print the bytes in a meaningful way (e.g. 10 MB)
	fmt.Printf("\rDownloading... %s complete", humanize.Bytes(wc.Total))
}

func proxyAwareHttpClient() *http.Client {
	var dialer proxy.Dialer
	dialer = proxy.Direct
	proxyUrl, err := url.Parse("http://127.0.0.1:9090")
	if err != nil {
		fmt.Println("Unable to parse URL")
		return nil
	}
	fmt.Printf("Proxy URL %v, %v", proxyUrl, proxy.Direct)

	// setup a http client
	httpTransport := &http.Transport{Proxy:http.ProxyURL(proxyUrl)}
	httpClient := &http.Client{Transport: httpTransport}
	httpTransport.Dial = dialer.Dial
	return httpClient
}

// DownloadFile will download a url to a local file. It's efficient because it will
// write as it downloads and not load the whole file into memory. We pass an io.TeeReader
// into Copy() to report progress on the download.
func DownloadFile(filepath string, url string) error {

	defer wg.Done()
	// Create the file, but give it a tmp file extension, this means we won't overwrite a
	// file until it's downloaded, but we'll remove the tmp extension once downloaded.
	out, err := os.CreateTemp("", "test")
	if err != nil {
		return err
	}

	defer os.Remove(out.Name())
	fmt.Println("Download Started")
	req, err := http.NewRequest("GET", url, nil)
	client := proxyAwareHttpClient()
	if client == nil {
		return nil	
	}
	resp, err := client.Do(req)
	if err != nil {
		out.Close()
		panic(err)
	}
	defer resp.Body.Close()

	// Create our progress reporter and pass it to be used alongside our writer
	counter := &WriteCounter{}
	if _, err = io.Copy(out, io.TeeReader(resp.Body, counter)); err != nil {
		out.Close()
		return err
	}

	// The progress uses the same line so print a new line once it's finished downloading
	fmt.Print("\n")

	// Close the file without defer so it can happen before Rename()
	out.Close()

	fmt.Println("Download Finished")
	return nil
}

The program does the following:

  1. Starts a goProxy in a goroutine which blocks to accept a connection ([127.0.0.1:9090]).
  2. Starts a Downloader connects in another go routine.
  3. Downloader connects to the proxy ([127.0.0.1:9090]) and attempts to download a file.

What did you expect to see?

I expected the program to complete the data transfer successfully.
When running the program in a Linux environment with kernel 5.1 and above, the program produces the following output:

The program completes successfully if it has been compiled with go master after October 18th 2023. The fix is https://go-review.googlesource.com/c/go/+/536015.

./proxy
Starting GoProxy
Download Started
Downloading... 1.1 GB complete
Download Finished

What did you see instead?

When the program has been compiled with go version 1.21.x (<= 1.21.3) AND it runs in a Linux environment with kernel < 5.1, the program produces the following output and hangs:

linux#./proxy
Starting GoProxy
Download Started
Downloading... 31 MB complete

The program hangs in TCPConn.ReadFrom(), which invokes the splice() system call.
We think it should be possible to create a simpler program that can reproduce the issue.
Looking at the git history of internal/poll/splice_linux.go, the issue also exists in all 1.21.x versions, though we haven't actually tested each patch version.

We are consistently running into this issue with golang.org/x/net@v0.0.17 (go 1.21.3) with a proxy server. The application communicates with a remote server through a goproxy. The transfer starts of normally and after a few seconds (and approximately 10MB of download), connection completely stalls. This is the stack trace of the goroutines that have stalled.

goroutine 1643 [IO wait]:
internal/poll.runtime_pollWait(0x7f282aa52678, 0x72)
/go/1.21.3/go/src/runtime/netpoll.go:343 +0x85
internal/poll.(*pollDesc).wait(0xc000684400?, 0xc000a93ce8?, 0x0)
/go/1.21.3/go/src/internal/poll/fd_poll_runtime.go:84 +0x27
internal/poll.(*pollDesc).waitRead(...)
/go/1.21.3/go/src/internal/poll/fd_poll_runtime.go:89
internal/poll.spliceDrain(0xc0002be680?, 0xc000684400, 0x112?)
/go/1.21.3/go/src/internal/poll/splice_linux.go:100 +0x22f
internal/poll.Splice(0x0?, 0x0?, 0x7fffffffffffffff)
/go/1.21.3/go/src/internal/poll/splice_linux.go:42 +0x173
net.splice(0x0?, {0x1ad3d80?, 0xc0006224e0?})
/go/1.21.3/go/src/net/splice_linux.go:39 +0xdf
net.(*TCPConn).readFrom(0xc00051a0e0, {0x1ad3d80, 0xc0006224e0})
/go/1.21.3/go/src/net/tcpsock_posix.go:48 +0x28
net.(*TCPConn).ReadFrom(0xc00051a0e0, {0x1ad3d80?, 0xc0006224e0?})
/go/1.21.3/go/src/net/tcpsock.go:130 +0x30
io.copyBuffer({0x1ad3e60, 0xc00051a0e0}, {0x1ad3d80, 0xc0006224e0}, {0x0, 0x0, 0x0})
/go/1.21.3/go/src/io/io.go:416 +0x147
io.Copy(...)
/go/1.21.3/go/src/io/io.go:389
github.com/elazarl/goproxy.copyAndClose(0x0?, {0x7f282a9d5258?, 0xc00051a0e0}, {0x7f282a9d5258?, 0xc0006224e0})
go/pkg/mod/github.com/elazarl/[goproxy@v0.0.0-20200310082302-296d8939dc5a](mailto:goproxy@v0.0.0-20200310082302-296d8939dc5a)/https.go:310 +0x8a
created by github.com/elazarl/goproxy.(*ProxyHttpServer).handleHttps in goroutine 364
go/pkg/mod/github.com/elazarl/[goproxy@v0.0.0-20200310082302-296d8939dc5a](mailto:goproxy@v0.0.0-20200310082302-296d8939dc5a)/https.go:116 +0x8cd

goroutine 1644 [syscall]:
syscall.Syscall6(0x7f282aa526a0?, 0xc000439c68?, 0x46b985?, 0xc000439c78?, 0x4f005c?, 0xc000439c90?, 0x4f01c7?)
/go/1.21.3/go/src/syscall/syscall_linux.go:91 +0x30
syscall.Splice(0xc0002be680?, 0xc000439ce8?, 0x0?, 0x1443500?, 0x1443500?, 0xc000439d00?)
/go/1.21.3/go/src/syscall/zsyscall_linux_amd64.go:1356 +0x45
internal/poll.splice(...)
/go/1.21.3/go/src/internal/poll/splice_linux.go:155
internal/poll.spliceDrain(0xc000684400?, 0xc0002be680, 0xb50?)
/go/1.21.3/go/src/internal/poll/splice_linux.go:92 +0x185
internal/poll.Splice(0x40d86f?, 0x1652098?, 0x7fffffffffffffff)
/go/1.21.3/go/src/internal/poll/splice_linux.go:42 +0x173
net.splice(0x0?, {0x1ad3d80?, 0xc00051a0e0?})
/go/1.21.3/go/src/net/splice_linux.go:39 +0xdf
net.(*TCPConn).readFrom(0xc0006224e0, {0x1ad3d80, 0xc00051a0e0})
/go/1.21.3/go/src/net/tcpsock_posix.go:48 +0x28
net.(*TCPConn).ReadFrom(0xc0006224e0, {0x1ad3d80?, 0xc00051a0e0?})
/go/1.21.3/go/src/net/tcpsock.go:130 +0x30
io.copyBuffer({0x1ad3e60, 0xc0006224e0}, {0x1ad3d80, 0xc00051a0e0}, {0x0, 0x0, 0x0})
/go/1.21.3/go/src/io/io.go:416 +0x147
io.Copy(...)
/go/1.21.3/go/src/io/io.go:389
github.com/elazarl/goproxy.copyAndClose(0xc000439fd0?, {0x7f282a9d5258?, 0xc0006224e0}, {0x7f282a9d5258?, 0xc00051a0e0})
go/pkg/mod/github.com/elazarl/[goproxy@v0.0.0-20200310082302-296d8939dc5a](mailto:goproxy@v0.0.0-20200310082302-296d8939dc5a)/https.go:310 +0x8a
created by github.com/elazarl/goproxy.(*ProxyHttpServer).handleHttps in goroutine 364
go/pkg/mod/github.com/elazarl/[goproxy@v0.0.0-20200310082302-296d8939dc5a](mailto:goproxy@v0.0.0-20200310082302-296d8939dc5a)/https.go:117 +0x96f

Metadata

Metadata

Assignees

Labels

FrozenDueToAgeNeedsFixThe path to resolution is known, but the work has not been done.OS-Linux

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions