Skip to content

Commit

Permalink
os, internal/poll, internal/syscall/unix: use copy_file_range on Linux
Browse files Browse the repository at this point in the history
Linux 4.5 introduced (and Linux 5.3 refined) the copy_file_range
system call, which allows file systems the opportunity to implement
copy acceleration techniques. This commit adds support for
copy_file_range(2) to the os package.

Introduce a new ReadFrom method on *os.File, which makes *os.File
implement the io.ReaderFrom interface. If dst and src are both files,
this enables io.Copy(dst, src) to call dst.ReadFrom(src), which, in
turn, will call copy_file_range(2) if possible. If copy_file_range(2)
is not supported by the host kernel, or if either of dst or src
refers to a non-regular file, ReadFrom falls back to the regular
io.Copy code path.

Add internal/poll.CopyFileRange, which acquires locks on the
appropriate poll.FDs and performs the actual work, as well as
internal/syscall/unix.CopyFileRange, which wraps the copy_file_range
system call itself at the lowest level.

Rework file layout in internal/syscall/unix to accommodate the
additional system call numbers needed for copy_file_range.
Merge these definitions with the ones used by getrandom(2) into
sysnum_linux_$GOARCH.go files.

A note on additional optimizations: if dst and src both refer to pipes
in the invocation dst.ReadFrom(src), we could, in theory, use the
existing splice(2) code in package internal/poll to splice directly
from src to dst. Attempting this runs into trouble with the poller,
however. If we call splice(src, dst) and see EAGAIN, we cannot know
if it came from src not being ready for reading or dst not being
ready for writing. The write end of src and the read end of dst are
not under our control, so we cannot reliably use the poller to wait
for readiness. Therefore, it seems infeasible to use the new ReadFrom
method to splice between pipes directly. In conclusion, for now, the
only optimization enabled by the new ReadFrom method on *os.File is
the copy_file_range optimization.

Fixes golang#36817.

Change-Id: I696372639fa0cdf704e3f65414f7321fc7d30adb
Reviewed-on: https://go-review.googlesource.com/c/go/+/229101
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ian Lance Taylor <iant@golang.org>
  • Loading branch information
acln0 authored and xujianhai666 committed May 21, 2020
1 parent 5ea8976 commit 6c68fc0
Show file tree
Hide file tree
Showing 16 changed files with 568 additions and 28 deletions.
93 changes: 93 additions & 0 deletions src/internal/poll/copy_file_range_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package poll

import (
"internal/syscall/unix"
"sync/atomic"
"syscall"
)

// copyFileRangeSupported is nonzero while copy_file_range(2) is assumed to
// be available. CopyFileRange clears it the first time the system call
// fails with ENOSYS, after which CopyFileRange returns immediately without
// attempting the system call.
var copyFileRangeSupported int32 = 1 // accessed atomically

// maxCopyFileRangeRound is the largest byte count CopyFileRange requests
// from a single copy_file_range(2) call (1 << 30 bytes); larger transfers
// are split across several rounds.
const maxCopyFileRangeRound = 1 << 30

// CopyFileRange copies at most remain bytes of data from src to dst, using
// the copy_file_range system call. dst and src must refer to regular files.
//
// It returns the number of bytes written, whether the transfer was handled
// by copy_file_range, and any error encountered. If handled is false, the
// reported byte count is 0 and the caller is expected to fall back to a
// generic copy.
func CopyFileRange(dst, src *FD, remain int64) (written int64, handled bool, err error) {
	if atomic.LoadInt32(&copyFileRangeSupported) == 0 {
		return 0, false, nil
	}
	for remain > 0 {
		// Cap each round so the count always fits comfortably in an int.
		max := remain
		if max > maxCopyFileRangeRound {
			max = maxCopyFileRangeRound
		}
		n, cerr := copyFileRange(dst, src, int(max))
		switch cerr {
		case syscall.ENOSYS:
			// copy_file_range(2) was introduced in Linux 4.5.
			// Go supports Linux >= 2.6.33, so the system call
			// may not be present.
			//
			// If we see ENOSYS, we have certainly not transferred
			// any data, so we can tell the caller that we
			// couldn't handle the transfer and let them fall
			// back to more generic code.
			//
			// Seeing ENOSYS also means that we will not try to
			// use copy_file_range(2) again.
			atomic.StoreInt32(&copyFileRangeSupported, 0)
			return 0, false, nil
		case syscall.EXDEV, syscall.EINVAL:
			// Prior to Linux 5.3, it was not possible to
			// copy_file_range across file systems. Similarly to
			// the ENOSYS case above, if we see EXDEV, we have
			// not transferred any data, and we can let the caller
			// fall back to generic code.
			//
			// As for EINVAL, that is what we see if, for example,
			// dst or src refer to a pipe rather than a regular
			// file. This is another case where no data has been
			// transferred, so we consider it unhandled.
			return 0, false, nil
		case nil:
			if n == 0 {
				if written == 0 {
					// Nothing was copied at all. We cannot tell
					// a src that is genuinely at EOF apart from
					// a file system on which copy_file_range
					// reports success without copying anything.
					// Report the transfer as unhandled: the
					// generic fallback yields the same result
					// for an empty src and the correct result
					// otherwise.
					return 0, false, nil
				}
				// src is at EOF, which means we are done.
				return written, true, nil
			}
			remain -= n
			written += n
		default:
			return written, true, cerr
		}
	}
	return written, true, nil
}

// copyFileRange performs one round of copy_file_range(2), asking the kernel
// to copy up to max bytes from src to dst at their current file offsets.
// It returns the byte count reported by the system call and any error.
// The call is retried for as long as it fails with EINTR.
func copyFileRange(dst, src *FD, max int) (written int64, err error) {
	// The signature of copy_file_range(2) is:
	//
	// ssize_t copy_file_range(int fd_in, loff_t *off_in,
	//                         int fd_out, loff_t *off_out,
	//                         size_t len, unsigned int flags);
	//
	// Note that in the call to unix.CopyFileRange below, we use nil
	// values for off_in and off_out. For the system call, this means
	// "use and update the file offsets". That is why we must acquire
	// locks for both file descriptors (and why this whole machinery is
	// in the internal/poll package to begin with).
	if err := dst.writeLock(); err != nil {
		return 0, err
	}
	defer dst.writeUnlock()
	if err := src.readLock(); err != nil {
		return 0, err
	}
	defer src.readUnlock()

	var n int
	for {
		n, err = unix.CopyFileRange(src.Sysfd, nil, dst.Sysfd, nil, max, 0)
		// An interrupted call has transferred nothing; simply retry
		// rather than surfacing EINTR to the caller as a copy failure.
		if err != syscall.EINTR {
			break
		}
	}
	return int64(n), err
}
26 changes: 26 additions & 0 deletions src/internal/syscall/unix/copy_file_range_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package unix

import (
"syscall"
"unsafe"
)

// CopyFileRange wraps the copy_file_range system call. It passes rfd, roff,
// wfd, woff, len and flags straight through to the kernel. n is the first
// result register converted to int; err is non-nil exactly when the system
// call reports a nonzero errno.
func CopyFileRange(rfd int, roff *int64, wfd int, woff *int64, len int, flags int) (n int, err error) {
	// The unsafe.Pointer conversions must stay inside the argument list
	// of the Syscall6 call itself.
	r1, _, errno := syscall.Syscall6(
		copyFileRangeTrap,
		uintptr(rfd), uintptr(unsafe.Pointer(roff)),
		uintptr(wfd), uintptr(unsafe.Pointer(woff)),
		uintptr(len), uintptr(flags),
	)
	if errno == 0 {
		return int(r1), nil
	}
	return int(r1), errno
}
2 changes: 1 addition & 1 deletion src/internal/syscall/unix/getrandom_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func GetRandom(p []byte, flags GetRandomFlag) (n int, err error) {
if atomic.LoadInt32(&randomUnsupported) != 0 {
return 0, syscall.ENOSYS
}
r1, _, errno := syscall.Syscall(randomTrap,
r1, _, errno := syscall.Syscall(getrandomTrap,
uintptr(unsafe.Pointer(&p[0])),
uintptr(len(p)),
uintptr(flags))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

package unix

// Linux getrandom system call number.
// See GetRandom in getrandom_linux.go.
const randomTrap uintptr = 355
const (
getrandomTrap uintptr = 355
copyFileRangeTrap uintptr = 377
)
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

package unix

// Linux getrandom system call number.
// See GetRandom in getrandom_linux.go.
const randomTrap uintptr = 318
const (
getrandomTrap uintptr = 318
copyFileRangeTrap uintptr = 326
)
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

package unix

// Linux getrandom system call number.
// See GetRandom in getrandom_linux.go.
const randomTrap uintptr = 384
const (
getrandomTrap uintptr = 384
copyFileRangeTrap uintptr = 391
)
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@

package unix

// Linux getrandom system call number.
// See GetRandom in getrandom_linux.go.
//
// This file is named "generic" because at a certain point Linux started
// standardizing on system call numbers across architectures. So far this means
// only arm64 and riscv64 use the standard numbers.
const randomTrap uintptr = 278
// standardizing on system call numbers across architectures. So far this
// means only arm64 and riscv64 use the standard numbers.

const (
getrandomTrap uintptr = 278
copyFileRangeTrap uintptr = 285
)
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

package unix

// Linux getrandom system call number.
// See GetRandom in getrandom_linux.go.
const randomTrap uintptr = 5313
const (
getrandomTrap uintptr = 5313
copyFileRangeTrap uintptr = 5320
)
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

package unix

// Linux getrandom system call number.
// See GetRandom in getrandom_linux.go.
const randomTrap uintptr = 4353
const (
getrandomTrap uintptr = 4353
copyFileRangeTrap uintptr = 4360
)
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

package unix

// Linux getrandom system call number.
// See GetRandom in getrandom_linux.go.
const randomTrap uintptr = 359
const (
getrandomTrap uintptr = 359
copyFileRangeTrap uintptr = 379
)
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

package unix

// Linux getrandom system call number.
// See GetRandom in getrandom_linux.go.
const randomTrap uintptr = 349
const (
getrandomTrap uintptr = 349
copyFileRangeTrap uintptr = 375
)
7 changes: 7 additions & 0 deletions src/os/export_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package os

// PollCopyFileRangeP is a pointer to the package-level pollCopyFileRange
// variable. NOTE(review): presumably exported from this _test file so that
// external tests can install a hook in its place — confirm against the tests.
var PollCopyFileRangeP = &pollCopyFileRange
20 changes: 20 additions & 0 deletions src/os/file.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,26 @@ func (f *File) ReadAt(b []byte, off int64) (n int, err error) {
return
}

// ReadFrom implements io.ReaderFrom.
//
// It first gives the platform-specific readFrom a chance to perform the
// copy. If readFrom reports that it did not handle the transfer, ReadFrom
// falls back to genericReadFrom and returns its result unchanged.
func (f *File) ReadFrom(r io.Reader) (n int64, err error) {
	if invalid := f.checkValid("write"); invalid != nil {
		return 0, invalid
	}

	var (
		handled bool
		copyErr error
	)
	n, handled, copyErr = f.readFrom(r)
	if handled {
		return n, f.wrapErr("write", copyErr)
	}
	return genericReadFrom(f, r) // without wrapping
}

// genericReadFrom copies from r into f with io.Copy. The destination is
// wrapped in onlyWriter, so io.Copy sees a value whose only method is Write.
func genericReadFrom(f *File, r io.Reader) (int64, error) {
	dst := onlyWriter{Writer: f}
	return io.Copy(dst, r)
}

// onlyWriter wraps an io.Writer so that only its Write method is visible.
// genericReadFrom hands onlyWriter{f} to io.Copy; because the wrapper does
// not expose a ReadFrom method, io.Copy cannot call back into
// (*File).ReadFrom through it.
type onlyWriter struct {
io.Writer
}

// Write writes len(b) bytes to the File.
// It returns the number of bytes written and an error, if any.
// Write returns a non-nil error when n != len(b).
Expand Down
41 changes: 41 additions & 0 deletions src/os/readfrom_linux.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package os

import (
"internal/poll"
"io"
)

// pollCopyFileRange is the function readFrom calls to perform the copy.
// It is initialized to poll.CopyFileRange and is held in a variable rather
// than called directly, so its value can be replaced.
var pollCopyFileRange = poll.CopyFileRange

// readFrom tries to copy from r into f using copy_file_range(2).
//
// It reports the number of bytes written, whether the transfer was handled
// here, and any error wrapped as a SyscallError for "copy_file_range".
// The transfer is left unhandled when f is in append mode or when r (after
// unwrapping an *io.LimitedReader) is not a *File.
func (f *File) readFrom(r io.Reader) (written int64, handled bool, err error) {
	// copy_file_range(2) does not support destinations opened with
	// O_APPEND, so don't even try.
	if f.appendMode {
		return 0, false, nil
	}

	// Without an explicit limit, ask for an effectively unbounded copy.
	limit := int64(1 << 62)

	// Unwrap a LimitedReader so the limit is honored and the underlying
	// reader can be inspected.
	lr, limited := r.(*io.LimitedReader)
	if limited {
		if lr.N <= 0 {
			return 0, true, nil
		}
		limit = lr.N
		r = lr.R
	}

	src, isFile := r.(*File)
	if !isFile {
		return 0, false, nil
	}

	written, handled, err = pollCopyFileRange(&f.pfd, &src.pfd, limit)
	if lr != nil {
		// Keep the LimitedReader's remaining budget in sync with
		// what was actually copied.
		lr.N -= written
	}
	return written, handled, NewSyscallError("copy_file_range", err)
}
Loading

0 comments on commit 6c68fc0

Please sign in to comment.