Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

put can read from stdin by specifying the '-' source file #98

Closed
wants to merge 10 commits into from
5 changes: 3 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ os:
- osx
language: go
go_import_path: github.com/colinmarc/hdfs
go: 1.7
go: 1.x
before_install:
- git clone https://github.com/sstephenson/bats $HOME/bats
- mkdir -p $HOME/bats/build
Expand All @@ -19,11 +19,12 @@ before_script:
- "./setup_test_env.sh"
before_deploy: make release
script:
- find protocol -name *.pb.go | xargs touch # so make doesn't try to regen protobuf files
- make test
- cat minicluster.log
sudo: false
cache:
- "$HADOOP_HOME"
- "$HOME/hadoop-$HADOOP_DISTRO"
- "$HOME/bats"
deploy:
skip_cleanup: true
Expand Down
5 changes: 4 additions & 1 deletion client.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ func getNameNodeFromConf() ([]string, error) {

// NewForUser returns a connected Client with the user specified, or an error if
// it can't connect.
//
// Deprecated: Use NewClient with ClientOptions instead.
func NewForUser(address string, user string) (*Client, error) {
return NewClient(ClientOptions{
Addresses: []string{address},
Expand All @@ -107,6 +109,8 @@ func NewForUser(address string, user string) (*Client, error) {

// NewForConnection returns Client with the specified, underlying rpc.NamenodeConnection.
// You can use rpc.WrapNamenodeConnection to wrap your own net.Conn.
//
// Deprecated: Use NewClient with ClientOptions instead.
func NewForConnection(namenode *rpc.NamenodeConnection) *Client {
client, _ := NewClient(ClientOptions{Namenode: namenode})
return client
Expand Down Expand Up @@ -143,7 +147,6 @@ func (c *Client) CopyToLocal(src string, dst string) error {
}

// CopyToRemote copies the local file specified by src to the HDFS file at dst.
// If dst already exists, it will be overwritten.
func (c *Client) CopyToRemote(src string, dst string) error {
local, err := os.Open(src)
if err != nil {
Expand Down
36 changes: 36 additions & 0 deletions cmd/hdfs/put.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import (
"os"
"path"
"path/filepath"

"github.com/colinmarc/hdfs"
)

func put(args []string) {
Expand All @@ -29,6 +31,40 @@ func put(args []string) {
fatal(err)
}

if filepath.Base(source) == "-" {
putFromStdin(client, dest)
} else {
putFromFile(client, source, dest)
}
}

func putFromStdin(client *hdfs.Client, dest string) {
// If the destination exists, regardless of what it is, bail out.
_, err := client.Stat(dest)
if err == nil {
fatal(&os.PathError{"put", dest, os.ErrExist})
} else if !os.IsNotExist(err) {
fatal(err)
}

mode := 0755 | os.ModeDir
parentDir := filepath.Dir(dest)
if parentDir != "." && parentDir != "/" {
if err := client.MkdirAll(parentDir, mode); err != nil {
fatal(err)
}
}

writer, err := client.Create(dest)
if err != nil {
fatal(err)
}
defer writer.Close()

io.Copy(writer, os.Stdin)
}

func putFromFile(client *hdfs.Client, source string, dest string) {
// If the destination is an existing directory, place it inside. Otherwise,
// the destination is really the parent directory, and we need to rename the
// source directory as we copy.
Expand Down
27 changes: 27 additions & 0 deletions cmd/hdfs/test/put.bats
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,33 @@ mkdir /_test_cmd/put/existing.txt: file already exists
OUT
}

@test "put from stdin" {
run bash -c "cat $ROOT_TEST_DIR/test/mobydick.txt | $HDFS put - /_test_cmd/put_stdin/mobydick_stdin.txt"
assert_success

run bash -c "$HDFS cat /_test_cmd/put_stdin/mobydick_stdin.txt > $BATS_TMPDIR/mobydick_stdin_test.txt"
assert_success

SHA=`shasum < $ROOT_TEST_DIR/test/mobydick.txt | awk '{ print $1 }'`
assert_equal $SHA `shasum < $BATS_TMPDIR/mobydick_stdin_test.txt | awk '{ print $1 }'`
}

@test "put from stdin into existing file" {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does the normal hadoop -fs behave in this case, out of curiosity?

Copy link
Contributor Author

@Shastick Shastick Jan 29, 2018

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just had a try:

put: `/tmp/testme': File exists

run bash -c "cat $ROOT_TEST_DIR/test/mobydick.txt | $HDFS put - /_test_cmd/put/existing.txt"
assert_failure
assert_output <<OUT
put /_test_cmd/put/existing.txt: file already exists
OUT
}

@test "put from stdin into dir" {
run bash -c "cat $ROOT_TEST_DIR/test/mobydick.txt | $HDFS put - /_test_cmd/put/"
assert_failure
assert_output <<OUT
put /_test_cmd/put: file already exists
OUT
}

teardown() {
$HDFS rm -r /_test_cmd/put
}
75 changes: 43 additions & 32 deletions protocol/hadoop_common/GenericRefreshProtocol.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

41 changes: 33 additions & 8 deletions protocol/hadoop_common/GetUserMappingsProtocol.pb.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading