diff --git a/cli/src/alluxio.org/cli/cmd/cache/format.go b/cli/src/alluxio.org/cli/cmd/cache/format.go index 7a090dbd36c8..de7071c31881 100644 --- a/cli/src/alluxio.org/cli/cmd/cache/format.go +++ b/cli/src/alluxio.org/cli/cmd/cache/format.go @@ -38,8 +38,14 @@ func (c *FormatCommand) Base() *env.BaseJavaCommand { func (c *FormatCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: Format.CommandName, - Short: "Format Alluxio worker nodes.", - Args: cobra.NoArgs, + Short: "Format Alluxio worker running locally", + Long: `The format command formats the Alluxio worker on this host. +This deletes all the cached data stored by the worker. Data in the under storage will not be changed. + +> Warning: Format should only be called when the worker is not running`, + Example: `# Format worker +$ ./bin/alluxio cache format`, + Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, diff --git a/cli/src/alluxio.org/cli/cmd/cache/free.go b/cli/src/alluxio.org/cli/cmd/cache/free.go index d3c0c37376fe..f60d68796f6c 100644 --- a/cli/src/alluxio.org/cli/cmd/cache/free.go +++ b/cli/src/alluxio.org/cli/cmd/cache/free.go @@ -30,7 +30,6 @@ type FreeCommand struct { *env.BaseJavaCommand worker string path string - force bool } func (c *FreeCommand) Base() *env.BaseJavaCommand { @@ -39,9 +38,13 @@ func (c *FreeCommand) Base() *env.BaseJavaCommand { func (c *FreeCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ - Use: Free.CommandName, - Short: "Synchronously free all blocks and directories of specific worker, " + - "or free the space occupied by a file or a directory in Alluxio", + Use: Free.CommandName, + Short: "Synchronously free cached files along a path or held by a specific worker", + Example: `# Free a file by its path +$ ./bin/alluxio cache free --path /path/to/file + +# Free files on a worker +$ ./bin/alluxio cache free --worker `, Args: cobra.NoArgs, RunE: 
func(cmd *cobra.Command, args []string) error { return c.Run(args) @@ -49,8 +52,6 @@ func (c *FreeCommand) ToCommand() *cobra.Command { }) cmd.Flags().StringVar(&c.worker, "worker", "", "The worker to free") cmd.Flags().StringVar(&c.path, "path", "", "The file or directory to free") - cmd.Flags().BoolVarP(&c.force, "force", "f", false, - "Force freeing pinned files in the directory") cmd.MarkFlagsMutuallyExclusive("worker", "path") return cmd } @@ -60,11 +61,7 @@ func (c *FreeCommand) Run(args []string) error { if c.worker == "" { if c.path != "" { // free directory - javaArgs = append(javaArgs, "free") - if c.force { - javaArgs = append(javaArgs, "-f") - } - javaArgs = append(javaArgs, c.path) + javaArgs = append(javaArgs, "free", c.path) } else { return stacktrace.NewError("neither worker nor path to free specified") } diff --git a/cli/src/alluxio.org/cli/cmd/conf/get.go b/cli/src/alluxio.org/cli/cmd/conf/get.go index 717f75a447c9..056dab99cfe6 100644 --- a/cli/src/alluxio.org/cli/cmd/conf/get.go +++ b/cli/src/alluxio.org/cli/cmd/conf/get.go @@ -46,7 +46,29 @@ func (c *GetCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: fmt.Sprintf("%v [key]", Get.CommandName), Short: "Look up a configuration value by its property key or print all configuration if no key is provided", - Args: cobra.MaximumNArgs(1), + Long: `The get command prints the configured value for the given key. +If the key is invalid, it returns a nonzero exit code. +If the key is valid but isn't set, an empty string is printed. +If no key is specified, the full configuration is printed. 
+ +> Note: This command does not require the Alluxio cluster to be running.`, + Example: `# Display all the current node configuration +$ ./bin/alluxio conf get + +# Display the value of a property key +$ ./bin/alluxio conf get alluxio.master.hostname + +# Display the configuration of the current running Alluxio leading master +$ ./bin/alluxio conf get --master + +# Display the source of the configuration +$ ./bin/alluxio conf get --source + +# Display the values in a given unit +$ ./bin/alluxio conf get alluxio.user.block.size.bytes.default --unit KB +$ ./bin/alluxio conf get alluxio.master.journal.flush.timeout --unit S +`, + Args: cobra.MaximumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, diff --git a/cli/src/alluxio.org/cli/cmd/conf/log.go b/cli/src/alluxio.org/cli/cmd/conf/log.go index 7f5521850bd0..a089d44127ed 100644 --- a/cli/src/alluxio.org/cli/cmd/conf/log.go +++ b/cli/src/alluxio.org/cli/cmd/conf/log.go @@ -42,7 +42,23 @@ func (c *LogCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: Log.CommandName, Short: "Get or set the log level for the specified logger", - Args: cobra.NoArgs, + Long: `The log command returns the current value of or updates the log level of a particular class on specific instances. +Users are able to change Alluxio server-side log levels at runtime. + +The --target flag specifies which processes to apply the log level change to. +The target could be of the form and multiple targets can be listed as comma-separated entries. +The role can be one of master,worker,job_master,job_worker. +Using the role option is useful when an Alluxio process is configured to use a non-standard web port (e.g. if an Alluxio master does not use 19999 as its web port). +The default target value is the primary master, primary job master, all workers and job workers. 
+ +> Note: This command requires the Alluxio cluster to be running.`, + Example: `# Set DEBUG level for DefaultFileSystemMaster class on master processes +$ ./bin/alluxio conf log --logName alluxio.master.file.DefaultFileSystemMaster --target=master --level=DEBUG + +# Set WARN level for PagedDoraWorker class on the worker process on host myHostName +$ ./bin/alluxio conf log --logName alluxio.worker.dora.PagedDoraWorker.java --target=myHostName:worker --level=WARN +`, + Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, diff --git a/cli/src/alluxio.org/cli/cmd/exec/test_basic_io.go b/cli/src/alluxio.org/cli/cmd/exec/test_basic_io.go index 6899c21fac8c..e86f79070453 100644 --- a/cli/src/alluxio.org/cli/cmd/exec/test_basic_io.go +++ b/cli/src/alluxio.org/cli/cmd/exec/test_basic_io.go @@ -44,7 +44,12 @@ func (c *TestRunCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: "basicIOTest", Args: cobra.NoArgs, - Short: "Run all end-to-end tests, or a specific test, on an Alluxio cluster.", + Short: "Run all end-to-end tests or a specific test, on an Alluxio cluster.", + Example: `# Run all permutations of IO tests +$ ./bin/alluxio exec basicIOTest + +# Run a specific permutation of the IO tests +$ ./bin/alluxio exec basicIOTest --operation BASIC --readType NO_CACHE --writeType THROUGH`, RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, @@ -61,7 +66,7 @@ func (c *TestRunCommand) ToCommand() *cobra.Command { "Alluxio worker addresses to run tests on. \n"+ "If not specified, random ones will be used.") cmd.Flags().StringVar(&c.writeType, "writeType", "", - "The write type to use, one of MUST_CACHE, CACHE_THROUGH, THROUGH, ASYNC_THROUGH. \n"+ + "The write type to use, one of MUST_CACHE, CACHE_THROUGH, THROUGH.
\n"+ "By default all writeTypes are tested.") return cmd } diff --git a/cli/src/alluxio.org/cli/cmd/exec/test_ufs_io.go b/cli/src/alluxio.org/cli/cmd/exec/test_ufs_io.go index 211bba40761e..5314106fbc9a 100644 --- a/cli/src/alluxio.org/cli/cmd/exec/test_ufs_io.go +++ b/cli/src/alluxio.org/cli/cmd/exec/test_ufs_io.go @@ -46,10 +46,22 @@ func (c *TestUfsIOCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: c.CommandName, Short: "A benchmarking tool for the I/O between Alluxio and UFS.", - Long: "A benchmarking tool for the I/O between Alluxio and UFS." + - "This test will measure the I/O throughput between Alluxio workers and the specified UFS path. " + - "Each worker will create concurrent clients to first generate test files of the specified size " + - "then read those files. The write/read I/O throughput will be measured in the process.", + Long: `A benchmarking tool for the I/O between Alluxio and UFS. +This test will measure the I/O throughput between Alluxio workers and the specified UFS path. +Each worker will create concurrent clients to first generate test files of the specified size then read those files. +The write/read I/O throughput will be measured in the process.`, + Example: `# This runs the I/O benchmark to HDFS in your process locally +$ ./bin/alluxio runUfsIOTest --path hdfs:// + +# This invokes the I/O benchmark to HDFS in the Alluxio cluster +# 1 worker will be used. 
4 threads will be created, each writing then reading 4G of data +$ ./bin/alluxio runUfsIOTest --path hdfs:// --cluster --cluster-limit 1 + +# This invokes the I/O benchmark to HDFS in the Alluxio cluster +# 2 workers will be used +# 2 threads will be created on each worker +# Each thread is writing then reading 512m of data +$ ./bin/alluxio runUfsIOTest --path hdfs:// --cluster --cluster-limit 2 --io-size 512m --threads 2`, RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, diff --git a/cli/src/alluxio.org/cli/cmd/fs/cat.go b/cli/src/alluxio.org/cli/cmd/fs/cat.go index c47f8aa6a81e..874fa6014037 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/cat.go +++ b/cli/src/alluxio.org/cli/cmd/fs/cat.go @@ -39,7 +39,10 @@ func (c *CatCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: "cat [path]", Short: "Print specified file's content", - Args: cobra.ExactArgs(1), + Long: `The cat command prints the contents of a file in Alluxio to the shell.`, + Example: `# Print the contents of /output/part-00000 +$ ./bin/alluxio fs cat /output/part-00000`, + Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, diff --git a/cli/src/alluxio.org/cli/cmd/fs/checksum.go b/cli/src/alluxio.org/cli/cmd/fs/checksum.go index 2e674bac0da9..5a228e06089c 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/checksum.go +++ b/cli/src/alluxio.org/cli/cmd/fs/checksum.go @@ -39,7 +39,16 @@ func (c *ChecksumCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: "checksum [path]", Short: "Calculates the md5 checksum of a specified file", - Args: cobra.ExactArgs(1), + Long: `The checksum command outputs the md5 value of a file in Alluxio. 
+This can be used to verify the contents of a file stored in Alluxio.`, + Example: `# Compare the checksum values +# value from Alluxio filesystem +$ ./bin/alluxio fs checksum /LICENSE +md5sum: bf0513403ff54711966f39b058e059a3 +# value from local filesystem +md5 LICENSE +MD5 (LICENSE) = bf0513403ff54711966f39b058e059a3`, + Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, diff --git a/cli/src/alluxio.org/cli/cmd/fs/chgrp.go b/cli/src/alluxio.org/cli/cmd/fs/chgrp.go index dad706dc9ea1..4c33bc328401 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/chgrp.go +++ b/cli/src/alluxio.org/cli/cmd/fs/chgrp.go @@ -39,15 +39,20 @@ func (c *ChgrpCommand) Base() *env.BaseJavaCommand { func (c *ChgrpCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ - Use: fmt.Sprintf("%s ", c.CommandName), - Short: "changes the group of a file or directory specified by args", - Args: cobra.ExactArgs(2), + Use: fmt.Sprintf("%s [group] [path]", c.CommandName), + Short: "Changes the group of a file or directory", + Long: `The chgrp command changes the group of the file or directory in Alluxio. +Alluxio supports file authorization with POSIX file permissions. 
+The file owner or superuser can execute this command.`, + Example: `# Change the group of a file +$ ./bin/alluxio fs chgrp alluxio-group-new /input/file1`, + Args: cobra.ExactArgs(2), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, }) cmd.Flags().BoolVarP(&c.recursive, "recursive", "R", false, - "change the group recursively") + "change the group recursively for all files and directories under the given path") return cmd } diff --git a/cli/src/alluxio.org/cli/cmd/fs/chmod.go b/cli/src/alluxio.org/cli/cmd/fs/chmod.go index 614a58612794..357fb0a7bab2 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/chmod.go +++ b/cli/src/alluxio.org/cli/cmd/fs/chmod.go @@ -39,15 +39,20 @@ func (c *ChmodCommand) Base() *env.BaseJavaCommand { func (c *ChmodCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ - Use: fmt.Sprintf("%s ", c.CommandName), - Short: "Changes the permission of a file or directory specified by args", - Args: cobra.ExactArgs(2), + Use: fmt.Sprintf("%s [mode] [path]", c.CommandName), + Short: "Changes the permission of a file or directory", + Long: `The chmod command changes the permission of a file or directory in Alluxio. +The permission mode is represented as an octal 3 digit value. 
+Refer to https://en.wikipedia.org/wiki/Chmod#Numerical_permissions for a detailed description of the modes.`, + Example: `# Set mode 755 for /input/file +$ ./bin/alluxio fs chmod 755 /input/file1`, + Args: cobra.ExactArgs(2), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, }) cmd.Flags().BoolVarP(&c.recursive, "recursive", "R", false, - "change the permission recursively") + "change the permission recursively for all files and directories under the given path") return cmd } diff --git a/cli/src/alluxio.org/cli/cmd/fs/chown.go b/cli/src/alluxio.org/cli/cmd/fs/chown.go index 4cedc824a239..700df7cd05d2 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/chown.go +++ b/cli/src/alluxio.org/cli/cmd/fs/chown.go @@ -41,13 +41,17 @@ func (c *ChownCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: fmt.Sprintf("%s [:] ", c.CommandName), Short: "Changes the owner of a file or directory specified by args", - Args: cobra.NoArgs, + Long: `The chown command changes the owner of a file or directory in Alluxio. +The ownership of a file can only be altered by a superuser`, + Example: `# Change the owner of /input/file1 to alluxio-user +$ ./bin/alluxio fs chown alluxio-user /input/file1`, + Args: cobra.ExactArgs(2), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, }) cmd.Flags().BoolVarP(&c.recursive, "recursive", "R", false, - "change the owner recursively") + "change the owner recursively for all files and directories under the given path") return cmd } diff --git a/cli/src/alluxio.org/cli/cmd/fs/consistant_hash.go b/cli/src/alluxio.org/cli/cmd/fs/consistant_hash.go deleted file mode 100644 index adbe88da08e6..000000000000 --- a/cli/src/alluxio.org/cli/cmd/fs/consistant_hash.go +++ /dev/null @@ -1,74 +0,0 @@ -/* - * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 - * (the "License").
You may not use this work except in compliance with the License, which is - * available at www.apache.org/licenses/LICENSE-2.0 - * - * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, - * either express or implied, as more fully set forth in the License. - * - * See the NOTICE file distributed with this work for information regarding copyright ownership. - */ - -package fs - -import ( - "alluxio.org/cli/env" - "github.com/palantir/stacktrace" - "github.com/spf13/cobra" -) - -func ConsistentHash(className string) env.Command { - return &ConsistentHashCommand{ - BaseJavaCommand: &env.BaseJavaCommand{ - CommandName: "consistent-hash", - JavaClassName: className, - Parameters: []string{"consistent-hash"}, - }, - } -} - -type ConsistentHashCommand struct { - *env.BaseJavaCommand - - createCheckFile bool - compareCheckFiles bool - cleanCheckData bool -} - -func (c *ConsistentHashCommand) Base() *env.BaseJavaCommand { - return c.BaseJavaCommand -} - -func (c *ConsistentHashCommand) ToCommand() *cobra.Command { - cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ - Use: "consistent-hash [--create]|[--compare <1stCheckFilePath> <2ndCheckFilePath>]|[--clean] ", - Short: "This command is for checking whether the consistent hash ring is changed or not", - RunE: func(cmd *cobra.Command, args []string) error { - return c.Run(args) - }, - }) - cmd.Flags().BoolVar(&c.createCheckFile, "create", false, "Generate check file.") - cmd.Flags().BoolVar(&c.compareCheckFiles, "compare", false, "Compare check files to see if the hash ring has changed and if data lost.") - cmd.Flags().BoolVar(&c.cleanCheckData, "clean", false, "Clean all check data.") - cmd.MarkFlagsMutuallyExclusive("create", "compare", "clean") - return cmd -} - -func (c *ConsistentHashCommand) Run(args []string) error { - javaArgs := []string{} - if c.createCheckFile { - javaArgs = append(javaArgs, "--create") - } - if c.compareCheckFiles { - if len(args) != 2 { - return 
stacktrace.NewError("expect 2 arguments with --compare-check-files but got %v", len(args)) - } - javaArgs = append(javaArgs, "--compare") - } - if c.cleanCheckData { - javaArgs = append(javaArgs, "--clean") - } - - javaArgs = append(javaArgs, args...) - return c.Base().Run(javaArgs) -} diff --git a/cli/src/alluxio.org/cli/cmd/fs/consistent_hash.go b/cli/src/alluxio.org/cli/cmd/fs/consistent_hash.go new file mode 100644 index 000000000000..d94463058dde --- /dev/null +++ b/cli/src/alluxio.org/cli/cmd/fs/consistent_hash.go @@ -0,0 +1,76 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. 
+ */ + +package fs + +import ( + "github.com/palantir/stacktrace" + "github.com/spf13/cobra" + + "alluxio.org/cli/env" +) + +func ConsistentHash(className string) env.Command { + return &ConsistentHashCommand{ + BaseJavaCommand: &env.BaseJavaCommand{ + CommandName: "consistent-hash", + JavaClassName: className, + Parameters: []string{"consistent-hash"}, + }, + } +} + +type ConsistentHashCommand struct { + *env.BaseJavaCommand + + create bool + compare bool + clean bool +} + +func (c *ConsistentHashCommand) Base() *env.BaseJavaCommand { + return c.BaseJavaCommand +} + +func (c *ConsistentHashCommand) ToCommand() *cobra.Command { + cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ + Use: "consistent-hash [--create]|[--compare <1stCheckFilePath> <2ndCheckFilePath>]|[--clean]", + Short: "This command is for checking whether the consistent hash ring is changed or not", + RunE: func(cmd *cobra.Command, args []string) error { + return c.Run(args) + }, + }) + const create, compare, clean = "create", "compare", "clean" + cmd.Flags().BoolVar(&c.create, create, false, "Generate check file") + cmd.Flags().BoolVar(&c.compare, compare, false, "Compare check files to see if the hash ring has changed") + cmd.Flags().BoolVar(&c.clean, clean, false, "Delete generated check data") + cmd.MarkFlagsMutuallyExclusive(create, compare, clean) + return cmd +} + +func (c *ConsistentHashCommand) Run(args []string) error { + javaArgs := []string{} + if c.create { + javaArgs = append(javaArgs, "--create") + } + if c.compare { + if len(args) != 2 { + return stacktrace.NewError("expect 2 arguments with --compare but got %v", len(args)) + } + javaArgs = append(javaArgs, "--compare") + } + if c.clean { + javaArgs = append(javaArgs, "--clean") + } + + javaArgs = append(javaArgs, args...) 
+ return c.Base().Run(javaArgs) +} diff --git a/cli/src/alluxio.org/cli/cmd/fs/cp.go b/cli/src/alluxio.org/cli/cmd/fs/cp.go index fbda1579b5cb..e4183854b4f4 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/cp.go +++ b/cli/src/alluxio.org/cli/cmd/fs/cp.go @@ -46,9 +46,20 @@ func (c *CopyCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: "cp [srcPath] [dstPath]", Short: "Copy a file or directory", - Long: `Copies a file or directory in the Alluxio filesystem or between local and Alluxio filesystems -Use the file:// schema to indicate a local filesystem path (ex. file:///absolute/path/to/file) and -use the recursive flag to copy directories`, + Long: `Copies a file or directory in the Alluxio filesystem or between local and Alluxio filesystems. +The file:// scheme indicates a local filesystem path and the alluxio:// scheme or no scheme indicates an Alluxio filesystem path.`, + Example: `# Copy within the Alluxio filesystem +$ ./bin/alluxio fs cp /file1 /file2 + +# Copy a local file to the Alluxio filesystem +$ ./bin/alluxio fs cp file:///file1 /file2 + +# Copy a file in Alluxio to local +$ ./bin/alluxio fs cp alluxio:///file1 file:///file2 + +# Recursively copy a directory within the Alluxio filesystem +$ ./bin/alluxio fs cp -R /dir1 /dir2 +`, Args: cobra.ExactArgs(2), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) diff --git a/cli/src/alluxio.org/cli/cmd/fs/fs.go b/cli/src/alluxio.org/cli/cmd/fs/fs.go index acf5f75b5e6c..94fea1aeaf32 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/fs.go +++ b/cli/src/alluxio.org/cli/cmd/fs/fs.go @@ -12,40 +12,55 @@ package fs import ( - "alluxio.org/cli/cmd/names" - "alluxio.org/cli/env" + "alluxio.org/cli/cmd/names" + "alluxio.org/cli/env" ) var Service = &env.Service{ - Name: "fs", - Description: "Operations to interface with the Alluxio filesystem", - Commands: Cmds(names.FileSystemShellJavaClass), + Name: "fs", + Description: "Operations to interface with the Alluxio 
filesystem", + Documentation: `Operations to interface with the Alluxio filesystem +For commands that take Alluxio URIs as an argument such as ls or mkdir, the argument should be either +- A complete Alluxio URI, such as alluxio://:/ +- A path without its scheme header, such as /path, in order to use the default hostname and port set in alluxio-site.properties + +> Note: All fs commands require the Alluxio cluster to be running. + +Most of the commands which require path components allow wildcard arguments for ease of use. +For example, the command "bin/alluxio fs rm '/data/2014*'" deletes anything in the data directory with a prefix of 2014. + +Some shells will attempt to glob the input paths, causing strange errors. +As a workaround, you can disable globbing (depending on the shell type; for example, set -f) or by escaping wildcards +For example, the command "bin/alluxio fs cat /\\*" uses the escape backslash character twice. +This is because the shell script will eventually call a java program which should have the final escaped parameters "cat /\\*". 
+`, + Commands: Cmds(names.FileSystemShellJavaClass), } func Cmds(className string) []env.Command { - var ret []env.Command - for _, c := range []func(string) env.Command{ - Cat, - Checksum, - Chgrp, - Chmod, - Chown, - Cp, - Head, - Ls, - Mkdir, - Mv, - Rm, - Stat, - Tail, - Test, - Touch, - Location, - CheckCached, - ConsistentHash, - } { - ret = append(ret, c(className)) - } - - return ret + var ret []env.Command + for _, c := range []func(string) env.Command{ + Cat, + CheckCached, + Checksum, + Chgrp, + Chmod, + Chown, + ConsistentHash, + Cp, + Head, + Location, + Ls, + Mkdir, + Mv, + Rm, + Stat, + Tail, + Test, + Touch, + } { + ret = append(ret, c(className)) + } + + return ret } diff --git a/cli/src/alluxio.org/cli/cmd/fs/head.go b/cli/src/alluxio.org/cli/cmd/fs/head.go index a96af645c108..0ed5458177ca 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/head.go +++ b/cli/src/alluxio.org/cli/cmd/fs/head.go @@ -41,12 +41,16 @@ func (c *HeadCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: "head [path]", Short: "Print the leading bytes from the specified file", - Args: cobra.ExactArgs(1), + Long: `The head command prints the first 1KB of data of a file to the shell. 
+Specifying the -c flag sets the number of bytes to print.`, + Example: `# Print first 2048 bytes of a file +$ ./bin/alluxio fs head -c 2048 /output/part-00000`, + Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, }) - cmd.Flags().StringVar(&c.bytes, "bytes", "", "Byte size to print") + cmd.Flags().StringVarP(&c.bytes, "bytes", "c", "", "Byte size to print") return cmd } diff --git a/cli/src/alluxio.org/cli/cmd/fs/ls.go b/cli/src/alluxio.org/cli/cmd/fs/ls.go index 7966107cdd4a..562ce2873ad3 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/ls.go +++ b/cli/src/alluxio.org/cli/cmd/fs/ls.go @@ -70,8 +70,18 @@ func (c *LsCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: "ls [path]", Short: "Prints information for files and directories at the given path", - Long: `Displays information for all files and directories directly under the specified paths, including permission, owner, group, size (bytes for files or the number of children for directories), persistence state, last modified time, the percentage of content already in Alluxio, and the path`, - Args: cobra.ExactArgs(1), + Long: `The ls command lists all the immediate children in a directory and displays the file size, last modification time, and in memory status of the files. +Using ls on a file will only display the information for that specific file. + +The ls command will also load the metadata for any file or immediate children of a directory from the under storage system to Alluxio namespace if it does not exist in Alluxio. +It queries the under storage system for any file or directory matching the given path and creates a mirror of the file in Alluxio backed by that file. 
+Only the metadata, such as the file name and size, are loaded this way and no data transfer occurs.`, + Example: `# List and load metadata for all immediate children of /s3/data +$ ./bin/alluxio fs ls /s3/data + +# Force loading metadata of /s3/data +$ ./bin/alluxio fs ls -f /s3/data`, + Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, diff --git a/cli/src/alluxio.org/cli/cmd/fs/mkdir.go b/cli/src/alluxio.org/cli/cmd/fs/mkdir.go index 8a7ab4da9769..79c08f42a58e 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/mkdir.go +++ b/cli/src/alluxio.org/cli/cmd/fs/mkdir.go @@ -39,7 +39,15 @@ func (c *MkdirCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: "mkdir [path1 path2 ...]", Short: "Create directories at the specified paths, creating the parent directory if not exists", - Args: cobra.MinimumNArgs(1), + Long: `The mkdir command creates a new directory in the Alluxio filesystem. +It is recursive and will create any parent directories that do not exist. +Note that the created directory will not be created in the under storage system until a file in the directory is persisted to the underlying storage. 
+Using mkdir on an invalid or existing path will fail.`, + Example: `# Creating a folder structure +$ ./bin/alluxio fs mkdir /users +$ ./bin/alluxio fs mkdir /users/Alice +$ ./bin/alluxio fs mkdir /users/Bob`, + Args: cobra.MinimumNArgs(1), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, diff --git a/cli/src/alluxio.org/cli/cmd/fs/mv.go b/cli/src/alluxio.org/cli/cmd/fs/mv.go index 3bd7d6ca8141..ee188ef01ede 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/mv.go +++ b/cli/src/alluxio.org/cli/cmd/fs/mv.go @@ -39,7 +39,13 @@ func (c *MoveCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: "mv [srcPath] [dstPath]", Short: "Rename a file or directory", - Args: cobra.ExactArgs(2), + Long: `The mv command moves a file or directory to another path in Alluxio. +The destination path must not exist or be a directory. +If it is a directory, the file or directory will be placed as a child of the directory. +The command is purely a metadata operation and does not affect the data blocks of the file.`, + Example: `# Moving a file +$ ./bin/alluxio fs mv /data/2014 /data/archives/2014`, + Args: cobra.ExactArgs(2), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, diff --git a/cli/src/alluxio.org/cli/cmd/fs/rm.go b/cli/src/alluxio.org/cli/cmd/fs/rm.go index 3b10974ad8d3..07da9b51be9d 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/rm.go +++ b/cli/src/alluxio.org/cli/cmd/fs/rm.go @@ -44,13 +44,20 @@ func (c *RmCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: "rm [path]", Short: "Remove the specified file", - Args: cobra.ExactArgs(1), + Long: `The rm command removes a file from Alluxio space and the under storage system. 
+The file will be unavailable immediately after this command returns, but the actual data may be deleted a while later.`, + Example: `# Remove a file from Alluxio and the under storage system +$ ./bin/alluxio fs rm /tmp/unused-file + +# Remove a file from Alluxio filesystem only +$ ./bin/alluxio fs rm --alluxio-only --skip-ufs-check /tmp/unused-file2 +# Note it is recommended to use both --alluxio-only and --skip-ufs-check together in this situation`, + Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, }) cmd.Flags().BoolVar(&c.alluxioOnly, "alluxio-only", false, "True to only remove data and metadata from Alluxio cache") - cmd.Flags().BoolVarP(&c.deleteMount, "delete-mount", "m", false, "True to remove mount points within the specified directory subtree, which would otherwise cause failures") cmd.Flags().BoolVarP(&c.isRecursive, "recursive", "R", false, "True to recursively remove files within the specified directory subtree") cmd.Flags().BoolVarP(&c.skipUfsCheck, "skip-ufs-check", "U", false, "True to skip checking if corresponding UFS contents are in sync") return cmd @@ -61,9 +68,6 @@ func (c *RmCommand) Run(args []string) error { if c.alluxioOnly { javaArgs = append(javaArgs, "--alluxioOnly") } - if c.deleteMount { - javaArgs = append(javaArgs, "--deleteMountPoint") - } if c.isRecursive { javaArgs = append(javaArgs, "-R") } diff --git a/cli/src/alluxio.org/cli/cmd/fs/stat.go b/cli/src/alluxio.org/cli/cmd/fs/stat.go index 1248fc2601b2..21474e72342d 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/stat.go +++ b/cli/src/alluxio.org/cli/cmd/fs/stat.go @@ -44,7 +44,19 @@ func (c *StatCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: "stat", Short: "Displays info for the specified file or directory", - Args: cobra.NoArgs, + Long: `The stat command dumps the FileInfo representation of a file or a directory to the shell.`, + Example: `# Display file's stat +$ ./bin/alluxio fs 
stat /data/2015/logs-1.txt + +# Display directory's stat +$ ./bin/alluxio fs stat /data/2015 + +# Display the size of file +$ ./bin/alluxio fs stat -f %z /data/2015/logs-1.txt + +# Find the file by fileID and display the stat, useful in troubleshooting +$ ./bin/alluxio fs stat -fileId 12345678`, + Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { return c.Run(nil) }, diff --git a/cli/src/alluxio.org/cli/cmd/fs/tail.go b/cli/src/alluxio.org/cli/cmd/fs/tail.go index 5affc193fd13..aa258881b231 100644 --- a/cli/src/alluxio.org/cli/cmd/fs/tail.go +++ b/cli/src/alluxio.org/cli/cmd/fs/tail.go @@ -41,7 +41,11 @@ func (c *TailCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: "tail [path]", Short: "Print the trailing bytes from the specified file", - Args: cobra.ExactArgs(1), + Long: `The tail command prints the last 1KB of data of a file to the shell. +Specifying the -c flag sets the number of bytes to print.`, + Example: `# Print last 2048 bytes of a file +$ ./bin/alluxio fs tail -c 2048 /output/part-00000`, + Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, diff --git a/cli/src/alluxio.org/cli/cmd/generate/docs.go b/cli/src/alluxio.org/cli/cmd/generate/doc_tables.go similarity index 73% rename from cli/src/alluxio.org/cli/cmd/generate/docs.go rename to cli/src/alluxio.org/cli/cmd/generate/doc_tables.go index 92e457647bb9..05f584ef58bd 100644 --- a/cli/src/alluxio.org/cli/cmd/generate/docs.go +++ b/cli/src/alluxio.org/cli/cmd/generate/doc_tables.go @@ -19,26 +19,26 @@ import ( "alluxio.org/cli/env" ) -var Docs = &DocsCommand{ +var DocTables = &DocTablesCommand{ BaseJavaCommand: &env.BaseJavaCommand{ - CommandName: "docs", + CommandName: "doc-tables", JavaClassName: "alluxio.cli.DocGenerator", ShellJavaOpts: fmt.Sprintf(env.JavaOptFormat, env.ConfAlluxioLoggerType, "Console"), }, } -type DocsCommand struct { +type DocTablesCommand struct { 
*env.BaseJavaCommand } -func (c *DocsCommand) Base() *env.BaseJavaCommand { +func (c *DocTablesCommand) Base() *env.BaseJavaCommand { return c.BaseJavaCommand } -func (c *DocsCommand) ToCommand() *cobra.Command { +func (c *DocTablesCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ - Use: Docs.CommandName, - Short: "Generate docs automatically.", + Use: DocTables.CommandName, + Short: "Generate configuration and metric tables used in documentation", Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) @@ -47,6 +47,6 @@ func (c *DocsCommand) ToCommand() *cobra.Command { return cmd } -func (c *DocsCommand) Run(args []string) error { +func (c *DocTablesCommand) Run(args []string) error { return c.Base().Run(args) } diff --git a/cli/src/alluxio.org/cli/cmd/generate/generate.go b/cli/src/alluxio.org/cli/cmd/generate/generate.go index 3a26a653570e..cb666a514874 100644 --- a/cli/src/alluxio.org/cli/cmd/generate/generate.go +++ b/cli/src/alluxio.org/cli/cmd/generate/generate.go @@ -13,12 +13,37 @@ package generate import ( "alluxio.org/cli/env" + "github.com/palantir/stacktrace" + "github.com/spf13/cobra" ) var Service = &env.Service{ Name: "generate", - Description: "Generate docs automatically if one doesn't exist.", + Description: "Generate files used in documentation", Commands: []env.Command{ - Docs, + &DocsCommand{}, + DocTables, + UserCliDoc, }, } + +type DocsCommand struct{} + +func (c *DocsCommand) ToCommand() *cobra.Command { + return &cobra.Command{ + Use: DocTables.CommandName, + Short: "Generate all documentation files", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + for _, c := range []env.Command{ + UserCliDoc, + DocTables, + } { + if err := c.ToCommand().RunE(cmd, args); err != nil { + return stacktrace.Propagate(err, "error running %v", c.ToCommand().Use) + } + } + return nil + }, + } +} diff --git 
a/cli/src/alluxio.org/cli/cmd/generate/user_cli.go b/cli/src/alluxio.org/cli/cmd/generate/user_cli.go new file mode 100644 index 000000000000..cb702046e901 --- /dev/null +++ b/cli/src/alluxio.org/cli/cmd/generate/user_cli.go @@ -0,0 +1,177 @@ +/* + * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0 + * (the "License"). You may not use this work except in compliance with the License, which is + * available at www.apache.org/licenses/LICENSE-2.0 + * + * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, + * either express or implied, as more fully set forth in the License. + * + * See the NOTICE file distributed with this work for information regarding copyright ownership. + */ + +package generate + +import ( + "bufio" + "fmt" + "io" + "os" + "path/filepath" + "strings" + + "github.com/palantir/stacktrace" + "github.com/spf13/cobra" + "github.com/spf13/pflag" + + "alluxio.org/cli/env" +) + +var UserCliDoc = &UserCliCommand{ + Dst: filepath.Join("docs", "en", "operation", "User-CLI.md"), +} + +type UserCliCommand struct { + Dst string +} + +func (c *UserCliCommand) ToCommand() *cobra.Command { + return &cobra.Command{ + Use: "user-cli", + Short: "Generate content for `operation/User-CLI.md`", + Args: cobra.NoArgs, + RunE: func(cmd *cobra.Command, args []string) error { + var rootCmd *cobra.Command + for rootCmd = cmd; rootCmd.HasParent(); rootCmd = rootCmd.Parent() { + } + + f, err := os.Create(c.Dst) + if err != nil { + return stacktrace.Propagate(err, "error creating output file") + } + defer f.Close() + w := bufio.NewWriter(f) + fmt.Fprintln(w, + `--- +layout: global +title: User Command Line Interface +--- + +{% comment %} +This is a generated file created by running command "bin/alluxio generate user-cli" +The command parses the golang command definitions and descriptions to generate the markdown in this file +{% endcomment %} + +Alluxio's command line interface provides user access 
to various operations, such as: +- Start or stop processes +- Filesystem operations +- Administrative commands`) + fmt.Fprintln(w) + fmt.Fprintln(w, "Invoke the executable to view the possible subcommands:") + fmt.Fprintln(w, "```shell") + fmt.Fprintln(w, "$ ./bin/alluxio") + fmt.Fprintln(w, rootCmd.UsageString()) + fmt.Fprintln(w, "```") + fmt.Fprintln(w) + fmt.Fprintln(w, "To set JVM system properties as part of the command, set the `-D` flag in the form of `-Dproperty=value`.") + fmt.Fprintln(w) + fmt.Fprintln(w, "To attach debugging java options specified by `$ALLUXIO_USER_ATTACH_OPTS`, set the `--attach-debug` flag") + fmt.Fprintln(w) + fmt.Fprintln(w, "Note that, as a part of Alluxio deployment, the Alluxio shell will also take the configuration in `${ALLUXIO_HOME}/conf/alluxio-site.properties` when it is run from Alluxio installation at `${ALLUXIO_HOME}`.") + fmt.Fprintln(w) + + for _, serviceCmd := range rootCmd.Commands() { + if serviceCmd.Name() == "help" { + // help is a built in command from the library. 
avoid documenting it + continue + } + fmt.Fprint(w, "## ") + fmt.Fprintln(w, serviceCmd.Name()) + + desc := serviceCmd.Short + if serviceCmd.Long != "" { + desc = serviceCmd.Long + } + fmt.Fprintln(w, desc) + fmt.Fprintln(w) + + for _, opCmd := range serviceCmd.Commands() { + printCommandDocs(serviceCmd.Name(), opCmd, w) + } + } + w.Flush() + return nil + }, + } +} + +func printCommandDocs(serviceName string, opCmd *cobra.Command, w io.Writer) { + fmt.Fprintln(w, "###", serviceName, opCmd.Name()) + + // collect relevant flags defined for the command + inheritedFlags := opCmd.InheritedFlags() + definedFlags := pflag.NewFlagSet(fmt.Sprintf("%v_%v", serviceName, opCmd.Name()), pflag.ContinueOnError) + opCmd.Flags().VisitAll(func(f *pflag.Flag) { + if f.Hidden { + return + } + if f.Name == env.AttachDebugName || f.Name == env.JavaOptsName { + return + } + if inheritedFlags.Lookup(f.Name) == nil { + definedFlags.AddFlag(f) + } + }) + if definedFlags.HasFlags() { + fmt.Fprintf(w, "Usage: `%v`\n\n", opCmd.UseLine()) + } else { + // remove the [flags] part of the usage as there are no flags to mention + fmt.Fprintf(w, "Usage: `%v`\n\n", strings.Replace(opCmd.UseLine(), " [flags]", "", 1)) + } + + desc := opCmd.Short + if opCmd.Long != "" { + desc = opCmd.Long + } + fmt.Fprintln(w, desc) + fmt.Fprintln(w) + + if definedFlags.HasFlags() { + fmt.Fprintln(w, "Flags:") + definedFlags.VisitAll(func(f *pflag.Flag) { + fmt.Fprintf(w, "- `--%v`", f.Name) + if f.Shorthand != "" { + fmt.Fprintf(w, ",`-%v`", f.Shorthand) + } + _, required := f.Annotations[cobra.BashCompOneRequiredFlag] + _, usage := pflag.UnquoteUsage(f) + + // prepend the flag description with "(Required)" if required + // print default value if flag is not required + var requiredPrefix, defVal string + if required { + requiredPrefix = "(Required) " + } else { + v := f.DefValue + if f.Value.Type() == "string" { + // add quotes for string flags + v = fmt.Sprintf("%q", defVal) + } + defVal = fmt.Sprintf(" (Default: 
%v)", v) + } + fmt.Fprintf(w, ": %v%v%v\n", requiredPrefix, usage, defVal) + + }) + fmt.Fprintln(w) + } + + if opCmd.HasExample() { + fmt.Fprintln(w, "Examples:") + for _, ex := range strings.Split(opCmd.Example, "\n\n") { + fmt.Fprintln(w, "```shell") + fmt.Fprintln(w, ex) + fmt.Fprintln(w, "```") + fmt.Fprintln(w) + } + fmt.Fprintln(w) + } +} diff --git a/cli/src/alluxio.org/cli/cmd/info/collect.go b/cli/src/alluxio.org/cli/cmd/info/collect.go index bdfb032a1b6c..d7be722f71db 100644 --- a/cli/src/alluxio.org/cli/cmd/info/collect.go +++ b/cli/src/alluxio.org/cli/cmd/info/collect.go @@ -55,18 +55,17 @@ func (c *CollectCommand) ToCommand() *cobra.Command { Use: fmt.Sprintf("%v [command]", c.CommandName), Short: "Collects information such as logs, config, metrics, and more from the running Alluxio cluster and bundle into a single tarball", Long: `Collects information such as logs, config, metrics, and more from the running Alluxio cluster and bundle into a single tarball + [command] must be one of the following values: - all runs all the commands below - cluster: runs a set of Alluxio commands to collect information about the Alluxio cluster - conf: collects the configuration files under ${ALLUXIO_HOME}/config/ - env: runs a set of linux commands to collect information about the cluster - jvm: collects jstack from the JVMs - log: collects the log files under ${ALLUXIO_HOME}/logs/ - metrics: collects Alluxio system metrics - -WARNING: This command MAY bundle credentials. To understand the risks refer to the docs here. 
-https://docs.alluxio.io/os/user/edge/en/operation/Troubleshooting.html#collect-alluxio-cluster-information -`, +- all: runs all the commands below +- cluster: runs a set of Alluxio commands to collect information about the Alluxio cluster +- conf: collects the configuration files under ${ALLUXIO_HOME}/config/ +- env: runs a set of linux commands to collect information about the cluster +- jvm: collects jstack from the JVMs +- log: collects the log files under ${ALLUXIO_HOME}/logs/ +- metrics: collects Alluxio system metrics + +> WARNING: This command MAY bundle credentials. Inspect the output tarball for any sensitive information and remove it before sharing with others.`, Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) diff --git a/cli/src/alluxio.org/cli/cmd/info/version.go b/cli/src/alluxio.org/cli/cmd/info/version.go index bd4a32557601..76a3cd304640 100644 --- a/cli/src/alluxio.org/cli/cmd/info/version.go +++ b/cli/src/alluxio.org/cli/cmd/info/version.go @@ -35,7 +35,7 @@ func (c *VersionCommand) Base() *env.BaseJavaCommand { func (c *VersionCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: c.CommandName, - Short: "Print Alluxio version and exit.", + Short: "Print Alluxio version.", Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) diff --git a/cli/src/alluxio.org/cli/cmd/initiate/clear_os_cache.go b/cli/src/alluxio.org/cli/cmd/initiate/clear_os_cache.go index 933daf280202..06808f47be65 100644 --- a/cli/src/alluxio.org/cli/cmd/initiate/clear_os_cache.go +++ b/cli/src/alluxio.org/cli/cmd/initiate/clear_os_cache.go @@ -25,9 +25,10 @@ type ClearOSCacheCommand struct{} func (c *ClearOSCacheCommand) ToCommand() *cobra.Command { cmd := &cobra.Command{ - Use: "clearOSCache", + Use: "clear-os-cache", Args: cobra.NoArgs, Short: "Clear OS buffer cache of the machine", + Long: `The clear-os-cache command drops the OS buffer cache`, 
RunE: func(cmd *cobra.Command, args []string) error { if err := exec.Command("sync").Run(); err != nil { return stacktrace.Propagate(err, "error running sync") diff --git a/cli/src/alluxio.org/cli/cmd/initiate/copy_dir.go b/cli/src/alluxio.org/cli/cmd/initiate/copy_dir.go index 4a72793ecc1b..c7c1ba57df57 100644 --- a/cli/src/alluxio.org/cli/cmd/initiate/copy_dir.go +++ b/cli/src/alluxio.org/cli/cmd/initiate/copy_dir.go @@ -31,9 +31,14 @@ type CopyDirCommand struct{} func (c *CopyDirCommand) ToCommand() *cobra.Command { cmd := &cobra.Command{ - Use: "copyDir [path]", + Use: "copy-dir [path]", Args: cobra.ExactArgs(1), - Short: "Copy a path to all master/worker nodes.", + Short: "Copy a path to all master and worker nodes.", + Long: `The copy-dir command copies the directory at given path to all master nodes listed in conf/masters and all worker nodes listed in conf/workers. + +> Note: This command does not require the Alluxio cluster to be running.`, + Example: `# copy alluxio-site properties file to all nodes +$ ./bin/alluxio init copy-dir conf/alluxio-site.properties`, RunE: func(cmd *cobra.Command, args []string) error { // get list of masters or workers, or both hosts, err := processes.GetHostnames([]string{processes.HostGroupMasters, processes.HostGroupWorkers}) diff --git a/cli/src/alluxio.org/cli/cmd/initiate/format.go b/cli/src/alluxio.org/cli/cmd/initiate/format.go index 171d575ae633..27582c08ee97 100644 --- a/cli/src/alluxio.org/cli/cmd/initiate/format.go +++ b/cli/src/alluxio.org/cli/cmd/initiate/format.go @@ -36,6 +36,14 @@ func (c *FormatCommand) ToCommand() *cobra.Command { Use: "format", Args: cobra.NoArgs, Short: "Format Alluxio master and all workers", + Long: `The format command formats the Alluxio master and all its workers. + +Running this command on an existing Alluxio cluster deletes everything persisted in Alluxio, including cached data and any metadata information. +Data in under storage will not be changed. 
+ +> Warning: Formatting is required when you run Alluxio for the first time. +It should only be called while the cluster is not running. +`, RunE: func(cmd *cobra.Command, args []string) error { if c.localFileSystem { // check if alluxio.master.mount.table.root.ufs set @@ -59,7 +67,7 @@ func (c *FormatCommand) ToCommand() *cobra.Command { if env.Env.EnvVar.GetString(env.ConfAlluxioMasterJournalType.EnvVar) == "EMBEDDED" { if err := processes.RunSshCommand( strings.Join(append([]string{cliPath}, journalArgs...), " "), - processes.HostGroupWorkers); err != nil { + processes.HostGroupMasters); err != nil { return stacktrace.Propagate(err, "error formatting masters") } } else { @@ -73,6 +81,6 @@ func (c *FormatCommand) ToCommand() *cobra.Command { }, } cmd.Flags().BoolVarP(&c.localFileSystem, "localFileSystem", "s", false, - "if -s specified, only format if underfs is local and doesn't already exist") + "If specified, only format if underfs is local and doesn't already exist") return cmd } diff --git a/cli/src/alluxio.org/cli/cmd/initiate/validate.go b/cli/src/alluxio.org/cli/cmd/initiate/validate.go index 40caf8f281f8..d93fe979a1d8 100644 --- a/cli/src/alluxio.org/cli/cmd/initiate/validate.go +++ b/cli/src/alluxio.org/cli/cmd/initiate/validate.go @@ -39,8 +39,13 @@ func (c *ValidateCommand) Base() *env.BaseJavaCommand { func (c *ValidateCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: c.CommandName, - Short: "Validate Alluxio conf or environment and exit", - Args: cobra.NoArgs, + Short: "Validate Alluxio configuration or environment", + Example: `# Validate configuration +$ ./bin/alluxio init validate --type conf + +# Validate environment +$ ./bin/alluxio init validate --type env`, + Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, diff --git a/cli/src/alluxio.org/cli/cmd/job/load.go b/cli/src/alluxio.org/cli/cmd/job/load.go index c501e0b58f9f..5d087a7f0416 100644 --- 
a/cli/src/alluxio.org/cli/cmd/job/load.go +++ b/cli/src/alluxio.org/cli/cmd/job/load.go @@ -47,7 +47,27 @@ func (c *LoadCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: Load.CommandName, Short: "Submit or manage load jobs", - Args: cobra.NoArgs, + Long: `The load command moves data from the under storage system into Alluxio storage. +For example, load can be used to prefetch data for analytics jobs. +If load is run on a directory, files in the directory will be recursively loaded.`, + Example: `# Submit a load job +$ ./bin/alluxio job load --path /path --submit + +# View the progress of a submitted job +$ ./bin/alluxio job load --path /path --progress +# Example output +Progress for loading path '/path': + Settings: bandwidth: unlimited verify: false + Job State: SUCCEEDED + Files Processed: 1000 + Bytes Loaded: 125.00MB + Throughput: 2509.80KB/s + Block load failure rate: 0.00% + Files Failed: 0 + +# Stop a submitted job +$ ./bin/alluxio job load --path /path --stop`, + Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, diff --git a/cli/src/alluxio.org/cli/cmd/journal/format.go b/cli/src/alluxio.org/cli/cmd/journal/format.go index 47b508e22053..ff8b7e64a570 100644 --- a/cli/src/alluxio.org/cli/cmd/journal/format.go +++ b/cli/src/alluxio.org/cli/cmd/journal/format.go @@ -44,7 +44,10 @@ func (c *FormatCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: Format.CommandName, Short: "Format the local Alluxio master journal", - Args: cobra.NoArgs, + Long: `The format command formats the local Alluxio master's journal. 
+ +> Warning: Formatting should only be called while the cluster is not running.`, + Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { return c.Run(args) }, diff --git a/cli/src/alluxio.org/cli/cmd/journal/read.go b/cli/src/alluxio.org/cli/cmd/journal/read.go index 69c29fd02c0a..31d4fb32e25e 100644 --- a/cli/src/alluxio.org/cli/cmd/journal/read.go +++ b/cli/src/alluxio.org/cli/cmd/journal/read.go @@ -48,7 +48,15 @@ func (c *ReadCommand) ToCommand() *cobra.Command { cmd := c.Base().InitRunJavaClassCmd(&cobra.Command{ Use: Read.CommandName, Short: "Read an Alluxio journal file to a human-readable version", - Args: cobra.NoArgs, + Long: `The read command parses the current journal and outputs a human readable version to the local folder. +This command may take a while depending on the size of the journal. +> Note: This command requires that the Alluxio cluster is NOT running.`, + Example: `$ ./bin/alluxio journal read +# output +Dumping journal of type EMBEDDED to /Users/alluxio/journal_dump-1602698211916 +2020-10-14 10:56:51,960 INFO RaftStorageDirectory - Lock on /Users/alluxio/alluxio/journal/raft/02511d47-d67c-49a3-9011-abb3109a44c1/in_use.lock acquired by nodename 78602@alluxio-user +2020-10-14 10:56:52,254 INFO RaftJournalDumper - Read 223 entries from log /Users/alluxio/alluxio/journal/raft/02511d47-d67c-49a3-9011-abb3109a44c1/current/log_0-222.`, + Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { return c.Run(nil) }, diff --git a/cli/src/alluxio.org/cli/cmd/process/process.go b/cli/src/alluxio.org/cli/cmd/process/process.go index 65983a2714e6..9e1c3a2e9e0d 100644 --- a/cli/src/alluxio.org/cli/cmd/process/process.go +++ b/cli/src/alluxio.org/cli/cmd/process/process.go @@ -17,7 +17,7 @@ import ( var Service = &env.Service{ Name: "process", - Description: "Start, stop, and other operations related to the cluster processes", + Description: "Start or stop cluster processes", Commands: []env.Command{ 
&env.StartProcessCommand{}, &env.StopProcessCommand{}, diff --git a/cli/src/alluxio.org/cli/env/command.go b/cli/src/alluxio.org/cli/env/command.go index 32e13e98fa93..2a3c23bf5a91 100644 --- a/cli/src/alluxio.org/cli/env/command.go +++ b/cli/src/alluxio.org/cli/env/command.go @@ -43,9 +43,14 @@ type BaseJavaCommand struct { ShellJavaOpts string // default java opts encoded as part of the specific command } +const ( + AttachDebugName = "attach-debug" + JavaOptsName = "java-opts" +) + func (c *BaseJavaCommand) InitRunJavaClassCmd(cmd *cobra.Command) *cobra.Command { - cmd.Flags().BoolVar(&c.DebugMode, "attach-debug", false, fmt.Sprintf("True to attach debug opts specified by $%v", ConfAlluxioUserAttachOpts.EnvVar)) - cmd.Flags().StringSliceVarP(&c.InlineJavaOpts, "java-opts", "D", nil, `Alluxio properties to apply, ex. -Dkey=value`) + cmd.Flags().BoolVar(&c.DebugMode, AttachDebugName, false, fmt.Sprintf("True to attach debug opts specified by $%v", ConfAlluxioUserAttachOpts.EnvVar)) + cmd.Flags().StringSliceVarP(&c.InlineJavaOpts, JavaOptsName, "D", nil, `Alluxio properties to apply, ex. -Dkey=value`) return cmd } diff --git a/cli/src/alluxio.org/cli/env/process_start.go b/cli/src/alluxio.org/cli/env/process_start.go index 80b1f11cfae5..22d1d59d1a81 100644 --- a/cli/src/alluxio.org/cli/env/process_start.go +++ b/cli/src/alluxio.org/cli/env/process_start.go @@ -25,7 +25,10 @@ type StartProcessCommand struct { func (c *StartProcessCommand) ToCommand() *cobra.Command { cmd := &cobra.Command{ Use: StartProcessName, - Short: "Starts one or more processes", + Short: "Starts a process locally or a group of similar processes across the cluster", + Long: `Starts a single process locally or a group of similar processes across the cluster. +For starting a group, it is assumed the local host has passwordless SSH access to other nodes in the cluster. 
+The command will parse the hostnames to run on by reading the conf/masters and conf/workers files, depending on the process type.`, } cmd.PersistentFlags().BoolVarP(&c.SkipKillOnStart, "skip-kill-prev", "N", false, "Avoid killing previous running processes when starting") cmd.PersistentFlags().BoolVarP(&c.AsyncStart, "async", "a", false, "Asynchronously start processes without monitoring for start completion") diff --git a/cli/src/alluxio.org/cli/env/process_stop.go b/cli/src/alluxio.org/cli/env/process_stop.go index b983ce4b03bd..391a237483d4 100644 --- a/cli/src/alluxio.org/cli/env/process_stop.go +++ b/cli/src/alluxio.org/cli/env/process_stop.go @@ -22,7 +22,10 @@ type StopProcessCommand struct { func (c *StopProcessCommand) ToCommand() *cobra.Command { cmd := &cobra.Command{ Use: StopProcessName, - Short: "Stops one or more processes", + Short: "Stops a process locally or a group of similar processes across the cluster", + Long: `Stops a single process locally or a group of similar processes across the cluster. +For stopping a group, it is assumed the local host has passwordless SSH access to other nodes in the cluster. 
+The command will parse the hostnames to run on by reading the conf/masters and conf/workers files, depending on the process type.`, } cmd.PersistentFlags().BoolVarP(&c.SoftKill, "soft", "s", false, "Soft kill only, don't forcibly kill the process") diff --git a/cli/src/alluxio.org/cli/env/service.go b/cli/src/alluxio.org/cli/env/service.go index 274e2222856f..13e14124bbe7 100644 --- a/cli/src/alluxio.org/cli/env/service.go +++ b/cli/src/alluxio.org/cli/env/service.go @@ -34,15 +34,17 @@ func InitServiceCommandTree(rootCmd *cobra.Command) { } type Service struct { - Name string - Description string - Commands []Command + Name string + Description string + Documentation string + Commands []Command } func (s *Service) InitCommandTree(rootCmd *cobra.Command) { cmd := &cobra.Command{ Use: s.Name, Short: s.Description, + Long: s.Documentation, } rootCmd.AddCommand(cmd) diff --git a/cli/src/alluxio.org/cli/launch/launch.go b/cli/src/alluxio.org/cli/launch/launch.go index a007cba913f2..6a3f0b6ecba6 100644 --- a/cli/src/alluxio.org/cli/launch/launch.go +++ b/cli/src/alluxio.org/cli/launch/launch.go @@ -64,6 +64,7 @@ func (l *Launcher) Run() error { rootCmd := &cobra.Command{ Use: "bin/alluxio", } + rootCmd.CompletionOptions.DisableDefaultCmd = true l.AddFlags(rootCmd) rootCmd.PersistentPreRunE = l.GetPreRunFunc() env.InitServiceCommandTree(rootCmd) diff --git a/cli/src/alluxio.org/go.mod b/cli/src/alluxio.org/go.mod index 79860b7141e3..fc7cb17bf720 100644 --- a/cli/src/alluxio.org/go.mod +++ b/cli/src/alluxio.org/go.mod @@ -7,6 +7,7 @@ require ( github.com/shirou/gopsutil v3.21.11+incompatible github.com/sirupsen/logrus v1.9.0 github.com/spf13/cobra v1.7.0 + github.com/spf13/pflag v1.0.5 github.com/spf13/viper v1.15.0 golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e gopkg.in/yaml.v3 v3.0.1 @@ -23,7 +24,6 @@ require ( github.com/spf13/afero v1.9.3 // indirect github.com/spf13/cast v1.5.0 // indirect github.com/spf13/jwalterweatherman v1.1.0 // indirect - 
github.com/spf13/pflag v1.0.5 // indirect github.com/stretchr/testify v1.8.2 // indirect github.com/subosito/gotenv v1.4.2 // indirect github.com/tklauser/go-sysconf v0.3.11 // indirect diff --git a/docs/_data/table/chmod-permission.csv b/docs/_data/table/chmod-permission.csv deleted file mode 100644 index c8bf142308ee..000000000000 --- a/docs/_data/table/chmod-permission.csv +++ /dev/null @@ -1,9 +0,0 @@ -number,permission,rwx -7,"read, write and execute",rwx -6,read and write,rw- -5,read and execute,r-x -4,read only,r-- -3,write and execute,-wx -2,write only,-w- -1,execute only,--x -0,none,--- diff --git a/docs/en/operation/Logging.md b/docs/en/operation/Logging.md index e442a7fce76b..ec89a8205a4a 100644 --- a/docs/en/operation/Logging.md +++ b/docs/en/operation/Logging.md @@ -115,7 +115,7 @@ An alternative way to modify logging configurations is use the `logLevel` comman This allows someone to modify the configuration at runtime without needing to restart processes. This is not the recommended way as any modifications will not be persisted across restart, and causes a configuration mismatch between the running process and its `log4j.properties` file. -See the [logLevel command documentation]({{ '/en/operation/User-CLI.html#loglevel' | relativize_url }}) +See the [logLevel command documentation]({{ '/en/operation/User-CLI.html#conf-log' | relativize_url }}) for the command options. For example, the following command sets the logger level of the class `alluxio.underfs.hdfs.HdfsUnderFileSystem` to diff --git a/docs/en/operation/User-CLI.md b/docs/en/operation/User-CLI.md index 58575329cee2..f9c323f59a75 100644 --- a/docs/en/operation/User-CLI.md +++ b/docs/en/operation/User-CLI.md @@ -3,737 +3,861 @@ layout: global title: User Command Line Interface --- -Alluxio's command line interface provides users with basic file system operations. 
You can invoke -the following command line utility to get all the subcommands: +{% comment %} +This is a generated file created by running command "bin/alluxio generate user-cli" +The command parses the golang command definitions and descriptions to generate the markdown in this file +{% endcomment %} +Alluxio's command line interface provides user access to various operations, such as: +- Start or stop processes +- Filesystem operations +- Administrative commands + +Invoke the executable to view the possible subcommands: ```shell $ ./bin/alluxio -Usage: alluxio [COMMAND] - [format [-s]] - [getConf [key]] - [logLevel] - [runTests] - ... -``` +Usage: + bin/alluxio [command] -Alluxio shell users can put JVM system properties `-Dproperty=value` after the `fs` command and -before the subcommand to specify Alluxio user properties from the command line. -For example, the following Alluxio shell command sets the write type to `CACHE_THROUGH` when copying -files to Alluxio: +Available Commands: + cache Worker-related file system and format operations. + conf Get, set, and validate configuration settings, primarily those defined in conf/alluxio-site.properties + exec Run the main method of an Alluxio class, or end-to-end tests on an Alluxio cluster. + fs Operations to interface with the Alluxio filesystem + generate Generate files used in documentation + help Help about any command + info Retrieve and/or display info about the running Alluxio cluster + init Initialization operations such as format and validate + job Command line tool for interacting with the job service. 
+ journal Journal related operations + process Start or stop cluster processes -```shell -$ ./bin/alluxio fs -Dalluxio.user.file.writetype.default=CACHE_THROUGH \ - copyFromLocal README.md /README.md -``` +Flags: + --debug-log True to enable debug logging -Note that, as a part of Alluxio deployment, the Alluxio shell will also take the configuration in -`${ALLUXIO_HOME}/conf/alluxio-site.properties` when it is run from Alluxio installation at -`${ALLUXIO_HOME}`. +Use "bin/alluxio [command] --help" for more information about a command. -## General operations +``` -This section lists usages and examples of general Alluxio operations +To set JVM system properties as part of the command, set the `-D` flag in the form of `-Dproperty=value`. -### format +To attach debugging java options specified by `$ALLUXIO_USER_ATTACH_OPTS`, set the `--attach-debug` flag -The `format` command formats the Alluxio master and all its workers. +Note that, as a part of Alluxio deployment, the Alluxio shell will also take the configuration in `${ALLUXIO_HOME}/conf/alluxio-site.properties` when it is run from Alluxio installation at `${ALLUXIO_HOME}`. -If `-s` specified, only format if under storage is local and does not already exist +## cache +Worker-related file system and format operations. -Running this command on an existing Alluxio cluster deletes everything persisted in Alluxio, -including cached data and any metadata information. -Data in under storage will not be changed. +### cache format +Usage: `bin/alluxio cache format` + +The format command formats the Alluxio worker on this host. +This deletes all the cached data stored by the worker. Data in the under storage will not be changed. -> Warning: `format` is required when you run Alluxio for the first time. -`format` should only be called while the cluster is not running. 
+> Warning: Format should only be called when the worker is not running +Examples: ```shell -$ ./bin/alluxio format -$ ./bin/alluxio format -s +# Format worker +$ ./bin/alluxio cache format ``` -### journal - -The `journal` command manages the Alluxio master journal on this host. -The Alluxio master stores various forms of metadata, including: -- file system operations -- where files are located on workers -- journal transactions -- under storage file metadata -There are two operations that can be performed on the journal: -- `format`: formats the journal -- `read`: read an Alluxio journal file to a human-readable version +### cache free +Usage: `bin/alluxio cache free [flags]` -For `journal format`, all this information is deleted if `journal format` is run. +Synchronously free cached files along a path or held by a specific worker -> Warning: `journal format` should only be called while the cluster is not running. +Flags: +- `--path`: The file or directory to free (Default: "") +- `--worker`: The worker to free (Default: "") +Examples: ```shell -$ ./bin/alluxio journal format -``` - -For `journal read`, the journal file is read and printed to the console. -```shell -$ ./bin/alluxio journal read +# Free a file by its path +$ ./bin/alluxio cache free --path /path/to/file ``` -### init - -The `init format` command formats the Alluxio masters/workers. -This operation deletes all the information stored in Alluxio. -Data in under storage will not be changed. - -> Warning: `init format` should only be called while the cluster is not running. - ```shell -$ ./bin/alluxio init format [flags] +# Free files on a worker +$ ./bin/alluxio cache free --worker ``` -### fs -See [File System Operations](#file-system-operations). +## conf +Get, set, and validate configuration settings, primarily those defined in conf/alluxio-site.properties -### getConf +### conf get +Usage: `bin/alluxio conf get [key] [flags]` -The `getConf` command prints the configured value for the given key. 
+The get command prints the configured value for the given key. If the key is invalid, it returns a nonzero exit code. -If the key is valid but isn't set, an empty string is printed. +If the key is valid but isn't set, an empty string is printed. If no key is specified, the full configuration is printed. -**Options:** +> Note: This command does not require the Alluxio cluster to be running. -* `--master` option prints any configuration properties used by the master. -* `--source` option prints the source of the configuration properties. -* `--unit ` option displays the configuration value in the given unit. -For example, with `--unit KB`, a configuration value of `4096B` returns as `4`, -and with `--unit S`, a configuration value of `5000ms` returns as `5`. -Possible unit options include B, KB, MB, GB, TP, PB as units of byte size and -MS, S, M, H, D as units of time. +Flags: +- `--master`: Show configuration properties used by the master (Default: false) +- `--source`: Show source of the configuration property instead of the value (Default: false) +- `--unit`: Unit of the value to return, converted to correspond to the given unit. 
+E.g., with "--unit KB", a configuration value of "4096B" will return 4 +Possible options include B, KB, MB, GB, TP, PB, MS, S, M, H, D (Default: "") +Examples: ```shell # Display all the current node configuration -$ ./bin/alluxio getConf +$ ./bin/alluxio conf get ``` ```shell # Display the value of a property key -$ ./bin/alluxio getConf alluxio.master.hostname +$ ./bin/alluxio conf get alluxio.master.hostname ``` ```shell # Display the configuration of the current running Alluxio leading master -$ ./bin/alluxio getConf --master +$ ./bin/alluxio conf get --master ``` ```shell # Display the source of the configuration -$ ./bin/alluxio getConf --source +$ ./bin/alluxio conf get --source ``` ```shell # Display the values in a given unit -$ ./bin/alluxio getConf --unit KB alluxio.user.block.size.bytes.default -$ ./bin/alluxio getConf --unit S alluxio.master.journal.flush.timeout +$ ./bin/alluxio conf get alluxio.user.block.size.bytes.default --unit KB +$ ./bin/alluxio conf get alluxio.master.journal.flush.timeout --unit S + ``` -> Note: This command does not require the Alluxio cluster to be running. -### logLevel +### conf log +Usage: `bin/alluxio conf log [flags]` -The `logLevel` command returns the current value of or updates the log level of a particular class -on specific instances. Users are able to change Alluxio server-side log levels at runtime. +The log command returns the current value of or updates the log level of a particular class on specific instances. +Users are able to change Alluxio server-side log levels at runtime. -The command follows the format `alluxio logLevel --logName=NAME [--target=] [--level=LEVEL]`, -where: -* `--logName ` indicates the logger's class (e.g. `alluxio.master.file.DefaultFileSystemMaster`) -* `--target ` lists the Alluxio master or workers to set. -The target could be of the form `` and multiple targets can be listed as comma-separated entries. -`role` can be one of `master|worker|job_master|job_worker`. 
Using the `role` option is useful when an Alluxio process -is configured to use a non-standard web port (e.g. if an Alluxio master does not use 19999 as its web port). +The --target flag specifies which processes to apply the log level change to. +The target could be of the form and multiple targets can be listed as comma-separated entries. +The role can be one of master,worker,job_master,job_worker. +Using the role option is useful when an Alluxio process is configured to use a non-standard web port (e.g. if an Alluxio master does not use 19999 as its web port). The default target value is the primary master, primary job master, all workers and job workers. -* `--level ` If provided, the command changes to the given logger level, -otherwise it returns the current logger level. - -See [here]({{ '/en/operation/Logging.html#modifying-server-logging-at-runtime' | relativize_url }}) -for more examples. > Note: This command requires the Alluxio cluster to be running. -> You are not able to set the logger level on the standby masters. -> The standby masters/job masters do not have a running web server. -> So they are not accepting the requests from this command. -> If you want to modify the logger level for standby masters, -> update the `log4j.properties` and restart the process. - -### readJournal -The `readJournal` command parses the current journal and outputs a human readable version to the local folder. -Note this command may take a while depending on the size of the journal. -Note that Alluxio master is required to stop before reading the local embedded journal. +Flags: +- `--level`: If specified, sets the specified logger at the given level (Default: "") +- `--name`: (Required) Logger name (ex. alluxio.master.file.DefaultFileSystemMaster) +- `--target`: A target name among . Defaults to master,workers,job_master,job_workers (Default: []) -* `-help` provides detailed guidance. -* `-start ` the start log sequence number (exclusive). 
(Default: `0`) -* `-end ` the end log sequence number (exclusive). (Default: `+inf`) -* `-inputDir ` the input directory on-disk to read journal content from. (Default: Read from system configuration) -* `-outputDir ` the output directory to write journal content to. (Default: journal_dump-${timestamp}) -* `-master ` (advanced) the name of the master (e.g. FileSystemMaster, BlockMaster). (Default: "FileSystemMaster") +Examples: +```shell +# Set DEBUG level for DefaultFileSystemMaster class on master processes +$ ./bin/alluxio conf log --name alluxio.master.file.DefaultFileSystemMaster --target=master --level=DEBUG +``` ```shell -$ ./bin/alluxio readJournal +# Set WARN level for PagedDoraWorker class on the worker process on host myHostName +$ ./bin/alluxio conf log --name alluxio.worker.dora.PagedDoraWorker --target=myHostName:worker --level=WARN -Dumping journal of type EMBEDDED to /Users/alluxio/journal_dump-1602698211916 -2020-10-14 10:56:51,960 INFO RaftStorageDirectory - Lock on /Users/alluxio/alluxio/journal/raft/02511d47-d67c-49a3-9011-abb3109a44c1/in_use.lock acquired by nodename 78602@alluxio-user -2020-10-14 10:56:52,254 INFO RaftJournalDumper - Read 223 entries from log /Users/alluxio/alluxio/journal/raft/02511d47-d67c-49a3-9011-abb3109a44c1/current/log_0-222. ``` -> Note: This command requires that the Alluxio cluster is **NOT** running. -### killAll +## exec +Run the main method of an Alluxio class, or end-to-end tests on an Alluxio cluster. -The `killAll` command kills all processes containing the specified word. -> Note: This kills non-Alluxio processes as well. +### exec basicIOTest +Usage: `bin/alluxio exec basicIOTest [flags]` -### copyDir +Run all end-to-end tests or a specific test, on an Alluxio cluster. -The `copyDir` command copies the directory at `PATH` to all master nodes listed in `conf/masters` -and all worker nodes listed in `conf/workers`. +Flags: +- `--directory`: Alluxio path for the tests working directory.
Default: / (Default: "") +- `--operation`: The operation to test, either BASIC or BASIC_NON_BYTE_BUFFER. +By default both operations are tested. (Default: "") +- `--readType`: The read type to use, one of NO_CACHE, CACHE, CACHE_PROMOTE. +By default all readTypes are tested. (Default: "") +- `--workers`: Alluxio worker addresses to run tests on. +If not specified, random ones will be used. (Default: "") +- `--writeType`: The write type to use, one of MUST_CACHE, CACHE_THROUGH, THROUGH. +By default all writeTypes are tested. (Default: "") +Examples: ```shell -$ ./bin/alluxio copyDir conf/alluxio-site.properties +# Run all permutations of IO tests +$ ./bin/alluxio exec basicIOTest ``` -> Note: This command does not require the Alluxio cluster to be running. - -### clearCache +```shell +# Run a specific permutation of the IO tests +$ ./bin/alluxio exec basicIOTest --operation BASIC --readType NO_CACHE --writeType THROUGH +``` -The `clearCache` command drops the OS buffer cache. -> Note: This command does not require the Alluxio cluster to be running. +### exec class +Usage: `bin/alluxio exec class [flags]` -### docGen +Run the main method of an Alluxio class. -The `docGen` command autogenerates documentation based on the current source code. +Flags: +- `--jar`: Determine a JAR file to run. (Default: "") +- `--m`: Determine a module to run. (Default: "") -Usage: `docGen [--metric] [--conf]` -* `--metric` flag indicates to generate Metric docs -* `--conf` flag indicates to generate Configuration docs +### exec hdfsMountTest +Usage: `bin/alluxio exec hdfsMountTest [flags]` -Supplying neither flag will default to generating both docs. +Runs a set of validations against the given HDFS path. -> Note: This command does not require the Alluxio cluster to be running. +Flags: +- `--option`: options associated with this mount point. (Default: "") +- `--path`: (Required) specifies the HDFS path you want to validate. +- `--readonly`: mount point is readonly in Alluxio.
(Default: false) +- `--shared`: mount point is shared. (Default: false) -### version +### exec ufsIOTest +Usage: `bin/alluxio exec ufsIOTest [flags]` -The `version` command prints Alluxio version. +A benchmarking tool for the I/O between Alluxio and UFS. +This test will measure the I/O throughput between Alluxio workers and the specified UFS path. +Each worker will create concurrent clients to first generate test files of the specified size then read those files. +The write/read I/O throughput will be measured in the process. -Usage: `version --revision [revision_length]` -* `-r,--revision [revision_length]` Prints the git revision along with the Alluxio version. Optionally specify the revision length. +Flags: +- `--cluster`: specifies the benchmark is run in the Alluxio cluster. +If not specified, this benchmark will run locally. (Default: false) +- `--cluster-limit`: specifies how many Alluxio workers to run the benchmark concurrently. +If >0, it will only run on that number of workers. +If 0, it will run on all available cluster workers. +If <0, will run on the workers from the end of the worker list. +This flag is only used if --cluster is enabled. (Default: 0) +- `--io-size`: specifies the amount of data each thread writes/reads. (Default: "") +- `--java-opt`: The java options to add to the command line for the task. +This can be repeated. The options must be quoted and prefixed with a space. +For example: --java-opt " -Xmx4g" --java-opt " -Xms2g". (Default: []) +- `--path`: (Required) specifies the path to write/read temporary data in. +- `--threads`: specifies the number of threads to concurrently use on each worker. (Default: 4) +Examples: ```shell -$ ./bin/alluxio version +# This runs the I/O benchmark to HDFS in your process locally +$ ./bin/alluxio exec ufsIOTest --path hdfs:// ``` -> Note: This command does not require the Alluxio cluster to be running.
- -### validateConf - -The `validateConf` command validates the local Alluxio configuration files, checking for common misconfigurations. +```shell +# This invokes the I/O benchmark to HDFS in the Alluxio cluster +# 1 worker will be used. 4 threads will be created, each writing then reading 4G of data +$ ./bin/alluxio exec ufsIOTest --path hdfs:// --cluster --cluster-limit 1 +``` ```shell -$ ./bin/alluxio validateConf +# This invokes the I/O benchmark to HDFS in the Alluxio cluster +# 2 workers will be used +# 2 threads will be created on each worker +# Each thread is writing then reading 512m of data +$ ./bin/alluxio exec ufsIOTest --path hdfs:// --cluster --cluster-limit 2 --io-size 512m --threads 2 ``` -> Note: This command does not require the Alluxio cluster to be running. -### collectInfo +### exec ufsTest +Usage: `bin/alluxio exec ufsTest [flags]` -The `collectInfo` command collects information to troubleshoot an Alluxio cluster. -For more information see the [collectInfo command page]({{ '/en/reference/Troubleshooting.html#alluxio-collectinfo-command' | relativize_url }}). +Test the integration between Alluxio and the given UFS to validate UFS semantics -> Note: This command does not require the Alluxio cluster to be running. -> But if the cluster is not running, this command will fail to gather some information from it. +Flags: +- `--path`: (Required) the full UFS path to run tests against. +- `--test`: Test name, this option can be passed multiple times to indicate multiple tests (Default: []) -## File System Operations +## fs +Operations to interface with the Alluxio filesystem +For commands that take Alluxio URIs as an argument such as ls or mkdir, the argument should be either +- A complete Alluxio URI, such as alluxio://:/ +- A path without its scheme header, such as /path, in order to use the default hostname and port set in alluxio-site.properties -```shell -$ ./bin/alluxio fs +> Note: All fs commands require the Alluxio cluster to be running.
-Usage: alluxio fs [generic options] - [cat ] - [checkConsistency [-r] ] - ... -``` +Most of the commands which require path components allow wildcard arguments for ease of use. +For example, the command "bin/alluxio fs rm '/data/2014*'" deletes anything in the data directory with a prefix of 2014. -For `fs` subcommands that take Alluxio URIs as argument (e.g. `ls`, `mkdir`), the argument should -be either a complete Alluxio URI, such as `alluxio://:/`, -or a path without its header, such as `/`, to use the default hostname and port set in the -`conf/alluxio-site.properties`. +Some shells will attempt to glob the input paths, causing strange errors. +As a workaround, you can disable globbing (depending on the shell type; for example, set -f) or by escaping wildcards +For example, the command "bin/alluxio fs cat /\\*" uses the escape backslash character twice. +This is because the shell script will eventually call a java program which should have the final escaped parameters "cat /\\*". -> Note: This command requires the Alluxio cluster to be running. ->**Wildcard Input:** -> ->Most of the commands which require path components allow wildcard arguments for ease of use. For ->example: -> ->```shell ->$ ./bin/alluxio fs rm '/data/2014*' ->``` -> ->The example command deletes anything in the `data` directory with a prefix of `2014`. -> ->Note that some shells will attempt to glob the input paths, causing strange errors (Note: the ->number 21 could be different and comes from the number of matching files in your local ->filesystem): -> ->``` ->rm takes 1 arguments, not 21 ->``` -> ->As a workaround, you can disable globbing (depending on the shell type; for example, `set -f`) or by ->escaping wildcards, for example: -> ->```shell ->$ ./bin/alluxio fs cat /\\* ->``` -> ->Note the double escape; this is because the shell script will eventually call a java program ->which should have the final escaped parameters (`cat /\\*`). 
- -### cat - -The `cat` command prints the contents of a file in Alluxio to the shell. -If you wish to copy the file to your local file system, `copyToLocal` should be used. - -For example, when testing a new computation job, `cat` can be used as a quick way to check the output: +### fs cat +Usage: `bin/alluxio fs cat [path]` + +The cat command prints the contents of a file in Alluxio to the shell. +Examples: ```shell +# Print the contents of /output/part-00000 $ ./bin/alluxio fs cat /output/part-00000 ``` -### checkConsistency - -The `checkConsistency` command compares Alluxio and under storage metadata for a given path. -If the path is a directory, the entire subtree will be compared. -The command returns a message listing each inconsistent file or directory. -The system administrator should reconcile the differences of these files at their discretion. -To avoid metadata inconsistencies between Alluxio and under storages, -design your systems to modify files and directories through Alluxio -and avoid directly modifying the under storage. -If the `-r` option is used, the `checkConsistency` command will repair all inconsistent files and -directories under the given path. -If an inconsistent file or directory exists only in under storage, its metadata will be added to Alluxio. -If an inconsistent file exists in Alluxio and its data is fully present in Alluxio, -its metadata will be loaded to Alluxio again. +### fs check-cached +Usage: `bin/alluxio fs check-cached [path] [flags]` -If the `-t ` option is specified, the provided number of threads will be used when -repairing consistency. Defaults to the number of CPU cores available, -* This option has no effect if `-r` is not specified +Checks if files under a path have been cached in alluxio. -NOTE: This command requires a read lock on the subtree being checked, meaning writes and updates -to files or directories in the subtree cannot be completed until this command completes. 
+Flags: +- `--limit`: Limit number of files to check (Default: 1000) +- `--sample`: Sample ratio, 10 means sample 1 in every 10 files. (Default: 1) -For example, `checkConsistency` can be used to periodically validate the integrity of the namespace. +### fs checksum +Usage: `bin/alluxio fs checksum [path]` -```shell -# List each inconsistent file or directory -$ ./bin/alluxio fs checkConsistency / -``` - -```shell -# Repair the inconsistent files or directories -$ ./bin/alluxio fs checkConsistency -r / -``` - -### checksum - -The `checksum` command outputs the md5 value of a file in Alluxio. - -For example, `checksum` can be used to verify the contents of a file stored in Alluxio. +The checksum command outputs the md5 value of a file in Alluxio. +This can be used to verify the contents of a file stored in Alluxio. +Examples: ```shell +# Compare the checksum values +# value from Alluxio filesystem $ ./bin/alluxio fs checksum /LICENSE - md5sum: bf0513403ff54711966f39b058e059a3 +# value from local filesystem md5 LICENSE MD5 (LICENSE) = bf0513403ff54711966f39b058e059a3 ``` -### chgrp -The `chgrp` command changes the group of the file or directory in Alluxio. -Alluxio supports file authorization with Posix file permission. -Group is an authorizable entity in Posix file permissions model. -The file owner or super user can execute this command to change the group of the file or directory. +### fs chgrp +Usage: `bin/alluxio fs chgrp [group] [path] [flags]` -Adding `-R` option also changes the group of child file and child directory recursively. +The chgrp command changes the group of the file or directory in Alluxio. +Alluxio supports file authorization with POSIX file permissions. +The file owner or superuser can execute this command. 
-For example, `chgrp` can be used as a quick way to change the group of file: +Flags: +- `--recursive`,`-R`: change the group recursively for all files and directories under the given path (Default: false) +Examples: ```shell +# Change the group of a file $ ./bin/alluxio fs chgrp alluxio-group-new /input/file1 ``` -### chmod -The `chmod` command changes the permission of file or directory in Alluxio. -Currently, octal mode is supported: the numerical format accepts three octal digits -which refer to permissions for the file owner, the group and other users. -Here is number-permission mapping table: +### fs chmod +Usage: `bin/alluxio fs chmod [mode] [path] [flags]` - - - {% for item in site.data.table.chmod-permission %} - - - - - - {% endfor %} -
NumberPermissionrwx
{{ item.number }}{{ item.permission }}{{ item.rwx }}
+The chmod command changes the permission of a file or directory in Alluxio. +The permission mode is represented as an octal 3 digit value. +Refer to https://en.wikipedia.org/wiki/Chmod#Numerical_permissions for a detailed description of the modes. -Adding `-R` option also changes the permission of child file and child directory recursively. - -For example, `chmod` can be used as a quick way to change the permission of file: +Flags: +- `--recursive`,`-R`: change the permission recursively for all files and directories under the given path (Default: false) +Examples: ```shell +# Set mode 755 for /input/file $ ./bin/alluxio fs chmod 755 /input/file1 ``` -### chown -The `chown` command changes the owner of the file or directory in Alluxio. -For security reasons, the ownership of a file can only be altered by a super user. +### fs chown +Usage: `bin/alluxio fs chown [:] [flags]` -For example, `chown` can be used as a quick way to change the owner of file: +The chown command changes the owner of a file or directory in Alluxio. +The ownership of a file can only be altered by a superuser +Flags: +- `--recursive`,`-R`: change the owner recursively for all files and directories under the given path (Default: false) + +Examples: ```shell +# Change the owner of /input/file1 to alluxio-user $ ./bin/alluxio fs chown alluxio-user /input/file1 -$ ./bin/alluxio fs chown alluxio-user:alluxio-group /input/file2 ``` -Adding `-R` option also changes the owner of child file and child directory recursively. -### copyFromLocal +### fs consistent-hash +Usage: `bin/alluxio fs consistent-hash [--create]|[--compare <1stCheckFilePath> <2ndCheckFilePath>]|[--clean] [flags]` -The `copyFromLocal` command copies the contents of a file in the local file system into Alluxio. -If the node you run the command from has an Alluxio worker, the data will be available on that worker. -Otherwise, the data will be copied to a random remote node running an Alluxio worker. 
-If a directory is specified, the directory and all its contents will be copied recursively -(parallel at file level up to the number of available threads). +This command checks whether the consistent hash ring has changed -Usage: `copyFromLocal [--thread ] [--buffersize ] ` -* `--thread ` (optional) Number of threads used to copy files in parallel, default value is CPU cores * 2 -* `--buffersize ` (optional) Read buffer size in bytes, default is 8MB when copying from local and 64MB when copying to local -* `` file or directory path on the local filesystem -* `` file or directory path on the Alluxio filesystem +Flags: +- `--clean`: Delete generated check data (Default: false) +- `--compare`: Compare check files to see if the hash ring has changed (Default: false) +- `--create`: Generate check file (Default: false) -For example, `copyFromLocal` can be used as a quick way to inject data into the system for processing: +### fs cp +Usage: `bin/alluxio fs cp [srcPath] [dstPath] [flags]` -```shell -$ ./bin/alluxio fs copyFromLocal /local/data /input -``` +Copies a file or directory in the Alluxio filesystem or between local and Alluxio filesystems. +The file:// scheme indicates a local filesystem path and the alluxio:// scheme or no scheme indicates an Alluxio filesystem path. -### copyToLocal +Flags: +- `--buffer-size`: Read buffer size when copying to or from local, with defaults of 64MB and 8MB respectively (Default: "") +- `--preserve`,`-p`: Preserve file permission attributes when copying files; all ownership, permissions, and ACLs will be preserved (Default: false) +- `--recursive`,`-R`: True to copy the directory subtree to the destination directory (Default: false) +- `--thread`: Number of threads used to copy files in parallel, defaults to 2 * CPU cores (Default: 0) -The `copyToLocal` command copies a file in Alluxio to the local file system. -If a directory is specified, the directory and all its contents will be copied recursively.
+Examples: +```shell +# Copy within the Alluxio filesystem +$ ./bin/alluxio fs cp /file1 /file2 +``` -Usage: `copyToLocal [--buffersize ] ` -* `--buffersize ` (optional) file transfer buffer size in bytes -* `` file or directory path on the Alluxio filesystem -* `` file or directory path on the local filesystem +```shell +# Copy a local file to the Alluxio filesystem +$ ./bin/alluxio fs cp file:///file1 /file2 +``` -For example, `copyToLocal` can be used as a quick way to download output data -for additional investigation or debugging. +```shell +# Copy a file in Alluxio to local +$ ./bin/alluxio fs cp alluxio:///file1 file:///file2 +``` ```shell -$ ./bin/alluxio fs copyToLocal /output/part-00000 part-00000 -$ wc -l part-00000 +# Recursively copy a directory within the Alluxio filesystem +$ ./bin/alluxio fs cp -R /dir1 /dir2 + ``` -### head -The `head` command prints the first 1 KB of data in a file to the shell. +### fs head +Usage: `bin/alluxio fs head [path] [flags]` -Using the `-c [bytes]` option will print the first `n` bytes of data to the shell. +The head command prints the first 1KB of data of a file to the shell. +Specifying the -c flag sets the number of bytes to print. +Flags: +- `--bytes`,`-c`: Byte size to print (Default: "") + +Examples: ```shell +# Print first 2048 bytes of a file $ ./bin/alluxio fs head -c 2048 /output/part-00000 ``` -### help -The `help` command prints the help message for a given `fs` subcommand. -If the given command does not exist, it prints help messages for all supported subcommands. +### fs location +Usage: `bin/alluxio fs location [path]` + +Displays the list of hosts storing the specified file. + +### fs ls +Usage: `bin/alluxio fs ls [path] [flags]` + +The ls command lists all the immediate children in a directory and displays the file size, last modification time, and in memory status of the files. +Using ls on a file will only display the information for that specific file. 
+ +The ls command will also load the metadata for any file or immediate children of a directory from the under storage system to Alluxio namespace if it does not exist in Alluxio. +It queries the under storage system for any file or directory matching the given path and creates a mirror of the file in Alluxio backed by that file. +Only the metadata, such as the file name and size, are loaded this way and no data transfer occurs. + +Flags: +- `--help`: help for this command (Default: false) +- `--human-readable`,`-h`: Print sizes in human readable format (Default: false) +- `--list-dir-as-file`,`-d`: List directories as files (Default: false) +- `--load-metadata`,`-f`: Force load metadata for immediate children in a directory (Default: false) +- `--omit-mount-info`,`-m`: Omit mount point related information such as the UFS path (Default: false) +- `--pinned-files`,`-p`: Only show pinned files (Default: false) +- `--recursive`,`-R`: List subdirectories recursively (Default: false) +- `--reverse`,`-r`: Reverse sorted order (Default: false) +- `--sort`: Sort entries by column, one of {creationTime|inMemoryPercentage|lastAccessTime|lastModificationTime|name|path|size} (Default: "") +- `--timestamp`: Display specified timestamp of entry, one of {createdTime|lastAccessTime|lastModifiedTime} (Default: "") Examples: ```shell -# Print all subcommands -$ ./bin/alluxio fs help +# List and load metadata for all immediate children of /s3/data +$ ./bin/alluxio fs ls /s3/data ``` ```shell -# Print help message for ls -$ ./bin/alluxio fs help ls +# Force loading metadata of /s3/data +$ ./bin/alluxio fs ls -f /s3/data ``` -### leader -The `leader` command prints the current Alluxio leading master hostname. +### fs mkdir +Usage: `bin/alluxio fs mkdir [path1 path2 ...]` + +The mkdir command creates a new directory in the Alluxio filesystem. +It is recursive and will create any parent directories that do not exist. 
+Note that the created directory will not be created in the under storage system until a file in the directory is persisted to the underlying storage. +Using mkdir on an invalid or existing path will fail. +Examples: ```shell -$ ./bin/alluxio fs leader +# Creating a folder structure +$ ./bin/alluxio fs mkdir /users +$ ./bin/alluxio fs mkdir /users/Alice +$ ./bin/alluxio fs mkdir /users/Bob ``` -### load -The `load` command load data/metadata from the under storage system into Alluxio storage. -For example, `load` can be used to prefetch data for analytics jobs. -If `load` is run on a directory, files in the directory will be recursively loaded. -```shell -$ ./bin/alluxio fs load --submit [--metadata-only] -``` -**Options:** -* `--metadata-only` option specify whether loading metadata only +### fs mv +Usage: `bin/alluxio fs mv [srcPath] [dstPath]` -After submit the command, you can check the status by running the following +The mv command moves a file or directory to another path in Alluxio. +The destination path must not exist or be a directory. +If it is a directory, the file or directory will be placed as a child of the directory. +The command is purely a metadata operation and does not affect the data blocks of the file. + +Examples: ```shell -$ ./bin/alluxio fs load --progress [--format TEXT|JSON] [--verbose] +# Moving a file +$ ./bin/alluxio fs mv /data/2014 /data/archives/2014 ``` -And you would get the following output: + + +### fs rm +Usage: `bin/alluxio fs rm [path] [flags]` + +The rm command removes a file from Alluxio space and the under storage system. +The file will be unavailable immediately after this command returns, but the actual data may be deleted a while later. 
+ +Flags: +- `--alluxio-only`: True to only remove data and metadata from Alluxio cache (Default: false) +- `--recursive`,`-R`: True to recursively remove files within the specified directory subtree (Default: false) +- `--skip-ufs-check`,`-U`: True to skip checking if corresponding UFS contents are in sync (Default: false) + +Examples: ```shell -Progress for loading path '/dir-99': - Settings: bandwidth: unlimited verify: false - Job State: SUCCEEDED - Files Processed: 1000 - Bytes Loaded: 125.00MB - Throughput: 2509.80KB/s - Block load failure rate: 0.00% - Files Failed: 0 +# Remove a file from Alluxio and the under storage system +$ ./bin/alluxio fs rm /tmp/unused-file ``` -**Options:** -* `--format` option specify output format. TEXT as default -* `--verbose` option output job details. ```shell -# If you want to stop the command, run the following -$ ./bin/alluxio fs load --stop +# Remove a file from Alluxio filesystem only +$ ./bin/alluxio fs rm --alluxio-only --skip-ufs-check /tmp/unused-file2 +# Note it is recommended to use both --alluxio-only and --skip-ufs-check together in this situation ``` -### ls -The `ls` command lists all the immediate children in a directory and displays the file size, last -modification time, and in memory status of the files. -Using `ls` on a file will only display the information for that specific file. - -The `ls` command will also load the metadata for any file or immediate children of a directory -from the under storage system to Alluxio namespace if it does not exist in Alluxio. -`ls` queries the under storage system for any file or directory matching the given path -and creates a mirror of the file in Alluxio backed by that file. -Only the metadata, such as the file name and size, are loaded this way and no data transfer occurs. +### fs stat +Usage: `bin/alluxio fs stat [flags]` -**Options:** +The stat command dumps the FileInfo representation of a file or a directory to the shell. 
-* `-d` option lists the directories as plain files. For example, `ls -d /` shows the attributes of root directory. -* `-f` option forces loading metadata for immediate children in a directory. -By default, it loads metadata only at the first time at which a directory is listed. -`-f` is equivalent to `-Dalluxio.user.file.metadata.sync.interval=0`. -* `-h` option displays file sizes in human-readable formats. -* `-p` option lists all pinned files. -* `-R` option also recursively lists child directories, displaying the entire subtree starting from the input path. -* `--sort` sorts the result by the given option. Possible values are size, creationTime, inMemoryPercentage, lastModificationTime, lastAccessTime and path. -* `-r` reverses the sorting order. -* `--timestamp` display the timestamp of the given option. Possible values are creationTime, lastModificationTime, and lastAccessTime. -The default option is lastModificationTime. -* `-m` option excludes mount point related information. - -For example, `ls` can be used to browse the file system. 
+Flags: +- `--file-id`: File id of file (Default: "") +- `--format`,`-f`: Display info in the given format: + "%N": name of the file + "%z": size of file in bytes + "%u": owner + "%g": group name of owner + "%i": file id of the file + "%y": modification time in UTC in 'yyyy-MM-dd HH:mm:ss' format + "%Y": modification time as Unix timestamp in milliseconds + "%b": Number of blocks allocated for file + (Default: "") +- `--path`: Path to file or directory (Default: "") +Examples: ```shell -$ ./bin/alluxio fs mount /s3/data s3://data-bucket/ +# Display file's stat +$ ./bin/alluxio fs stat /data/2015/logs-1.txt ``` ```shell -# Loads metadata for all immediate children of /s3/data and lists them -$ ./bin/alluxio fs ls /s3/data/ +# Display directory's stat +$ ./bin/alluxio fs stat /data/2015 ``` ```shell -# Forces loading metadata -$ aws s3 cp /tmp/somedata s3://data-bucket/somedata -$ ./bin/alluxio fs ls -f /s3/data +# Display the size of file +$ ./bin/alluxio fs stat -f %z /data/2015/logs-1.txt ``` ```shell -# Files are not removed from Alluxio if they are removed from the UFS (s3 here) only -$ aws s3 rm s3://data-bucket/somedata -$ ./bin/alluxio fs ls -f /s3/data +# Find the file by file ID and display the stat, useful in troubleshooting +$ ./bin/alluxio fs stat --file-id 12345678 ``` -Metadata sync is an expensive operation. A rough estimation is metadata sync -on 1 million files will consume 2GB heap until the sync operation is complete. -Therefore, we recommend not using forced sync to avoid accidental repeated sync operations. -It is recommended to always specify a non-zero sync interval for metadata sync, so -even if the sync is repeatedly triggered, the paths that have just been sync-ed can be identified and skipped. +### fs tail +Usage: `bin/alluxio fs tail [path] [flags]` + +The tail command prints the last 1KB of data of a file to the shell. +Specifying the --bytes flag sets the number of bytes to print.
+ +Flags: +- `--bytes`: Byte size to print (Default: "") + +Examples: ```shell -# Should be avoided -$ ./bin/alluxio fs ls -f -R /s3/data -``` -```shell -# RECOMMENDED. This will not sync files repeatedly in 1 minute. -$ ./bin/alluxio fs ls -Dalluxio.user.file.metadata.sync.interval=1min -R /s3/data +# Print last 2048 bytes of a file +$ ./bin/alluxio fs tail --bytes 2048 /output/part-00000 ``` -### masterInfo -The `masterInfo` command prints information regarding master fault tolerance such as leader address, -list of master addresses, and the configured Zookeeper address. -If Alluxio is running in single master mode, `masterInfo` prints the master address. -If Alluxio is running in fault tolerance mode, the leader address, list of master addresses -and the configured Zookeeper address is printed. +### fs test +Usage: `bin/alluxio fs test [path] [flags]` -For example, `masterInfo` can be used to print information regarding master fault tolerance. +Test a property of a path, returning 0 if the property is true, or 1 otherwise -```shell -$ ./bin/alluxio fs masterInfo -``` +Flags: +- `--dir`,`-d`: Test if path is a directory (Default: false) +- `--exists`,`-e`: Test if path exists (Default: false) +- `--file`,`-f`: Test if path is a file (Default: false) +- `--not-empty`,`-s`: Test if path is not empty (Default: false) +- `--zero`,`-z`: Test if path is zero length (Default: false) -### mkdir +### fs touch +Usage: `bin/alluxio fs touch [path]` -The `mkdir` command creates a new directory in Alluxio space. -It is recursive and will create any nonexistent parent directories. -Note that the created directory will not be created in the under storage system -until a file in the directory is persisted to the underlying storage. -Using `mkdir` on an invalid or existing path will fail. +Create a 0 byte file at the specified path, which will also be created in the under file system -For example, `mkdir` can be used by an admin to set up the basic folder structures.
+## generate +Generate files used in documentation -```shell -$ ./bin/alluxio fs mkdir /users -$ ./bin/alluxio fs mkdir /users/Alice -$ ./bin/alluxio fs mkdir /users/Bob -``` +### generate docs +Usage: `bin/alluxio generate docs` -### mv +Generate all documentation files -The `mv` command moves a file or directory to another path in Alluxio. -The destination path must not exist or be a directory. -If it is a directory, the file or directory will be placed as a child of the directory. -`mv` is purely a metadata operation and does not affect the data blocks of the file. -`mv` cannot be done between mount points of different under storage systems. +### generate doc-tables +Usage: `bin/alluxio generate doc-tables` -For example, `mv` can be used to re-organize your files. +Generate configuration and metric tables used in documentation -```shell -$ ./bin/alluxio fs mv /data/2014 /data/archives/2014 -``` +### generate user-cli +Usage: `bin/alluxio generate user-cli [flags]` -### rm +Generate content for `operation/User-CLI.md` -The `rm` command removes a file from Alluxio space and the under storage system. -The file will be unavailable immediately after this command returns, -but the actual data may be deleted a while later. +Flags: +- `--help`,`-h`: help for user-cli (Default: false) -* Adding `-R` option deletes all contents of the directory and the directory itself. -* Adding `-U` option skips the check for whether the UFS contents being deleted are in-sync with Alluxio -before attempting to delete persisted directories. We recommend always using the `-U` option for the best performance and resource efficiency. -* Adding `--alluxioOnly` option removes data and metadata from Alluxio space only. -The under storage system will not be affected.
+## info +Retrieve and/or display info about the running Alluxio cluster -```shell -# Remove a file from Alluxio space and the under storage system -$ ./bin/alluxio fs rm /tmp/unused-file -``` +### info cache +Usage: `bin/alluxio info cache [flags]` -```shell -# Remove a file from Alluxio space only -$ ./bin/alluxio fs rm --alluxioOnly /tmp/unused-file2 -``` +Reports worker capacity information -When deleting only from Alluxio but leaving the files in UFS, we recommend using `-U` and `-Dalluxio.user.file.metadata.sync.interval=-1` -to skip the metadata sync and the UFS check. This will save time and memory consumption on the Alluxio master. -```shell -$ bin/alluxio fs rm -R -U --alluxioOnly -Dalluxio.user.file.metadata.sync.interval=-1 /dir -``` +Flags: +- `--live`: Only show live workers for capacity report (Default: false) +- `--lost`: Only show lost workers for capacity report (Default: false) +- `--worker`: Only show specified workers for capacity report, labeled by hostname or IP address (Default: []) + +### info collect +Usage: `bin/alluxio info collect [command] [flags]` + +Collects information such as logs, config, metrics, and more from the running Alluxio cluster and bundles it into a single tarball + +[command] must be one of the following values: +- all: runs all the commands below +- cluster: runs a set of Alluxio commands to collect information about the Alluxio cluster +- conf: collects the configuration files under ${ALLUXIO_HOME}/conf/ +- env: runs a set of linux commands to collect information about the cluster +- jvm: collects jstack from the JVMs +- log: collects the log files under ${ALLUXIO_HOME}/logs/ +- metrics: collects Alluxio system metrics + +> WARNING: This command MAY bundle credentials. Inspect the output tarball for any sensitive information and remove it before sharing with others.
+ +Flags: +- `--additional-logs`: Additional file name prefixes from ${ALLUXIO_HOME}/logs to include in the tarball, inclusive of the default log files (Default: []) +- `--end-time`: Logs that do not contain entries before this time will be ignored, format must be like 2006-01-02T15:04:05 (Default: "") +- `--exclude-logs`: File name prefixes from ${ALLUXIO_HOME}/logs to exclude; this is evaluated after adding files from --additional-logs (Default: []) +- `--exclude-worker-metrics`: True to skip worker metrics collection (Default: false) +- `--include-logs`: File name prefixes from ${ALLUXIO_HOME}/logs to include in the tarball, ignoring the default log files; cannot be used with --exclude-logs or --additional-logs (Default: []) +- `--local`: True to only collect information from the local machine (Default: false) +- `--max-threads`: Parallelism of the command; use a smaller value to limit network I/O when transferring tarballs (Default: 1) +- `--output-dir`: (Required) Output directory to write collect info tarball to +- `--start-time`: Logs that do not contain entries after this time will be ignored, format must be like 2006-01-02T15:04:05 (Default: "") + +### info doctor +Usage: `bin/alluxio info doctor [type]` + +Runs doctor configuration or storage command + +### info nodes +Usage: `bin/alluxio info nodes` + +Show all registered workers' status + +### info report +Usage: `bin/alluxio info report [arg] [flags]` + +Reports Alluxio running cluster information +[arg] can be one of the following values: + jobservice: job service metrics information + metrics: metrics information + summary: cluster summary + ufs: under storage system information + +Defaults to summary if no arg is provided + + +Flags: +- `--format`: Set output format, any of [json, yaml] (Default: "") + +### info version +Usage: `bin/alluxio info version` -When deleting a large directory (with millions of files) recursively both from Alluxio and UFS, -the operation is expensive. 
+Print Alluxio version. -We recommend doing the deletion in the following way: -1. Perform a direct sanity check against the UFS path with the corresponding file system API -or CLI to make sure everything can be deleted safely. -For example if the UFS is HDFS, use `hdfs dfs -ls -R /dir` to list the UFS files and check. -We do not recommend doing this sanity check from Alluxio using a command like `alluxio fs ls -R -f /dir`, -because the loaded file metadata will be deleted anyway, and the expensive metadata sync operation -will essentially be wasted. +## init +Initialization operations such as format and validate -2. Issue the deletion from Alluxio to delete files from both Alluxio and the UFS: +### init clear-os-cache +Usage: `bin/alluxio init clear-os-cache` + +The clear-os-cache command drops the OS buffer cache + +### init copy-dir +Usage: `bin/alluxio init copy-dir [path]` + +The copy-dir command copies the directory at given path to all master nodes listed in conf/masters and all worker nodes listed in conf/workers. + +> Note: This command does not require the Alluxio cluster to be running. + +Examples: ```shell -# Disable the sync and skip the UFS check, to reduce memory consumption on the master side -$ bin/alluxio fs rm -R -U -Dalluxio.user.file.metadata.sync.interval=-1 /dir +# copy alluxio-site properties file to all nodes +$ ./bin/alluxio init copy-dir conf/alluxio-site.properties ``` -Per 1 million files deleted, the memory overhead can be estimated as follows: -* If both metadata sync and UFS check are disabled, recursively deleting from Alluxio only will hold 2GB JVM heap memory until the deletion completes. -* If files are also deleted from UFS, there will not be extra heap consumption but the operation will take longer to complete. -* If metadata sync is enabled, there will be another around 2GB overhead on the JVM heap until the operation completes. 
-* If UFS check is enabled, there will another around 2GB overhead on the JVM heap until the operation completes. -Using this example as a guideline, estimate the total additional memory overhead as a proportion to the number of files to be deleted. -Ensure that the leading master has sufficient available heap memory to perform the operation before issuing a large recursive delete command. -A general good practice is to break deleting a large directory into deleting each individual children directories. +### init format +Usage: `bin/alluxio init format [flags]` +The format command formats the Alluxio master and all its workers. -### stat +Running this command on an existing Alluxio cluster deletes everything persisted in Alluxio, including cached data and any metadata information. +Data in under storage will not be changed. + +> Warning: Formatting is required when you run Alluxio for the first time. +It should only be called while the cluster is not running. + + +Flags: +- `--localFileSystem`,`-s`: If specified, only format if underfs is local and doesn't already exist (Default: false) -The `stat` command dumps the FileInfo representation of a file or a directory to the shell. -It is primarily intended to assist power users in debugging their system. -Generally viewing the file info in the UI will be easier to understand. +### init validate +Usage: `bin/alluxio init validate [flags]` -One can specify `-f ` to display info in given format: -* `%N`: name of the file -* `%z`: size of file in bytes -* `%u`: owner -* `%g`: group name of owner -* `%y` or `%Y`: modification time, where `%y` shows the UTC date in the form `yyyy-MM-dd HH:mm:ss` - and `%Y` shows the number of milliseconds since January 1, 1970 UTC -* `%b`: Number of blocks allocated for file -* `%i`: file ID(inode ID) of the file +Validate Alluxio configuration or environment -For example, `stat` can be used to debug the block locations of a file. 
-This is useful when trying to achieve locality for compute workloads. +Flags: +- `--type`: Decide the type to validate. Valid inputs: [conf, env] (Default: "") +Examples: ```shell -# Display file's stat -$ ./bin/alluxio fs stat /data/2015/logs-1.txt +# Validate configuration +$ ./bin/alluxio init validate --type conf ``` ```shell -# Display directory's stat -$ ./bin/alluxio fs stat /data/2015 +# Validate environment +$ ./bin/alluxio init validate --type env ``` + +## job +Command line tool for interacting with the job service. + +### job load +Usage: `bin/alluxio job load [flags]` + +The load command moves data from the under storage system into Alluxio storage. +For example, load can be used to prefetch data for analytics jobs. +If load is run on a directory, files in the directory will be recursively loaded. + +Flags: +- `--bandwidth`: [submit] Single worker read bandwidth limit (Default: "") +- `--format`: [progress] Format of output, either TEXT or JSON (Default: "") +- `--metadata-only`: [submit] Only load file metadata (Default: false) +- `--partial-listing`: [submit] Use partial directory listing, initializing load before reading the entire directory but cannot report on certain progress details (Default: false) +- `--path`: (Required) [all] Source path of load operation +- `--progress`: View progress of submitted job (Default: false) +- `--skip-if-exists`: [submit] Skip existing fully cached files (Default: false) +- `--stop`: Stop running job (Default: false) +- `--submit`: Submit job (Default: false) +- `--verbose`: [progress] Verbose output (Default: false) +- `--verify`: [submit] Run verification when load finishes and load new files if any (Default: false) + +Examples: ```shell -# Display the size of file -$ ./bin/alluxio fs stat -f %z /data/2015/logs-1.txt +# Submit a load job +$ ./bin/alluxio job load --path /path --submit ``` ```shell -# Find the file by fileID/inodeID and display the stat, useful in troubleshooting -$ ./bin/alluxio fs stat
-fileId 12345678 +# View the progress of a submitted job +$ ./bin/alluxio job load --path /path --progress +# Example output +Progress for loading path '/path': + Settings: bandwidth: unlimited verify: false + Job State: SUCCEEDED + Files Processed: 1000 + Bytes Loaded: 125.00MB + Throughput: 2509.80KB/s + Block load failure rate: 0.00% + Files Failed: 0 +``` + +```shell +# Stop a submitted job +$ ./bin/alluxio job load --path /path --stop ``` -### tail -The `tail` command outputs the last 1 KB of data in a file to the shell. -Using the `-c [bytes]` option will print the last `n` bytes of data to the shell. +## journal +Journal related operations + +### journal format +Usage: `bin/alluxio journal format` + +The format command formats the local Alluxio master's journal. + +> Warning: Formatting should only be called while the cluster is not running. -For example, `tail` can be used to verify the output of a job is in the expected format -or contains expected values. +### journal read +Usage: `bin/alluxio journal read [flags]` +The read command parses the current journal and outputs a human readable version to the local folder. +This command may take a while depending on the size of the journal. +> Note: This command requires that the Alluxio cluster is NOT running.
+ +Flags: +- `--end`: end log sequence number (exclusive) (Default: -1) +- `--input-dir`: input directory on-disk to read the journal content from (Default: "") +- `--master`: name of the master class (Default: "") +- `--output-dir`: output directory to write journal content to (Default: "") +- `--start`: start log sequence number (inclusive) (Default: 0) + +Examples: ```shell -$ ./bin/alluxio fs tail /output/part-00000 +$ ./bin/alluxio journal read +# output +Dumping journal of type EMBEDDED to /Users/alluxio/journal_dump-1602698211916 +2020-10-14 10:56:51,960 INFO RaftStorageDirectory - Lock on /Users/alluxio/alluxio/journal/raft/02511d47-d67c-49a3-9011-abb3109a44c1/in_use.lock acquired by nodename 78602@alluxio-user +2020-10-14 10:56:52,254 INFO RaftJournalDumper - Read 223 entries from log /Users/alluxio/alluxio/journal/raft/02511d47-d67c-49a3-9011-abb3109a44c1/current/log_0-222. ``` + + +## process +Start or stop cluster processes + +### process start +Usage: `bin/alluxio process start [flags]` + +Starts a single process locally or a group of similar processes across the cluster. +For starting a group, it is assumed the local host has passwordless SSH access to other nodes in the cluster. +The command will parse the hostnames to run on by reading the conf/masters and conf/workers files, depending on the process type. + +Flags: +- `--async`,`-a`: Asynchronously start processes without monitoring for start completion (Default: false) +- `--skip-kill-prev`,`-N`: Avoid killing previous running processes when starting (Default: false) + +### process stop +Usage: `bin/alluxio process stop [flags]` + +Stops a single process locally or a group of similar processes across the cluster. +For stopping a group, it is assumed the local host has passwordless SSH access to other nodes in the cluster. +The command will parse the hostnames to run on by reading the conf/masters and conf/workers files, depending on the process type.
+ +Flags: +- `--soft`,`-s`: Soft kill only, don't forcibly kill the process (Default: false) + diff --git a/docs/en/reference/Troubleshooting.md b/docs/en/reference/Troubleshooting.md index d53ff1208292..0c6feea4e91b 100644 --- a/docs/en/reference/Troubleshooting.md +++ b/docs/en/reference/Troubleshooting.md @@ -101,9 +101,9 @@ Both of them mask credential properties. The difference is the latter command fa `collectConfig` will collect all the configuration files under `${alluxio.work.dir}/conf`. From Alluxio 2.4, the `alluxio-site.properties` file will not be copied, as many users tend to put their plaintext credentials to the UFS in this file. -Instead, the `collectAlluxioInfo` will run a `alluxio getConf` command +Instead, the `collectAlluxioInfo` will run an `alluxio conf get` command which prints all the configuration properties, with the credential fields masked. -The [getConf command]({{ '/en/operation/User-CLI.html#getconf' | relativize_url }}) will collect all the current node configuration. +The [conf get command]({{ '/en/operation/User-CLI.html#conf-get' | relativize_url }}) will collect all the current node configuration. So in order to collect Alluxio configuration in the tarball, please make sure `collectAlluxioInfo` sub-command is run.