diff --git a/cmd/hdq/hdq.go b/cmd/hdq/hdq.go
new file mode 100644
index 0000000..02807c7
--- /dev/null
+++ b/cmd/hdq/hdq.go
@@ -0,0 +1,103 @@
+/*
+Copyright 2021 The GoPlus Authors (goplus.org)
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Command hdq is the entry point of the hdq command-line tool. It looks up
+// the subcommand named on the command line and runs it.
+package main
+
+import (
+	"flag"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/goplus/hdq/cmd/hdq/internal/base"
+	"github.com/goplus/hdq/cmd/hdq/internal/fetch"
+	"github.com/goplus/hdq/cmd/hdq/internal/help"
+	"github.com/qiniu/x/log"
+
+	_ "github.com/goplus/hdq/fetcher/gopkg"
+	_ "github.com/goplus/hdq/fetcher/hrefs"
+	_ "github.com/goplus/hdq/fetcher/torch"
+	_ "github.com/goplus/hdq/stream/http/cached"
+)
+
+// mainUsage prints the top-level usage of hdq to stderr and exits with status 2.
+func mainUsage() {
+	help.PrintUsage(os.Stderr, base.Hdq)
+	os.Exit(2)
+}
+
+// init installs mainUsage as flag.Usage and registers the available subcommands.
+func init() {
+	flag.Usage = mainUsage
+	base.Hdq.Commands = []*base.Command{
+		fetch.Cmd,
+	}
+}
+
+// main parses the command line and dispatches to the matching subcommand.
+// The word "help" is handled at every nesting level; an unknown command name
+// is reported on stderr and the process exits with status 2.
+func main() {
+	flag.Parse()
+	args := flag.Args()
+	if len(args) < 1 {
+		flag.Usage() // mainUsage exits, so args[0] below is never out of range
+	}
+	log.SetFlags(log.Ldefault &^ log.LstdFlags)
+
+	base.CmdName = args[0] // for error messages
+	if args[0] == "help" {
+		help.Help(os.Stderr, args[1:])
+		return
+	}
+
+BigCmdLoop:
+	for bigCmd := base.Hdq; ; {
+		for _, cmd := range bigCmd.Commands {
+			if cmd.Name() != args[0] {
+				continue
+			}
+			args = args[1:]
+			if len(cmd.Commands) > 0 {
+				// cmd is a command group: descend into it and match the next word.
+				bigCmd = cmd
+				if len(args) == 0 {
+					help.PrintUsage(os.Stderr, bigCmd)
+					os.Exit(2)
+				}
+				if args[0] == "help" {
+					help.Help(os.Stderr, append(strings.Split(base.CmdName, " "), args[1:]...))
+					return
+				}
+				base.CmdName += " " + args[0]
+				continue BigCmdLoop
+			}
+			if !cmd.Runnable() {
+				continue
+			}
+			cmd.Run(cmd, args)
+			return
+		}
+		// Nothing at this level matched: report it and point at the enclosing command's help.
+		helpArg := ""
+		if i := strings.LastIndex(base.CmdName, " "); i >= 0 {
+			helpArg = " " + base.CmdName[:i]
+		}
+		fmt.Fprintf(os.Stderr, "hdq %s: unknown command\nRun 'hdq help%s' for usage.\n", base.CmdName, helpArg)
+		os.Exit(2)
+	}
+}
diff --git a/cmd/hdq/internal/base/base.go b/cmd/hdq/internal/base/base.go
new file mode 100644
index 0000000..f7c55e2
--- /dev/null
+++ b/cmd/hdq/internal/base/base.go
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2023 The GoPlus Authors (goplus.org). All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Package base defines shared basic pieces of the hdq command,
+// in particular logging and the Command structure.
+package base
+
+import (
+	"flag"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+)
+
+// A Command is an implementation of a hdq command
+// like hdq fetch.
+type Command struct {
+	// Run runs the command.
+	// The args are the arguments after the command name.
+	Run func(cmd *Command, args []string)
+
+	// UsageLine is the one-line usage message.
+	// The words between "hdq" and the first " [" in the line are taken to be the command name.
+	UsageLine string
+
+	// Short is the short description shown in the 'hdq help' output.
+	Short string
+
+	// Flag is a set of flags specific to this command.
+	Flag flag.FlagSet
+
+	// Commands lists the available commands and help topics.
+	// The order here is the order in which they are printed by 'hdq help'.
+	// Note that subcommands are in general best avoided.
+	Commands []*Command
+}
+
+// Hdq is the root command; its Commands are filled in by package main.
+var Hdq = &Command{
+	UsageLine: "hdq",
+	Short:     `hdq - a HTML DOM Query Language for Go+`,
+	// Commands initialized in package main
+}
+
+// LongName returns the command's long name: the usage line up to the first
+// " [", without the leading "hdq ". It is empty for the root command.
+func (c *Command) LongName() string {
+	name := c.UsageLine
+	if i := strings.Index(name, " ["); i >= 0 {
+		name = name[:i]
+	}
+	if name == "hdq" {
+		return ""
+	}
+	return strings.TrimPrefix(name, "hdq ")
+}
+
+// Name returns the command's short name: the last word of LongName.
+func (c *Command) Name() string {
+	name := c.LongName()
+	if i := strings.LastIndex(name, " "); i >= 0 {
+		name = name[i+1:]
+	}
+	return name
+}
+
+// Usage prints the command's short description, usage line and flag
+// defaults to w, then exits the process with status 2.
+func (c *Command) Usage(w io.Writer) {
+	fmt.Fprintf(w, "%s\n\nUsage: %s\n", c.Short, c.UsageLine)
+
+	// Deferred calls never run across os.Exit, so the flag set's output is
+	// restored explicitly instead of with defer.
+	out := c.Flag.Output()
+	c.Flag.SetOutput(w)
+	c.Flag.PrintDefaults()
+	c.Flag.SetOutput(out)
+	fmt.Fprintln(w)
+	os.Exit(2)
+}
+
+// Runnable reports whether the command can be run; otherwise
+// it is a documentation pseudo-command.
+func (c *Command) Runnable() bool {
+	return c.Run != nil
+}
+
+// Usage is the usage-reporting function, filled in by package main
+// but here for reference by other packages.
+//
+// flag.Usage func()
+
+// CmdName is the name of the command being run, e.g. "fetch"; package main sets it for error messages.
+var CmdName string
+
+// Main runs a command.
+func Main(c *Command, app string, args []string) {
+	name := c.UsageLine
+	if i := strings.Index(name, " ["); i >= 0 {
+		c.UsageLine = app + name[i:]
+	}
+	c.Run(c, args)
+}
diff --git a/cmd/hdq/internal/fetch/fetch.go b/cmd/hdq/internal/fetch/fetch.go
new file mode 100644
index 0000000..5b8e975
--- /dev/null
+++ b/cmd/hdq/internal/fetch/fetch.go
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2024 The GoPlus Authors (goplus.org). All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Package fetch implements the "hdq fetch" command.
+package fetch
+
+import (
+	"encoding/json"
+	"io"
+	"log"
+	"os"
+	"strings"
+
+	"github.com/goplus/hdq/cmd/hdq/internal/base"
+	"github.com/goplus/hdq/fetcher"
+)
+
+// Cmd is the "hdq fetch" command.
+var Cmd = &base.Command{
+	UsageLine: "hdq fetch [flags] pageType [input ...]",
+	Short:     "Fetch objects from the html source with the specified pageType and input",
+}
+
+// init wires runCmd in as the implementation of Cmd.
+func init() {
+	Cmd.Run = runCmd
+}
+
+// runCmd fetches each input as a page of type args[0] and writes the fetched
+// documents to stdout as one indented JSON array. When the only input is "-",
+// the whitespace-separated inputs are read from stdin instead.
+func runCmd(cmd *base.Command, args []string) {
+	if len(args) < 2 {
+		cmd.Usage(os.Stderr)
+		return
+	}
+	pageType, inputs := args[0], args[1:]
+	if len(inputs) == 1 && inputs[0] == "-" {
+		b, err := io.ReadAll(os.Stdin)
+		if err != nil {
+			log.Fatalf("read stdin: %v", err)
+		}
+		// Fields splits on any whitespace run, so newline-separated input
+		// works and empty stdin yields no inputs rather than one empty one.
+		inputs = strings.Fields(string(b))
+	}
+	docs := make([]any, 0, len(inputs))
+	for _, input := range inputs {
+		log.Println("==> Fetch", input)
+		doc, err := fetcher.FromInput(pageType, input)
+		if err != nil {
+			// A failed fetch is an ordinary runtime error, not a programmer bug.
+			log.Fatalf("fetch %s: %v", input, err)
+		}
+		docs = append(docs, doc)
+	}
+	enc := json.NewEncoder(os.Stdout)
+	enc.SetIndent("", " ")
+	if err := enc.Encode(docs); err != nil {
+		log.Fatalf("encode result: %v", err)
+	}
+}
diff --git a/cmd/hdq/internal/help/help.go b/cmd/hdq/internal/help/help.go
new file mode 100644
index 0000000..46ea3fc
--- /dev/null
+++ b/cmd/hdq/internal/help/help.go
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2023 The GoPlus Authors (goplus.org). All rights reserved.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Package help implements the "hdq help" command.
+package help
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"strings"
+	"text/template"
+	"unicode"
+	"unicode/utf8"
+
+	"github.com/goplus/hdq/cmd/hdq/internal/base"
+)
+
+// Help implements 'hdq help': it prints to w the usage of the command that args names.
+func Help(w io.Writer, args []string) {
+	cmd := base.Hdq
+Args:
+	for i, arg := range args {
+		for _, sub := range cmd.Commands {
+			if sub.Name() == arg {
+				cmd = sub
+				continue Args
+			}
+		}
+
+		// helpSuccess is the help command using as many args as possible that would succeed.
+		helpSuccess := "hdq help"
+		if i > 0 {
+			helpSuccess += " " + strings.Join(args[:i], " ")
+		}
+		fmt.Fprintf(os.Stderr, "hdq help %s: unknown help topic. Run '%s'.\n", strings.Join(args, " "), helpSuccess)
+		os.Exit(2)
+	}
+
+	if len(cmd.Commands) > 0 {
+		PrintUsage(w, cmd)
+	} else {
+		cmd.Usage(w)
+	}
+	// NOTE: only the PrintUsage branch returns here; cmd.Usage itself exits with status 2.
+}
+
+var usageTemplate = `{{.Short | trim}}
+
+Usage:
+
+	{{.UsageLine}} <command> [arguments]
+
+The commands are:
+{{range .Commands}}{{if or (.Runnable) .Commands}}
+	{{.Name | printf "%-11s"}} {{.Short}}{{end}}{{end}}
+
+Use "hdq help{{with .LongName}} {{.}}{{end}} <command>" for more information about a command.
+
+`
+
+// An errWriter wraps a writer; its Write records in err any write error that occurred.
+type errWriter struct {
+	w   io.Writer
+	err error
+}
+
+func (w *errWriter) Write(b []byte) (int, error) {
+	n, err := w.w.Write(b)
+	if err != nil {
+		w.err = err
+	}
+	return n, err
+}
+
+// tmpl executes the given template text on data, writing the result to w.
+func tmpl(w io.Writer, text string, data interface{}) {
+	funcs := template.FuncMap{"trim": strings.TrimSpace, "capitalize": capitalize}
+	t := template.Must(template.New("top").Funcs(funcs).Parse(text))
+	ew := &errWriter{w: w}
+	err := t.Execute(ew, data)
+	if ew.err != nil {
+		// I/O error writing. Ignore write on closed pipe.
+		if strings.Contains(ew.err.Error(), "pipe") {
+			os.Exit(1)
+		}
+		log.Fatalf("writing output: %v", ew.err)
+	}
+	if err != nil {
+		panic(err)
+	}
+}
+
+// capitalize returns s with its first rune converted to title case.
+func capitalize(s string) string {
+	if s == "" {
+		return s
+	}
+	r, n := utf8.DecodeRuneInString(s)
+	return string(unicode.ToTitle(r)) + s[n:]
+}
+
+// PrintUsage renders the usage template for cmd to w through a buffered writer.
+func PrintUsage(w io.Writer, cmd *base.Command) {
+	bw := bufio.NewWriter(w)
+	tmpl(bw, usageTemplate, cmd)
+	bw.Flush()
+}