Skip to content

Commit

Permalink
tool: hdq
Browse files Browse the repository at this point in the history
  • Loading branch information
xushiwei committed Aug 2, 2024
1 parent b147a84 commit 9175e36
Show file tree
Hide file tree
Showing 4 changed files with 395 additions and 0 deletions.
94 changes: 94 additions & 0 deletions cmd/hdq/hdq.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
Copyright 2021 The GoPlus Authors (goplus.org)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"flag"
"fmt"
"os"
"strings"

"github.com/goplus/hdq/cmd/hdq/internal/base"
"github.com/goplus/hdq/cmd/hdq/internal/fetch"
"github.com/goplus/hdq/cmd/hdq/internal/help"
"github.com/qiniu/x/log"

_ "github.com/goplus/hdq/fetcher/gopkg"
_ "github.com/goplus/hdq/fetcher/hrefs"
_ "github.com/goplus/hdq/fetcher/torch"
_ "github.com/goplus/hdq/stream/http/cached"
)

func mainUsage() {
help.PrintUsage(os.Stderr, base.Hdq)
os.Exit(2)

Check warning on line 37 in cmd/hdq/hdq.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/hdq.go#L37

deep-exit: calls to os.Exit only in main() or init() functions (revive)
}

func init() {
flag.Usage = mainUsage
base.Hdq.Commands = []*base.Command{
fetch.Cmd,
}
}

func main() {

Check warning on line 47 in cmd/hdq/hdq.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/hdq.go#L47

cyclomatic: function main has cyclomatic complexity 11 (> max enabled 10) (revive)
flag.Parse()
args := flag.Args()
if len(args) < 1 {
flag.Usage()
}
log.SetFlags(log.Ldefault &^ log.LstdFlags)

base.CmdName = args[0] // for error messages
if args[0] == "help" {
help.Help(os.Stderr, args[1:])
return
}

BigCmdLoop:
for bigCmd := base.Hdq; ; {
for _, cmd := range bigCmd.Commands {
if cmd.Name() != args[0] {
continue
}
args = args[1:]
if len(cmd.Commands) > 0 {
bigCmd = cmd
if len(args) == 0 {
help.PrintUsage(os.Stderr, bigCmd)
os.Exit(2)
}
if args[0] == "help" {
help.Help(os.Stderr, append(strings.Split(base.CmdName, " "), args[1:]...))
return
}
base.CmdName += " " + args[0]
continue BigCmdLoop
}
if !cmd.Runnable() {
continue
}
cmd.Run(cmd, args)
return
}
helpArg := ""
if i := strings.LastIndex(base.CmdName, " "); i >= 0 {
helpArg = " " + base.CmdName[:i]
}
fmt.Fprintf(os.Stderr, "hdq %s: unknown command\nRun 'hdq help%s' for usage.\n", base.CmdName, helpArg)
os.Exit(2)
}
}
114 changes: 114 additions & 0 deletions cmd/hdq/internal/base/base.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Copyright (c) 2023 The GoPlus Authors (goplus.org). All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Package base defines shared basic pieces of the hdq command,
// in particular logging and the Command structure.
package base

import (
"flag"
"fmt"
"io"
"os"
"strings"
)

// A Command is an implementation of a hdq command
// like hdq fetch.
type Command struct {
// Run runs the command.
// The args are the arguments after the command name.
// A Command with a nil Run is not runnable (see Runnable).
Run func(cmd *Command, args []string)

// UsageLine is the one-line usage message.
// The words between "hdq" and the first " [" in the line are taken to be the command name.
UsageLine string

// Short is the short description shown in the 'hdq help' output.
Short string

// Flag is a set of flags specific to this command.
// Usage prints its defaults.
Flag flag.FlagSet

// Commands lists the available commands and help topics.
// The order here is the order in which they are printed by 'hdq help'.
// Note that subcommands are in general best avoided.
Commands []*Command
}

// Hdq command

Check warning on line 52 in cmd/hdq/internal/base/base.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/internal/base/base.go#L52

Comment should end in a period (godot)
var Hdq = &Command{
UsageLine: "hdq",
Short: `hdq - a HTML DOM Query Language for Go+`,
// Commands initialized in package main
}

// LongName returns the command's long name: all the words in the usage line between "gop" and a flag or argument,

Check warning on line 59 in cmd/hdq/internal/base/base.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/internal/base/base.go#L59

Comment should end in a period (godot)
func (c *Command) LongName() string {
name := c.UsageLine
if i := strings.Index(name, " ["); i >= 0 {
name = name[:i]
}
if name == "hdq" {
return ""
}
return strings.TrimPrefix(name, "hdq ")
}

// Name returns the command's short name, i.e. the final space-separated
// word of LongName. A single-word long name is returned unchanged.
func (c *Command) Name() string {
	long := c.LongName()
	sep := strings.LastIndex(long, " ")
	if sep < 0 {
		return long
	}
	return long[sep+1:]
}

// Usage show the command usage.
func (c *Command) Usage(w io.Writer) {
fmt.Fprintf(w, "%s\n\nUsage: %s\n", c.Short, c.UsageLine)

Check warning on line 82 in cmd/hdq/internal/base/base.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/internal/base/base.go#L82

unhandled-error: Unhandled error in call to function fmt.Fprintf (revive)

// restore output of flag
defer c.Flag.SetOutput(c.Flag.Output())

c.Flag.SetOutput(w)
c.Flag.PrintDefaults()
fmt.Fprintln(w)

Check warning on line 89 in cmd/hdq/internal/base/base.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/internal/base/base.go#L89

unhandled-error: Unhandled error in call to function fmt.Fprintln (revive)
os.Exit(2)

Check warning on line 90 in cmd/hdq/internal/base/base.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/internal/base/base.go#L90

exitAfterDefer: os.Exit will exit, and `defer c.Flag.SetOutput(c.Flag.Output())` will not run (gocritic)
}

// Runnable reports whether the command has a Run function. A command
// without one is a documentation pseudo-command and cannot be executed.
func (c *Command) Runnable() bool {
	hasRun := c.Run != nil
	return hasRun
}

// Usage is the usage-reporting function, filled in by package main
// but here for reference by other packages.
//
// flag.Usage func()

// CmdName is the name of the command being run, e.g. "fetch". For nested
// commands the words are joined by single spaces. Package main sets it and
// uses it in error messages.
var CmdName string

// Main runs the command c with args. If the usage line of c contains " [",
// everything before that marker is first replaced by app, so that usage
// output shows the name the program was started under.
func Main(c *Command, app string, args []string) {
	if pos := strings.Index(c.UsageLine, " ["); pos >= 0 {
		c.UsageLine = app + c.UsageLine[pos:]
	}
	c.Run(c, args)
}
64 changes: 64 additions & 0 deletions cmd/hdq/internal/fetch/fetch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright (c) 2024 The GoPlus Authors (goplus.org). All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Package fetch implements the "hdq fetch" command.
package fetch

import (
"encoding/json"
"io"
"log"
"os"
"strings"

"github.com/goplus/hdq/cmd/hdq/internal/base"
"github.com/goplus/hdq/fetcher"
)

// Cmd is the "hdq fetch" command. Its Run function is assigned in init.
var Cmd = &base.Command{
UsageLine: "hdq fetch [flags] pageType [input ...]",
Short: "Fetch objects from the html source with the specified pageType and input",
}

// init attaches runCmd as the implementation of Cmd.
func init() {
Cmd.Run = runCmd
}

func runCmd(cmd *base.Command, args []string) {
if len(args) < 2 {
cmd.Usage(os.Stderr)
return
}
pageType := args[0]
inputs := args[1:]
if len(inputs) == 1 && inputs[0] == "-" {
b, _ := io.ReadAll(os.Stdin)
inputs = strings.Split(strings.TrimSpace(string(b)), " ")
}
docs := make([]any, 0, len(inputs))
for _, input := range inputs {
log.Println("==> Fetch", input)
doc, err := fetcher.FromInput(pageType, input)
if err != nil {
panic(err)
}
docs = append(docs, doc)
}
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
enc.Encode(docs)

Check warning on line 63 in cmd/hdq/internal/fetch/fetch.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/internal/fetch/fetch.go#L63

Error return value of `enc.Encode` is not checked (errcheck)
}
123 changes: 123 additions & 0 deletions cmd/hdq/internal/help/help.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*
* Copyright (c) 2023 The GoPlus Authors (goplus.org). All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Package help implements the "hdq help" command.
package help

import (
"bufio"
"fmt"
"io"
"log"
"os"
"strings"
"text/template"
"unicode"
"unicode/utf8"

"github.com/goplus/hdq/cmd/hdq/internal/base"
)

// Help implements the 'help' command.
func Help(w io.Writer, args []string) {
cmd := base.Hdq
Args:
for i, arg := range args {
for _, sub := range cmd.Commands {
if sub.Name() == arg {
cmd = sub
continue Args
}
}

// helpSuccess is the help command using as many args as possible that would succeed.
helpSuccess := "hdq help"
if i > 0 {
helpSuccess += " " + strings.Join(args[:i], " ")
}
fmt.Fprintf(os.Stderr, "hdq help %s: unknown help topic. Run '%s'.\n", strings.Join(args, " "), helpSuccess)

Check warning on line 51 in cmd/hdq/internal/help/help.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/internal/help/help.go#L51

unhandled-error: Unhandled error in call to function fmt.Fprintf (revive)
os.Exit(2)

Check warning on line 52 in cmd/hdq/internal/help/help.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/internal/help/help.go#L52

deep-exit: calls to os.Exit only in main() or init() functions (revive)
}

if len(cmd.Commands) > 0 {
PrintUsage(w, cmd)
} else {
cmd.Usage(w)
}
// not exit 2: succeeded at 'hdq help cmd'.
}

// usageTemplate is the text/template source that PrintUsage renders for a
// command. It prints the trimmed Short text, the usage line, and one row for
// every subcommand that is runnable or has subcommands of its own, followed
// by a hint on how to get help for a single command.
var usageTemplate = `{{.Short | trim}}
Usage:
{{.UsageLine}} <command> [arguments]
The commands are:
{{range .Commands}}{{if or (.Runnable) .Commands}}
{{.Name | printf "%-11s"}} {{.Short}}{{end}}{{end}}
Use "hdq help{{with .LongName}} {{.}}{{end}} <command>" for more information about a command.
`

// errWriter is an io.Writer decorator that remembers the most recent
// non-nil error returned by the writer it wraps.
type errWriter struct {
	w   io.Writer
	err error
}

// Write forwards b to the wrapped writer and returns its result unchanged.
// A non-nil error is additionally stored in w.err.
func (w *errWriter) Write(b []byte) (int, error) {
	written, writeErr := w.w.Write(b)
	if writeErr == nil {
		return written, nil
	}
	w.err = writeErr
	return written, writeErr
}

// tmpl executes the given template text on data, writing the result to w.
func tmpl(w io.Writer, text string, data interface{}) {

Check warning on line 92 in cmd/hdq/internal/help/help.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/internal/help/help.go#L92

use-any: since GO 1.18 'interface{}' can be replaced by 'any' (revive)
t := template.New("top")
t.Funcs(template.FuncMap{"trim": strings.TrimSpace, "capitalize": capitalize})
template.Must(t.Parse(text))
ew := &errWriter{w: w}
err := t.Execute(ew, data)
if ew.err != nil {
// I/O error writing. Ignore write on closed pipe.
if strings.Contains(ew.err.Error(), "pipe") {
os.Exit(1)

Check warning on line 101 in cmd/hdq/internal/help/help.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/internal/help/help.go#L101

deep-exit: calls to os.Exit only in main() or init() functions (revive)
}
log.Fatalf("writing output: %v", ew.err)

Check warning on line 103 in cmd/hdq/internal/help/help.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/internal/help/help.go#L103

deep-exit: calls to log.Fatalf only in main() or init() functions (revive)
}
if err != nil {
panic(err)
}
}

// capitalize returns s with its first rune mapped to title case.
// The empty string is returned unchanged.
func capitalize(s string) string {
	first, size := utf8.DecodeRuneInString(s)
	if size == 0 {
		// Only the empty string decodes to a zero-width rune.
		return s
	}
	return string(unicode.ToTitle(first)) + s[size:]
}

// PrintUsage prints usage information.
func PrintUsage(w io.Writer, cmd *base.Command) {
bw := bufio.NewWriter(w)
tmpl(bw, usageTemplate, cmd)
bw.Flush()

Check warning on line 122 in cmd/hdq/internal/help/help.go

View check run for this annotation

qiniu-x / golangci-lint

cmd/hdq/internal/help/help.go#L122

unhandled-error: Unhandled error in call to function bufio.Writer.Flush (revive)
}

0 comments on commit 9175e36

Please sign in to comment.