Skip to content

Commit

Permalink
Merge pull request #60 from xushiwei/q
Browse files Browse the repository at this point in the history
tool: hdq
  • Loading branch information
xushiwei authored Aug 2, 2024
2 parents c5343d3 + 9175e36 commit a0f8604
Show file tree
Hide file tree
Showing 4 changed files with 395 additions and 0 deletions.
94 changes: 94 additions & 0 deletions cmd/hdq/hdq.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
Copyright 2021 The GoPlus Authors (goplus.org)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"flag"
"fmt"
"os"
"strings"

"github.com/goplus/hdq/cmd/hdq/internal/base"
"github.com/goplus/hdq/cmd/hdq/internal/fetch"
"github.com/goplus/hdq/cmd/hdq/internal/help"
"github.com/qiniu/x/log"

_ "github.com/goplus/hdq/fetcher/gopkg"
_ "github.com/goplus/hdq/fetcher/hrefs"
_ "github.com/goplus/hdq/fetcher/torch"
_ "github.com/goplus/hdq/stream/http/cached"
)

// mainUsage prints the top-level hdq usage text to standard error and
// terminates the process with exit status 2. It never returns.
// init installs it as flag.Usage.
func mainUsage() {
help.PrintUsage(os.Stderr, base.Hdq)
os.Exit(2)
}

// init installs mainUsage as the flag package's usage function and
// registers the subcommands of the root hdq command.
func init() {
flag.Usage = mainUsage
base.Hdq.Commands = []*base.Command{
fetch.Cmd,
}
}

// main is the entry point of the hdq tool.
//
// It parses the command line and dispatches on the first non-flag
// argument:
//   - with no arguments it prints the top-level usage and exits;
//   - "help" is forwarded to help.Help;
//   - otherwise the word is looked up in base.Hdq.Commands. A command that
//     has subcommands consumes one word and the lookup continues one level
//     down; a runnable command is run with the remaining arguments.
//
// If no command matches, an "unknown command" message is written to
// standard error and the process exits with status 2.
func main() {
	flag.Parse()
	args := flag.Args()
	if len(args) < 1 {
		// flag.Usage is mainUsage (installed by init), which exits the
		// process, so args[0] below is never evaluated on an empty slice.
		flag.Usage()
	}
	// Clear the log.LstdFlags bits from the logger's default flags.
	log.SetFlags(log.Ldefault &^ log.LstdFlags)

	base.CmdName = args[0] // for error messages
	if args[0] == "help" {
		help.Help(os.Stderr, args[1:])
		return
	}

BigCmdLoop:
	// bigCmd is the command whose subcommands are currently being searched.
	// "continue BigCmdLoop" restarts the search one level down without
	// re-initialising bigCmd.
	for bigCmd := base.Hdq; ; {
		for _, cmd := range bigCmd.Commands {
			if cmd.Name() != args[0] {
				continue
			}
			if len(cmd.Commands) > 0 {
				// Descend into a command group: consume its name and
				// resolve the next word against its subcommands.
				bigCmd = cmd
				args = args[1:]
				if len(args) == 0 {
					help.PrintUsage(os.Stderr, bigCmd)
					os.Exit(2)
				}
				if args[0] == "help" {
					// Accept 'hdq group help' and 'hdq group help sub' as
					// well as 'hdq help group [sub]'.
					help.Help(os.Stderr, append(strings.Split(base.CmdName, " "), args[1:]...))
					return
				}
				base.CmdName += " " + args[0]
				continue BigCmdLoop
			}
			if !cmd.Runnable() {
				// Documentation pseudo-command: not executable. args is
				// deliberately left untouched so that the remaining
				// commands are still compared against the same word.
				continue
			}
			// Only strip the command name once the command is known to
			// be runnable.
			cmd.Run(cmd, args[1:])
			return
		}
		// No command matched. For a nested name such as "a b", point the
		// user at 'hdq help a'.
		helpArg := ""
		if i := strings.LastIndex(base.CmdName, " "); i >= 0 {
			helpArg = " " + base.CmdName[:i]
		}
		fmt.Fprintf(os.Stderr, "hdq %s: unknown command\nRun 'hdq help%s' for usage.\n", base.CmdName, helpArg)
		os.Exit(2)
	}
}
114 changes: 114 additions & 0 deletions cmd/hdq/internal/base/base.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Copyright (c) 2023 The GoPlus Authors (goplus.org). All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Package base defines shared basic pieces of the hdq command,
// in particular logging and the Command structure.
package base

import (
"flag"
"fmt"
"io"
"os"
"strings"
)

// A Command is an implementation of an hdq command
// like hdq fetch.
type Command struct {
// Run runs the command.
// The args are the arguments after the command name.
Run func(cmd *Command, args []string)

// UsageLine is the one-line usage message.
// The words between the leading "hdq" and the first " [" in the line are
// taken to be the command name (see LongName and Name).
UsageLine string

// Short is the short description shown in the 'hdq help' output.
Short string

// Flag is a set of flags specific to this command.
Flag flag.FlagSet

// Commands lists the available commands and help topics.
// The order here is the order in which they are printed by 'hdq help'.
// Note that subcommands are in general best avoided.
Commands []*Command
}

// Hdq is the root hdq command. Its Commands list is left empty here and
// is populated by package main during initialization.
var Hdq = &Command{
UsageLine: "hdq",
Short: `hdq - a HTML DOM Query Language for Go+`,
// Commands initialized in package main
}

// LongName returns the command's long name: the part of the usage line
// that precedes the first " [", with the leading "hdq " removed.
// For the root command, whose usage line is just "hdq", it returns "".
func (c *Command) LongName() string {
	// Keep only the text in front of the first bracketed flag/argument.
	words, _, _ := strings.Cut(c.UsageLine, " [")
	switch words {
	case "hdq":
		return ""
	default:
		return strings.TrimPrefix(words, "hdq ")
	}
}

// Name returns the command's short name: the final space-separated word
// of its long name (the whole long name when it contains no space).
func (c *Command) Name() string {
	long := c.LongName()
	// LastIndex yields -1 when there is no space, so the slice then starts
	// at 0 and the long name is returned unchanged.
	return long[strings.LastIndex(long, " ")+1:]
}

// Usage prints the command's short description, its usage line and the
// defaults of its flags to w, then terminates the process with exit
// status 2. It never returns.
func (c *Command) Usage(w io.Writer) {
	fmt.Fprintf(w, "%s\n\nUsage: %s\n", c.Short, c.UsageLine)

	// Temporarily point the flag set at w so PrintDefaults writes there.
	// The previous destination is restored explicitly rather than with
	// defer, because os.Exit below does not run deferred calls.
	prev := c.Flag.Output()
	c.Flag.SetOutput(w)
	c.Flag.PrintDefaults()
	c.Flag.SetOutput(prev)

	fmt.Fprintln(w)
	os.Exit(2)
}

// Runnable reports whether the command can be run, i.e. whether its Run
// function is set; otherwise it is a documentation pseudo-command.
func (c *Command) Runnable() bool {
return c.Run != nil
}

// Usage is the usage-reporting function, filled in by package main
// but here for reference by other packages.
//
// flag.Usage func()

// CmdName is the name of the command being run, e.g. "fetch". Package
// main sets it from the command-line arguments, appending each further
// subcommand word separated by a space, for use in error messages.
var CmdName string

// Main runs the command c under the application name app.
//
// If c's usage line contains " [", everything in front of it is replaced
// by app, so that usage output refers to app. c.Run is then called with
// c and args.
func Main(c *Command, app string, args []string) {
	if _, tail, found := strings.Cut(c.UsageLine, " ["); found {
		c.UsageLine = app + " [" + tail
	}
	c.Run(c, args)
}
64 changes: 64 additions & 0 deletions cmd/hdq/internal/fetch/fetch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright (c) 2024 The GoPlus Authors (goplus.org). All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Package fetch implements the "hdq fetch" command.
package fetch

import (
"encoding/json"
"io"
"log"
"os"
"strings"

"github.com/goplus/hdq/cmd/hdq/internal/base"
"github.com/goplus/hdq/fetcher"
)

// Cmd is the "hdq fetch" command. Its Run function is assigned in init.
var Cmd = &base.Command{
UsageLine: "hdq fetch [flags] pageType [input ...]",
Short: "Fetch objects from the html source with the specified pageType and input",
}

// init wires runCmd in as the implementation of Cmd.
func init() {
Cmd.Run = runCmd
}

// runCmd implements "hdq fetch".
//
// args[0] is the page type and the remaining arguments are the inputs to
// fetch. When the only input is "-", the inputs are read from standard
// input instead, separated by any whitespace (spaces, tabs or newlines).
// Each input is fetched with fetcher.FromInput and the resulting objects
// are written to standard output as one indented JSON array.
//
// With fewer than two arguments the command usage is printed. A fetch
// error panics; a failure to read standard input or to write the JSON
// output is logged and terminates the process.
func runCmd(cmd *base.Command, args []string) {
	if len(args) < 2 {
		cmd.Usage(os.Stderr)
		return
	}
	pageType, inputs := args[0], args[1:]
	if len(inputs) == 1 && inputs[0] == "-" {
		b, err := io.ReadAll(os.Stdin)
		if err != nil {
			log.Fatalln("hdq fetch: reading inputs from stdin:", err)
		}
		// Fields splits on any run of whitespace, so newline-separated
		// lists piped from other tools work, repeated blanks do not yield
		// empty inputs, and empty stdin yields no inputs at all.
		inputs = strings.Fields(string(b))
	}
	docs := make([]any, 0, len(inputs))
	for _, input := range inputs {
		log.Println("==> Fetch", input)
		doc, err := fetcher.FromInput(pageType, input)
		if err != nil {
			panic(err)
		}
		docs = append(docs, doc)
	}
	enc := json.NewEncoder(os.Stdout)
	enc.SetIndent("", " ")
	if err := enc.Encode(docs); err != nil {
		log.Fatalln("hdq fetch: writing JSON output:", err)
	}
}
123 changes: 123 additions & 0 deletions cmd/hdq/internal/help/help.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/*
* Copyright (c) 2023 The GoPlus Authors (goplus.org). All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Package help implements the "hdq help" command.
package help

import (
"bufio"
"fmt"
"io"
"log"
"os"
"strings"
"text/template"
"unicode"
"unicode/utf8"

"github.com/goplus/hdq/cmd/hdq/internal/base"
)

// Help implements the 'help' command.
//
// Starting at the root hdq command, each element of args selects a
// subcommand by name. If an element matches no subcommand, an "unknown
// help topic" message is written to standard error and the process exits
// with status 2. Otherwise the resolved command's help is written to w:
// the subcommand overview when it has subcommands, or its own usage when
// it has none. Note that Command.Usage, as defined in package base,
// itself terminates the process with status 2 after printing.
func Help(w io.Writer, args []string) {
	cmd := base.Hdq
	for i, arg := range args {
		var match *base.Command
		for _, sub := range cmd.Commands {
			if sub.Name() == arg {
				match = sub
				break
			}
		}
		if match == nil {
			// helpSuccess is the help command using as many args as
			// possible that would succeed.
			helpSuccess := strings.Join(append([]string{"hdq help"}, args[:i]...), " ")
			fmt.Fprintf(os.Stderr, "hdq help %s: unknown help topic. Run '%s'.\n", strings.Join(args, " "), helpSuccess)
			os.Exit(2)
		}
		cmd = match
	}

	if len(cmd.Commands) == 0 {
		cmd.Usage(w)
		return
	}
	PrintUsage(w, cmd)
}

// usageTemplate is the text/template source that PrintUsage renders for a
// command. It prints the command's trimmed Short text and usage line,
// lists every subcommand that is runnable or has subcommands of its own,
// and ends with a hint on how to get help for a subcommand. It relies on
// the "trim" function registered by tmpl.
var usageTemplate = `{{.Short | trim}}
Usage:
{{.UsageLine}} <command> [arguments]
The commands are:
{{range .Commands}}{{if or (.Runnable) .Commands}}
{{.Name | printf "%-11s"}} {{.Short}}{{end}}{{end}}
Use "hdq help{{with .LongName}} {{.}}{{end}} <command>" for more information about a command.
`

// An errWriter forwards writes to an underlying writer and remembers the
// most recent write error, so a caller can tell afterwards whether any
// write failed.
type errWriter struct {
	w   io.Writer // destination of all writes
	err error     // last error returned by w.Write, if any
}

// Write passes b to the wrapped writer and returns its result unchanged.
// A non-nil error is additionally stored in the err field.
func (ew *errWriter) Write(b []byte) (n int, err error) {
	if n, err = ew.w.Write(b); err != nil {
		ew.err = err
	}
	return n, err
}

// tmpl parses text as a template named "top", with the helper functions
// "trim" and "capitalize" available, and executes it on data, writing the
// output to w.
//
// A malformed template or a template execution error panics. If writing
// to w failed, the process exits instead: silently with status 1 when the
// error text mentions "pipe", otherwise through log.Fatalf.
func tmpl(w io.Writer, text string, data interface{}) {
	helpers := template.FuncMap{
		"trim":       strings.TrimSpace,
		"capitalize": capitalize,
	}
	t := template.Must(template.New("top").Funcs(helpers).Parse(text))

	sink := &errWriter{w: w}
	execErr := t.Execute(sink, data)

	// Write failures take precedence over the execution error.
	switch {
	case sink.err == nil:
		// Output was written successfully.
	case strings.Contains(sink.err.Error(), "pipe"):
		// I/O error writing. Ignore write on closed pipe.
		os.Exit(1)
	default:
		log.Fatalf("writing output: %v", sink.err)
	}
	if execErr != nil {
		panic(execErr)
	}
}

// capitalize returns s with its first rune mapped to title case.
// The empty string is returned unchanged.
func capitalize(s string) string {
	head, width := utf8.DecodeRuneInString(s)
	if width == 0 {
		// Only the empty string decodes to a zero-width rune.
		return s
	}
	title := unicode.ToTitle(head)
	return string(title) + s[width:]
}

// PrintUsage renders usageTemplate for cmd and writes the result to w
// through a buffered writer, which is flushed before returning.
func PrintUsage(w io.Writer, cmd *base.Command) {
	out := bufio.NewWriter(w)
	tmpl(out, usageTemplate, cmd)
	out.Flush()
}

0 comments on commit a0f8604

Please sign in to comment.