From c70c9d79dad3e0337ceaee9dea811f74226c2fa4 Mon Sep 17 00:00:00 2001 From: xushiwei Date: Sat, 18 May 2024 09:42:35 +0800 Subject: [PATCH] hdq/fetcher; pysigfetch --- chore/pysigfetch/gop_autogen.go | 8 -- chore/pysigfetch/pysigfetch.go | 45 ++++++++++++ chore/pysigfetch/pysigfetch.gop | 0 fetcher/fetch.go | 73 +++++++++++++++++++ .../torch/_testdata/eye/in.html | 0 .../torch/_testdata/eye/out.json | 0 .../torch/_testdata/invalid/in.html | 0 .../torch/_testdata/invalid/out.json | 0 fetcher/torch/gop_autogen.go | 66 +++++++++++++++++ {pysig => fetcher}/torch/pysig_torch.gop | 13 ++++ {pysig => fetcher}/torch/pysig_torch_test.go | 0 hdq.go | 15 ++++ hdq_test.go | 4 +- hdqtest/hdqtest.go | 16 +--- pysig/torch/gop_autogen.go | 53 -------------- tutorial/02-GithubRepos/gop_autogen.go | 27 +++---- tutorial/02-GithubRepos/repos.gop | 1 + 17 files changed, 232 insertions(+), 89 deletions(-) delete mode 100644 chore/pysigfetch/gop_autogen.go create mode 100644 chore/pysigfetch/pysigfetch.go delete mode 100644 chore/pysigfetch/pysigfetch.gop create mode 100644 fetcher/fetch.go rename {pysig => fetcher}/torch/_testdata/eye/in.html (100%) rename {pysig => fetcher}/torch/_testdata/eye/out.json (100%) rename {pysig => fetcher}/torch/_testdata/invalid/in.html (100%) rename {pysig => fetcher}/torch/_testdata/invalid/out.json (100%) create mode 100644 fetcher/torch/gop_autogen.go rename {pysig => fetcher}/torch/pysig_torch.gop (75%) rename {pysig => fetcher}/torch/pysig_torch_test.go (100%) delete mode 100644 pysig/torch/gop_autogen.go diff --git a/chore/pysigfetch/gop_autogen.go b/chore/pysigfetch/gop_autogen.go deleted file mode 100644 index 7bcfaea..0000000 --- a/chore/pysigfetch/gop_autogen.go +++ /dev/null @@ -1,8 +0,0 @@ -// Code generated by gop (Go+); DO NOT EDIT. 
- -package main - -const _ = true - -func main() { -} diff --git a/chore/pysigfetch/pysigfetch.go b/chore/pysigfetch/pysigfetch.go new file mode 100644 index 0000000..fcf0ff2 --- /dev/null +++ b/chore/pysigfetch/pysigfetch.go @@ -0,0 +1,45 @@ +/* +Copyright 2024 The GoPlus Authors (goplus.org) +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package main + +import ( + "encoding/json" + "fmt" + "os" + + "github.com/goplus/hdq/fetcher" + _ "github.com/goplus/hdq/fetcher/torch" +) + +type module struct { + Name string `json:"name"` + Items []any `json:"items"` +} + +// Usage: pysigfetch pageType [name ...] +func main() { + if len(os.Args) < 3 { + fmt.Fprintln(os.Stderr, "Usage: pysigfetch pageType [name ...]") + os.Exit(1) + } + pageType := os.Args[1] + names := os.Args[2:] + docs := make([]any, len(names)) + for i, name := range names { + docs[i] = fetcher.FromInput(pageType, name) + } + json.NewEncoder(os.Stdout).Encode(module{pageType, docs}) +} diff --git a/chore/pysigfetch/pysigfetch.gop b/chore/pysigfetch/pysigfetch.gop deleted file mode 100644 index e69de29..0000000 diff --git a/fetcher/fetch.go b/fetcher/fetch.go new file mode 100644 index 0000000..9c4d14e --- /dev/null +++ b/fetcher/fetch.go @@ -0,0 +1,73 @@ +/* +Copyright 2024 The GoPlus Authors (goplus.org) +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package fetcher + +import ( + "reflect" + + "github.com/goplus/hdq" +) + +// Conv is a converter function of the form func(doc hdq.NodeSet) T. +type Conv = any + +// ----------------------------------------------------------------------------- + +// Convert converts a html source to an object. +func Convert(conv reflect.Value, in any) any { + doc := reflect.ValueOf(hdq.Source(in)) + out := conv.Call([]reflect.Value{doc}) + return out[0].Interface() +} + +// ----------------------------------------------------------------------------- + +// New creates a new object from a html source by a registered converter. +func New(pageType string, in any) any { + page, ok := convs[pageType] + if !ok { + panic("fetcher: unknown pageType - " + pageType) + } + return Convert(page.Conv, in) +} + +// FromInput creates a new object from the html source with the specified input name. +func FromInput(pageType string, input string) any { + page, ok := convs[pageType] + if !ok { + panic("fetcher: unknown pageType - " + pageType) + } + in := page.Input(input) + return Convert(page.Conv, in) +} + +// sitePageType represents a site page type. +type sitePageType struct { + Conv reflect.Value + Input func(string) any +} + +var ( + convs = map[string]sitePageType{} +) + +// Register registers a pageType with its converter function and input function. 
+func Register(pageType string, conv Conv, input func(string) any) { + vConv := reflect.ValueOf(conv) + convs[pageType] = sitePageType{vConv, input} +} + +// ----------------------------------------------------------------------------- diff --git a/pysig/torch/_testdata/eye/in.html b/fetcher/torch/_testdata/eye/in.html similarity index 100% rename from pysig/torch/_testdata/eye/in.html rename to fetcher/torch/_testdata/eye/in.html diff --git a/pysig/torch/_testdata/eye/out.json b/fetcher/torch/_testdata/eye/out.json similarity index 100% rename from pysig/torch/_testdata/eye/out.json rename to fetcher/torch/_testdata/eye/out.json diff --git a/pysig/torch/_testdata/invalid/in.html b/fetcher/torch/_testdata/invalid/in.html similarity index 100% rename from pysig/torch/_testdata/invalid/in.html rename to fetcher/torch/_testdata/invalid/in.html diff --git a/pysig/torch/_testdata/invalid/out.json b/fetcher/torch/_testdata/invalid/out.json similarity index 100% rename from pysig/torch/_testdata/invalid/out.json rename to fetcher/torch/_testdata/invalid/out.json diff --git a/fetcher/torch/gop_autogen.go b/fetcher/torch/gop_autogen.go new file mode 100644 index 0000000..ba938fe --- /dev/null +++ b/fetcher/torch/gop_autogen.go @@ -0,0 +1,66 @@ +// Code generated by gop (Go+); DO NOT EDIT. + +package torch + +import ( + "github.com/goplus/hdq" + "github.com/goplus/hdq/fetcher" + "github.com/qiniu/x/errors" + "strings" +) + +const GopPackage = "github.com/goplus/hdq" +const _ = true +const spaces = " \t\r\n¶" + +type Result struct { + Name string `json:"name"` + Doc string `json:"doc"` + Sig string `json:"sig"` +} +//line fetcher/torch/pysig_torch.gop:37:1 +// New creates a new Result from a html document. 
+func New(doc hdq.NodeSet) Result { +//line fetcher/torch/pysig_torch.gop:39:1 + fn := doc.Any().Dl().Class("py function") +//line fetcher/torch/pysig_torch.gop:40:1 + decl := func() (_gop_ret string) { +//line fetcher/torch/pysig_torch.gop:40:1 + var _gop_err error +//line fetcher/torch/pysig_torch.gop:40:1 + _gop_ret, _gop_err = fn.FirstElementChild().Dt().Text__0() +//line fetcher/torch/pysig_torch.gop:40:1 + if _gop_err != nil { +//line fetcher/torch/pysig_torch.gop:40:1 + _gop_err = errors.NewFrame(_gop_err, "fn.firstElementChild.dt.text", "fetcher/torch/pysig_torch.gop", 40, "torch.New") +//line fetcher/torch/pysig_torch.gop:40:1 + panic(_gop_err) + } +//line fetcher/torch/pysig_torch.gop:40:1 + return + }() +//line fetcher/torch/pysig_torch.gop:41:1 + pos := strings.IndexByte(decl, '(') +//line fetcher/torch/pysig_torch.gop:42:1 + if pos > 0 { +//line fetcher/torch/pysig_torch.gop:43:1 + name := strings.TrimPrefix(decl[:pos], "torch.") +//line fetcher/torch/pysig_torch.gop:44:1 + sig := decl[pos:] +//line fetcher/torch/pysig_torch.gop:45:1 + return Result{strings.TrimSpace(name), "", strings.TrimRight(sig, spaces)} + } +//line fetcher/torch/pysig_torch.gop:47:1 + return Result{"", "", ""} +} +//line fetcher/torch/pysig_torch.gop:50:1 +// Input returns the input URL for the given name. +func Input(name string) interface{} { +//line fetcher/torch/pysig_torch.gop:52:1 + return "https://pytorch.org/docs/stable/generated/torch." 
+ name + ".html" +} +//line fetcher/torch/pysig_torch.gop:55:1 +func init() { +//line fetcher/torch/pysig_torch.gop:56:1 + fetcher.Register("torch", New, Input) +} diff --git a/pysig/torch/pysig_torch.gop b/fetcher/torch/pysig_torch.gop similarity index 75% rename from pysig/torch/pysig_torch.gop rename to fetcher/torch/pysig_torch.gop index 38ec720..ed05a61 100644 --- a/pysig/torch/pysig_torch.gop +++ b/fetcher/torch/pysig_torch.gop @@ -19,6 +19,7 @@ import ( "strings" "github.com/goplus/hdq" + "github.com/goplus/hdq/fetcher" ) // ----------------------------------------------------------------------------- @@ -33,6 +34,7 @@ type Result struct { Sig string `json:"sig"` } +// New creates a new Result from a html document. func New(doc hdq.NodeSet) Result { fn := doc.any.dl.class("py function") decl := fn.firstElementChild.dt.text! @@ -44,3 +46,14 @@ func New(doc hdq.NodeSet) Result { } return {"", "", ""} } + +// Input returns the input URL for the given name. +func Input(name string) any { + return "https://pytorch.org/docs/stable/generated/torch." + name + ".html" +} + +func init() { + fetcher.Register("torch", New, Input) +} + +// ----------------------------------------------------------------------------- diff --git a/pysig/torch/pysig_torch_test.go b/fetcher/torch/pysig_torch_test.go similarity index 100% rename from pysig/torch/pysig_torch_test.go rename to fetcher/torch/pysig_torch_test.go diff --git a/hdq.go b/hdq.go index 22bd6d1..23200fd 100644 --- a/hdq.go +++ b/hdq.go @@ -55,6 +55,7 @@ type cachedGetter interface { Cached() int } +// NodeSet represents a set of nodes. type NodeSet struct { Data NodeEnum Err error @@ -83,6 +84,8 @@ func Source(r interface{}) (ret NodeSet) { return New(r) case io.Reader: return New(v) + case NodeSet: // input is a node set + return v default: panic("unsupport source type") } @@ -167,6 +170,7 @@ func (p *fixNodes) Cached() int { return len(p.nodes) } +// Nodes creates a node set from the given nodes. 
func Nodes(nodes ...*html.Node) (ret NodeSet) { return NodeSet{Data: &fixNodes{nodes}} } @@ -204,6 +208,7 @@ func anyForEach(p *html.Node, filter func(node *html.Node) error) error { return nil } +// Any returns all nodes as a node set. func (p NodeSet) Any() (ret NodeSet) { if p.Err != nil { return p @@ -258,10 +263,12 @@ func parentLevelForEach(p *html.Node, level int, filter func(node *html.Node) er return filter(p) } +// Child returns the child node set. It is equivalent to ChildN(1). func (p NodeSet) Child() (ret NodeSet) { return p.ChildN(1) } +// ChildN returns the child node set at the given level. func (p NodeSet) ChildN(level int) (ret NodeSet) { if p.Err != nil || level == 0 { return p @@ -272,14 +279,17 @@ func (p NodeSet) ChildN(level int) (ret NodeSet) { return NodeSet{Data: &parentLevelNodes{p.Data, level}} } +// Parent returns the parent node set. It is equivalent to ParentN(1). func (p NodeSet) Parent() (ret NodeSet) { return p.ChildN(-1) } +// ParentN returns the parent node set at the given level. func (p NodeSet) ParentN(level int) (ret NodeSet) { return p.ChildN(-level) } +// One returns the first node as a node set. func (p NodeSet) One() (ret NodeSet) { if _, ok := p.Data.(oneNode); ok { return p @@ -459,6 +469,7 @@ func (p *matchedNodes) ForEach(filter func(node *html.Node) error) { }) } +// Match returns the matched node set. func (p NodeSet) Match(filter func(node *html.Node) bool) (ret NodeSet) { if p.Err != nil { return p @@ -497,6 +508,8 @@ func (p NodeSet) ChildrenAsText(doReplace bool) (ret NodeSet) { // ----------------------------------------------------------------------------- +// CollectOne returns the first node. +// If `exactly` is true, it will return an error if there is more than one node. 
func (p NodeSet) CollectOne__1(exactly bool) (item *html.Node, err error) { if p.Err != nil { return nil, p.Err @@ -520,10 +533,12 @@ func (p NodeSet) CollectOne__1(exactly bool) (item *html.Node, err error) { return } +// CollectOne returns the first node. func (p NodeSet) CollectOne__0() (item *html.Node, err error) { return p.CollectOne__1(false) } +// Collect returns all nodes. func (p NodeSet) Collect() (items []*html.Node, err error) { if p.Err != nil { return nil, p.Err diff --git a/hdq_test.go b/hdq_test.go index 05f6d8c..6074a29 100644 --- a/hdq_test.go +++ b/hdq_test.go @@ -19,8 +19,8 @@ import ( "testing" "github.com/goplus/hdq" + "github.com/goplus/hdq/fetcher/torch" "github.com/goplus/hdq/hdqtest" - "github.com/goplus/hdq/pysig/torch" repos "github.com/goplus/hdq/tutorial/02-GithubRepos" ) @@ -39,5 +39,5 @@ func TestGithub(t *testing.T) { } func TestTorch(t *testing.T) { - hdqtest.FromDir(t, "", "./pysig/torch/_testdata", torch.New) + hdqtest.FromDir(t, "", "./fetcher/torch/_testdata", torch.New) } diff --git a/hdqtest/hdqtest.go b/hdqtest/hdqtest.go index 1f3a27b..b587799 100644 --- a/hdqtest/hdqtest.go +++ b/hdqtest/hdqtest.go @@ -24,15 +24,12 @@ import ( "strings" "testing" - "github.com/goplus/hdq" + "github.com/goplus/hdq/fetcher" ) -// func(doc hdq.NodeSet) any -type Converter = any - // FromDir tests all html files in a directory. 
// optional params: [filename, scheme] -func FromDir(t *testing.T, sel, relDir string, conv Converter, params ...string) { +func FromDir(t *testing.T, sel, relDir string, conv fetcher.Conv, params ...string) { dir, err := os.Getwd() if err != nil { t.Fatal("Getwd failed:", err) @@ -73,16 +70,9 @@ func testFrom(t *testing.T, pkgDir, sel string, conv reflect.Value, fname, schem t.Fatal("ReadFile failed:", err) } expected := string(b) - ret := ConvFile(in, conv) + ret := fetcher.Convert(conv, in) retb, _ := json.MarshalIndent(ret, "", "\t") if v := string(retb); v != expected { t.Fatalf("\n==> got:\n%s\n==> expected:\n%s\n", v, expected) } } - -// ConvFile converts a html source to an object. -func ConvFile(in any, conv reflect.Value) any { - doc := reflect.ValueOf(hdq.Source(in)) - out := conv.Call([]reflect.Value{doc}) - return out[0].Interface() -} diff --git a/pysig/torch/gop_autogen.go b/pysig/torch/gop_autogen.go deleted file mode 100644 index ac351a4..0000000 --- a/pysig/torch/gop_autogen.go +++ /dev/null @@ -1,53 +0,0 @@ -// Code generated by gop (Go+); DO NOT EDIT. 
- -package torch - -import ( - "github.com/goplus/hdq" - "github.com/qiniu/x/errors" - "strings" -) - -const GopPackage = "github.com/goplus/hdq" -const _ = true -const spaces = " \t\r\n¶" - -type Result struct { - Name string `json:"name"` - Doc string `json:"doc"` - Sig string `json:"sig"` -} -//line pysig/torch/pysig_torch.gop:36:1 -func New(doc hdq.NodeSet) Result { -//line pysig/torch/pysig_torch.gop:37:1 - fn := doc.Any().Dl().Class("py function") -//line pysig/torch/pysig_torch.gop:38:1 - decl := func() (_gop_ret string) { -//line pysig/torch/pysig_torch.gop:38:1 - var _gop_err error -//line pysig/torch/pysig_torch.gop:38:1 - _gop_ret, _gop_err = fn.FirstElementChild().Dt().Text__0() -//line pysig/torch/pysig_torch.gop:38:1 - if _gop_err != nil { -//line pysig/torch/pysig_torch.gop:38:1 - _gop_err = errors.NewFrame(_gop_err, "fn.firstElementChild.dt.text", "pysig/torch/pysig_torch.gop", 38, "torch.New") -//line pysig/torch/pysig_torch.gop:38:1 - panic(_gop_err) - } -//line pysig/torch/pysig_torch.gop:38:1 - return - }() -//line pysig/torch/pysig_torch.gop:39:1 - pos := strings.IndexByte(decl, '(') -//line pysig/torch/pysig_torch.gop:40:1 - if pos > 0 { -//line pysig/torch/pysig_torch.gop:41:1 - name := strings.TrimPrefix(decl[:pos], "torch.") -//line pysig/torch/pysig_torch.gop:42:1 - sig := decl[pos:] -//line pysig/torch/pysig_torch.gop:43:1 - return Result{strings.TrimSpace(name), "", strings.TrimRight(sig, spaces)} - } -//line pysig/torch/pysig_torch.gop:45:1 - return Result{"", "", ""} -} diff --git a/tutorial/02-GithubRepos/gop_autogen.go b/tutorial/02-GithubRepos/gop_autogen.go index 8cb3d89..882ded1 100644 --- a/tutorial/02-GithubRepos/gop_autogen.go +++ b/tutorial/02-GithubRepos/gop_autogen.go @@ -118,35 +118,36 @@ func newRepo(node hdq.NodeSet) Repo { return Repo{Repo: repo, ForkedFrom: forkedFrom, Title: title, Language: language, UpdateTime: updateTime, Forks: forks} } //line tutorial/02-GithubRepos/repos.gop:44:1 +// New creates a new Result from 
a html document. func New(doc hdq.NodeSet) Result { -//line tutorial/02-GithubRepos/repos.gop:45:1 - divRepos := doc.Any().Div().Id("user-repositories-list").One() //line tutorial/02-GithubRepos/repos.gop:46:1 - repoList := divRepos.Child().Ul().One() + divRepos := doc.Any().Div().Id("user-repositories-list").One() //line tutorial/02-GithubRepos/repos.gop:47:1 + repoList := divRepos.Child().Ul().One() +//line tutorial/02-GithubRepos/repos.gop:48:1 repos := func() (_gop_ret []Repo) { -//line tutorial/02-GithubRepos/repos.gop:47:1 +//line tutorial/02-GithubRepos/repos.gop:48:1 repoList.Child().Li().Gop_Enum(func(x hdq.NodeSet) { -//line tutorial/02-GithubRepos/repos.gop:47:1 +//line tutorial/02-GithubRepos/repos.gop:48:1 _gop_ret = append(_gop_ret, newRepo(x)) }) -//line tutorial/02-GithubRepos/repos.gop:47:1 +//line tutorial/02-GithubRepos/repos.gop:48:1 return }() -//line tutorial/02-GithubRepos/repos.gop:48:1 +//line tutorial/02-GithubRepos/repos.gop:49:1 next := func() (_gop_ret string) { -//line tutorial/02-GithubRepos/repos.gop:48:1 +//line tutorial/02-GithubRepos/repos.gop:49:1 var _gop_err error -//line tutorial/02-GithubRepos/repos.gop:48:1 +//line tutorial/02-GithubRepos/repos.gop:49:1 _gop_ret, _gop_err = doc.Any().Div().Class("paginate-container").One().Any().A().ChildEqualText("Next").Href__0() -//line tutorial/02-GithubRepos/repos.gop:48:1 +//line tutorial/02-GithubRepos/repos.gop:49:1 if _gop_err != nil { -//line tutorial/02-GithubRepos/repos.gop:48:1 +//line tutorial/02-GithubRepos/repos.gop:49:1 return "" } -//line tutorial/02-GithubRepos/repos.gop:48:1 +//line tutorial/02-GithubRepos/repos.gop:49:1 return }() -//line tutorial/02-GithubRepos/repos.gop:49:1 +//line tutorial/02-GithubRepos/repos.gop:50:1 return Result{Repos: repos, Next: next} } diff --git a/tutorial/02-GithubRepos/repos.gop b/tutorial/02-GithubRepos/repos.gop index ba2d242..44cca69 100644 --- a/tutorial/02-GithubRepos/repos.gop +++ b/tutorial/02-GithubRepos/repos.gop @@ -41,6 +41,7 @@ 
type Result struct { Next string } +// New creates a new Result from a html document. func New(doc hdq.NodeSet) Result { divRepos := doc.any.div.id("user-repositories-list").one repoList := divRepos.child.ul.one