Skip to content

Commit

Permalink
hdq/fetcher; pysigfetch
Browse files Browse the repository at this point in the history
  • Loading branch information
xushiwei committed May 18, 2024
1 parent 0b0efb1 commit c70c9d7
Show file tree
Hide file tree
Showing 17 changed files with 232 additions and 89 deletions.
8 changes: 0 additions & 8 deletions chore/pysigfetch/gop_autogen.go

This file was deleted.

45 changes: 45 additions & 0 deletions chore/pysigfetch/pysigfetch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
Copyright 2024 The GoPlus Authors (goplus.org)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"encoding/json"
"fmt"
"os"

"github.com/goplus/hdq/fetcher"
_ "github.com/goplus/hdq/fetcher/torch"
)

// module is the JSON document written to stdout by main.
type module struct {
// Name is filled with the pageType command-line argument.
Name string `json:"name"`
// Items holds one fetched object per name argument, in argument order.
Items []any `json:"items"`
}

// Usage: pysigfetch pageType [name ...]
func main() {
if len(os.Args) < 3 {
fmt.Fprintln(os.Stderr, "Usage: pysigfetch pageType [name ...]")
os.Exit(1)
}
pageType := os.Args[1]
names := os.Args[2:]
docs := make([]any, len(names))
for i, name := range names {
docs[i] = fetcher.FromInput(pageType, name)
}
json.NewEncoder(os.Stdout).Encode(module{pageType, docs})

Check warning on line 44 in chore/pysigfetch/pysigfetch.go

View check run for this annotation

qiniu-x / golangci-lint

chore/pysigfetch/pysigfetch.go#L44

Error return value of `(*encoding/json.Encoder).Encode` is not checked (errcheck)
}
Empty file removed chore/pysigfetch/pysigfetch.gop
Empty file.
73 changes: 73 additions & 0 deletions fetcher/fetch.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
/*
Copyright 2024 The GoPlus Authors (goplus.org)
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package fetcher

import (
"reflect"

"github.com/goplus/hdq"
)

// Conv is a converter function. It is expected to have the form
//
//	func(doc hdq.NodeSet) <any-object>
//
// It is declared as any because the result type differs per converter;
// converters are invoked through reflection (see Convert).
type Conv = any

// -----------------------------------------------------------------------------

// Convert turns a html source into an object using the converter conv.
//
// The source in is first wrapped into a node set by hdq.Source. conv is then
// invoked through reflection with that node set as its only argument, and
// its first result is returned.
func Convert(conv reflect.Value, in any) any {
	args := []reflect.Value{reflect.ValueOf(hdq.Source(in))}
	results := conv.Call(args)
	return results[0].Interface()
}

// -----------------------------------------------------------------------------

// New builds an object from the html source in, using the converter that was
// registered for pageType. It panics if no converter is registered under
// that page type.
func New(pageType string, in any) any {
	if page, found := convs[pageType]; found {
		return Convert(page.Conv, in)
	}
	panic("fetcher: unknown pageType - " + pageType)
}

// FromInput builds an object for the given input name. The name is first
// mapped to a html source by the Input function registered for pageType, and
// that source is then converted by the registered converter. It panics if
// pageType has not been registered.
func FromInput(pageType string, input string) any {
	page, found := convs[pageType]
	if found {
		source := page.Input(input)
		return Convert(page.Conv, source)
	}
	panic("fetcher: unknown pageType - " + pageType)
}

// sitePageType represents a site page type: the pair of functions stored by
// Register for one pageType.
type sitePageType struct {
// Conv is the reflected converter function, called by Convert.
Conv reflect.Value
// Input maps an input name to the html source handed to the converter.
Input func(string) any
}

// convs maps a pageType to the converter and input function registered for it.
var convs = make(map[string]sitePageType)

// Register associates pageType with a converter function and an input
// function. A later registration under the same pageType replaces the
// earlier one.
func Register(pageType string, conv Conv, input func(string) any) {
	convs[pageType] = sitePageType{
		Conv:  reflect.ValueOf(conv),
		Input: input,
	}
}

// -----------------------------------------------------------------------------
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
66 changes: 66 additions & 0 deletions fetcher/torch/gop_autogen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

13 changes: 13 additions & 0 deletions pysig/torch/pysig_torch.gop → fetcher/torch/pysig_torch.gop
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"strings"

"github.com/goplus/hdq"
"github.com/goplus/hdq/fetcher"
)

// -----------------------------------------------------------------------------
Expand All @@ -33,6 +34,7 @@ type Result struct {
Sig string `json:"sig"`
}

// New creates a new Result from a html document.
func New(doc hdq.NodeSet) Result {
fn := doc.any.dl.class("py function")
decl := fn.firstElementChild.dt.text!
Expand All @@ -44,3 +46,14 @@ func New(doc hdq.NodeSet) Result {
}
return {"", "", "<NULL>"}
}

// Input builds the documentation page URL for the torch symbol with the
// given name.
func Input(name string) any {
	const (
		prefix = "https://pytorch.org/docs/stable/generated/torch."
		suffix = ".html"
	)
	return prefix + name + suffix
}

// init registers the "torch" page type with the fetcher package, using New
// as the converter and Input as the input function.
func init() {
fetcher.Register("torch", New, Input)
}

// -----------------------------------------------------------------------------
File renamed without changes.
15 changes: 15 additions & 0 deletions hdq.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ type cachedGetter interface {
Cached() int
}

// NodeSet represents a set of nodes.
type NodeSet struct {
Data NodeEnum
Err error
Expand Down Expand Up @@ -83,6 +84,8 @@ func Source(r interface{}) (ret NodeSet) {
return New(r)
case io.Reader:
return New(v)
case NodeSet: // input is a node set
return v

Check warning on line 88 in hdq.go

View check run for this annotation

Codecov / codecov/patch

hdq.go#L87-L88

Added lines #L87 - L88 were not covered by tests
default:
panic("unsupport source type")
}
Expand Down Expand Up @@ -167,6 +170,7 @@ func (p *fixNodes) Cached() int {
return len(p.nodes)
}

// Nodes wraps the given nodes into a node set.
func Nodes(nodes ...*html.Node) (ret NodeSet) {
	ret.Data = &fixNodes{nodes: nodes}
	return ret
}
Expand Down Expand Up @@ -204,6 +208,7 @@ func anyForEach(p *html.Node, filter func(node *html.Node) error) error {
return nil
}

// Any returns all nodes as a node set.
func (p NodeSet) Any() (ret NodeSet) {
if p.Err != nil {
return p
Expand Down Expand Up @@ -258,10 +263,12 @@ func parentLevelForEach(p *html.Node, level int, filter func(node *html.Node) er
return filter(p)
}

// Child returns the child node set; it is shorthand for ChildN(1).
func (p NodeSet) Child() (ret NodeSet) {
	ret = p.ChildN(1)
	return ret
}

// ChildN returns the child node set at the given level.
func (p NodeSet) ChildN(level int) (ret NodeSet) {
if p.Err != nil || level == 0 {
return p
Expand All @@ -272,14 +279,17 @@ func (p NodeSet) ChildN(level int) (ret NodeSet) {
return NodeSet{Data: &parentLevelNodes{p.Data, level}}
}

// Parent returns the parent node set; it is shorthand for ParentN(1), which
// is implemented as ChildN with a negative level.
func (p NodeSet) Parent() (ret NodeSet) {
	ret = p.ChildN(-1)
	return ret
}

// ParentN returns the parent node set at the given level. It delegates to
// ChildN with the level negated.
func (p NodeSet) ParentN(level int) (ret NodeSet) {
	up := -level
	return p.ChildN(up)
}

// One returns the first node as a node set.
func (p NodeSet) One() (ret NodeSet) {
if _, ok := p.Data.(oneNode); ok {
return p
Expand Down Expand Up @@ -459,6 +469,7 @@ func (p *matchedNodes) ForEach(filter func(node *html.Node) error) {
})
}

// Match returns the matched node set.
func (p NodeSet) Match(filter func(node *html.Node) bool) (ret NodeSet) {
if p.Err != nil {
return p
Expand Down Expand Up @@ -497,6 +508,8 @@ func (p NodeSet) ChildrenAsText(doReplace bool) (ret NodeSet) {

// -----------------------------------------------------------------------------

// CollectOne returns the first node.
// If `exactly` is true, it will return an error if there are more than one node.
func (p NodeSet) CollectOne__1(exactly bool) (item *html.Node, err error) {
if p.Err != nil {
return nil, p.Err
Expand All @@ -520,10 +533,12 @@ func (p NodeSet) CollectOne__1(exactly bool) (item *html.Node, err error) {
return
}

// CollectOne returns the first node. It is the overload without arguments
// and forwards to CollectOne__1 with exactly set to false.
func (p NodeSet) CollectOne__0() (item *html.Node, err error) {
	const exactly = false
	return p.CollectOne__1(exactly)
}

// Collect returns all nodes.
func (p NodeSet) Collect() (items []*html.Node, err error) {
if p.Err != nil {
return nil, p.Err
Expand Down
4 changes: 2 additions & 2 deletions hdq_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ import (
"testing"

"github.com/goplus/hdq"
"github.com/goplus/hdq/fetcher/torch"
"github.com/goplus/hdq/hdqtest"
"github.com/goplus/hdq/pysig/torch"

repos "github.com/goplus/hdq/tutorial/02-GithubRepos"
)
Expand All @@ -39,5 +39,5 @@ func TestGithub(t *testing.T) {
}

func TestTorch(t *testing.T) {
hdqtest.FromDir(t, "", "./pysig/torch/_testdata", torch.New)
hdqtest.FromDir(t, "", "./fetcher/torch/_testdata", torch.New)
}
16 changes: 3 additions & 13 deletions hdqtest/hdqtest.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,12 @@ import (
"strings"
"testing"

"github.com/goplus/hdq"
"github.com/goplus/hdq/fetcher"
)

// func(doc hdq.NodeSet) any
type Converter = any

// FromDir tests all html files in a directory.
// optional params: [filename, scheme]
func FromDir(t *testing.T, sel, relDir string, conv Converter, params ...string) {
func FromDir(t *testing.T, sel, relDir string, conv fetcher.Conv, params ...string) {
dir, err := os.Getwd()
if err != nil {
t.Fatal("Getwd failed:", err)
Expand Down Expand Up @@ -73,16 +70,9 @@ func testFrom(t *testing.T, pkgDir, sel string, conv reflect.Value, fname, schem
t.Fatal("ReadFile failed:", err)
}
expected := string(b)
ret := ConvFile(in, conv)
ret := fetcher.Convert(conv, in)
retb, _ := json.MarshalIndent(ret, "", "\t")
if v := string(retb); v != expected {
t.Fatalf("\n==> got:\n%s\n==> expected:\n%s\n", v, expected)
}
}

// ConvFile converts a html source to an object.
func ConvFile(in any, conv reflect.Value) any {
doc := reflect.ValueOf(hdq.Source(in))
out := conv.Call([]reflect.Value{doc})
return out[0].Interface()
}
Loading

0 comments on commit c70c9d7

Please sign in to comment.