diff --git a/tutorial/02-GithubRepos/data.zip b/_testdata/github/repos/data.zip similarity index 100% rename from tutorial/02-GithubRepos/data.zip rename to _testdata/github/repos/data.zip diff --git a/tutorial/02-GithubRepos/repos_test.gop b/_testdata/github/repos/out.json similarity index 95% rename from tutorial/02-GithubRepos/repos_test.gop rename to _testdata/github/repos/out.json index dca09bb..28ba01b 100644 --- a/tutorial/02-GithubRepos/repos_test.gop +++ b/_testdata/github/repos/out.json @@ -1,13 +1,4 @@ -package repos - -import ( - "encoding/json" - "testing" - - "github.com/goplus/hdq" -) - -const outTestNew = `{ +{ "Repos": [ { "Repo": "/xushiwei/linguist", @@ -251,17 +242,4 @@ const outTestNew = `{ } ], "Next": "https://github.com/xushiwei?after=Y3Vyc29yOnYyOpK0MjAyMC0wNS0xMlQxMToxNTozN1rOD7hDgA%3D%3D\u0026tab=repositories" -}` - -func TestNew(t *testing.T) { - doc := hdq.Source("zip://data.zip#index.htm") - ret := New(doc) - b, err := json.MarshalIndent(ret, "", "\t") - if err != nil { - t.Fatal("json.MarshalIndent:", err) - } - out := string(b) - if out != outTestNew { - t.Fatal("TestNew failed:", out) - } -} +} \ No newline at end of file diff --git a/hdq_test.go b/hdq_test.go index 1bd5524..05f6d8c 100644 --- a/hdq_test.go +++ b/hdq_test.go @@ -21,6 +21,8 @@ import ( "github.com/goplus/hdq" "github.com/goplus/hdq/hdqtest" "github.com/goplus/hdq/pysig/torch" + + repos "github.com/goplus/hdq/tutorial/02-GithubRepos" ) func textOf(doc hdq.NodeSet) (ret string) { @@ -32,6 +34,10 @@ func TestText(t *testing.T) { hdqtest.FromDir(t, "", "./_testdata/text", textOf) } -func TestTestdata(t *testing.T) { +func TestGithub(t *testing.T) { + hdqtest.FromDir(t, "", "./_testdata/github", repos.New, "data.zip#index.htm", "zip") +} + +func TestTorch(t *testing.T) { hdqtest.FromDir(t, "", "./pysig/torch/_testdata", torch.New) } diff --git a/hdqtest/hdqtest.go b/hdqtest/hdqtest.go index f15db7f..1f3a27b 100644 --- a/hdqtest/hdqtest.go +++ b/hdqtest/hdqtest.go @@ -31,7 
+31,8 @@ import ( type Converter = any // FromDir tests all html files in a directory. -func FromDir(t *testing.T, sel, relDir string, conv Converter) { +// optional params: [filename, scheme] +func FromDir(t *testing.T, sel, relDir string, conv Converter, params ...string) { dir, err := os.Getwd() if err != nil { t.Fatal("Getwd failed:", err) @@ -42,23 +43,30 @@ func FromDir(t *testing.T, sel, relDir string, conv Converter) { t.Fatal("ReadDir failed:", err) } vConv := reflect.ValueOf(conv) + scheme, fname := "", "/in.html" + if len(params) > 0 { + fname = "/" + params[0] + if len(params) > 1 { + scheme = params[1] + ":" + } + } for _, fi := range fis { name := fi.Name() if !fi.IsDir() || strings.HasPrefix(name, "_") { continue } t.Run(name, func(t *testing.T) { - testFrom(t, dir+"/"+name, sel, vConv) + testFrom(t, dir+"/"+name, sel, vConv, fname, scheme) }) } } -func testFrom(t *testing.T, pkgDir, sel string, conv reflect.Value) { +func testFrom(t *testing.T, pkgDir, sel string, conv reflect.Value, fname, scheme string) { if sel != "" && !strings.Contains(pkgDir, sel) { return } log.Println("Parsing", pkgDir) - in := pkgDir + "/in.html" + in := scheme + pkgDir + fname out := pkgDir + "/out.json" b, err := os.ReadFile(out) if err != nil { diff --git a/stream/http/httpstrm.go b/stream/http/httpstrm.go index be0dcc3..e01d0fc 100644 --- a/stream/http/httpstrm.go +++ b/stream/http/httpstrm.go @@ -30,16 +30,8 @@ var ( // ------------------------------------------------------------------------------------- -// Open opens a zipped file object. -func Open(file string) (io.ReadCloser, error) { - return httpOpen("http://" + file) -} - -func Opens(file string) (io.ReadCloser, error) { - return httpOpen("https://" + file) -} - -func httpOpen(url string) (io.ReadCloser, error) { +// Open opens an HTTP file object.
+func Open(url string) (io.ReadCloser, error) { resp, err := httpGet(url) if err != nil { return nil, err @@ -62,9 +54,8 @@ func httpGet(url string) (resp *http.Response, err error) { } func init() { - // http://path, https://path - stream.RegisterSchema("http", Open) - stream.RegisterSchema("https", Opens) + stream.Register("http", Open) + stream.Register("https", Open) } // ------------------------------------------------------------------------------------- diff --git a/stream/inline/inline.go b/stream/inline/inline.go new file mode 100644 index 0000000..721e683 --- /dev/null +++ b/stream/inline/inline.go @@ -0,0 +1,42 @@ +/* +Copyright 2024 The GoPlus Authors (goplus.org) +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package inline + +import ( + "io" + "strings" + + "github.com/goplus/hdq/stream" +) + +type nilCloser struct { + io.Reader +} + +func (p *nilCloser) Close() error { + return nil +} + +// Open opens an inline text object.
+func Open(url string) (io.ReadCloser, error) { + file := strings.TrimPrefix(url, "inline:") + r := strings.NewReader(file) + return &nilCloser{r}, nil +} + +func init() { + stream.Register("inline", Open) +} diff --git a/stream/stream.go b/stream/stream.go index 0bfcf4c..d91ebc5 100644 --- a/stream/stream.go +++ b/stream/stream.go @@ -18,43 +18,47 @@ package stream import ( "errors" "io" + "io/fs" "os" "strings" ) -// ------------------------------------------------------------------------------------- - -func SplitSchema(path string) (schema, file string) { - idx := strings.IndexAny(path, ":/\\ ") - if idx < 0 || path[idx] != ':' { - return "", path - } - schema, file = path[:idx], path[idx+1:] - file = strings.TrimPrefix(file, "//") - return -} +var ( + ErrUnknownScheme = errors.New("unknown scheme") +) // ------------------------------------------------------------------------------------- type OpenFunc = func(file string) (io.ReadCloser, error) var ( - openSchemas = map[string]OpenFunc{} + openers = map[string]OpenFunc{} ) -func RegisterSchema(schema string, open OpenFunc) { - openSchemas[schema] = open +// Register registers a scheme with an open function. 
+func Register(scheme string, open OpenFunc) { + openers[scheme] = open } -func Open(path string) (io.ReadCloser, error) { - schema, file := SplitSchema(path) - if schema == "" { - return os.Open(path) +func Open(url string) (io.ReadCloser, error) { + scheme := schemeOf(url) + if scheme == "" { + return os.Open(url) + } + if open, ok := openers[scheme]; ok { + return open(url) } - if open, ok := openSchemas[schema]; ok { - return open(file) + return nil, &fs.PathError{Op: "hdq/stream.Open", Err: ErrUnknownScheme, Path: url} +} + +func schemeOf(url string) (scheme string) { + pos := strings.IndexAny(url, ":/") + if pos > 0 { + if url[pos] == ':' { + return url[:pos] + } } - return nil, errors.New("stream.Open: unsupported schema - " + schema) + return "" } // ------------------------------------------------------------------------------------- diff --git a/stream/stream_test.go b/stream/stream_test.go index e1754aa..f30b745 100644 --- a/stream/stream_test.go +++ b/stream/stream_test.go @@ -13,30 +13,18 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -package stream +package stream_test import ( "io" - "strings" "testing" -) - -type nilCloser struct { - io.Reader -} -func (p *nilCloser) Close() error { - return nil -} - -func inlOpen(file string) (io.ReadCloser, error) { - r := strings.NewReader(file) - return &nilCloser{r}, nil -} + "github.com/goplus/hdq/stream" + _ "github.com/goplus/hdq/stream/inline" +) func TestBasic(t *testing.T) { - RegisterSchema("inl", inlOpen) - f, err := Open("inl://hello") + f, err := stream.Open("inline:hello") if err != nil { t.Fatal("Open failed:", err) } @@ -49,15 +37,15 @@ func TestBasic(t *testing.T) { } } -func TestUnknownSchema(t *testing.T) { - _, err := Open("bad://foo") - if err == nil || err.Error() != "stream.Open: unsupported schema - bad" { +func TestUnknownScheme(t *testing.T) { + _, err := stream.Open("bad://foo") + if err == nil || err.Error() != "hdq/stream.Open bad://foo: unknown scheme" { t.Fatal("Open failed:", err) } } func TestOpenFile(t *testing.T) { - _, err := Open("/bin/not-exists/foo") + _, err := stream.Open("/bin/not-exists/foo") if err == nil { t.Fatal("Open local file success?") } diff --git a/stream/zip/zipstrm.go b/stream/zip/zipstrm.go index b9d36e0..9a89944 100644 --- a/stream/zip/zipstrm.go +++ b/stream/zip/zipstrm.go @@ -37,7 +37,8 @@ func (p *readCloser) Close() error { } // Open opens a zipped file object. 
-func Open(file string) (io.ReadCloser, error) { +func Open(url string) (io.ReadCloser, error) { + file := strings.TrimPrefix(url, "zip:") pos := strings.Index(file, "#") if pos <= 0 { return nil, syscall.EINVAL @@ -60,8 +61,8 @@ func Open(file string) (io.ReadCloser, error) { } func init() { - // zip://file#index.htm - stream.RegisterSchema("zip", Open) + // zip:file#index.htm + stream.Register("zip", Open) } // ------------------------------------------------------------------------------------- diff --git a/tutorial/02-GithubRepos/gop_autogen_test.go b/tutorial/02-GithubRepos/gop_autogen_test.go deleted file mode 100644 index 1820925..0000000 --- a/tutorial/02-GithubRepos/gop_autogen_test.go +++ /dev/null @@ -1,276 +0,0 @@ -// Code generated by gop (Go+); DO NOT EDIT. - -package repos - -import ( - "encoding/json" - hdq1 "github.com/goplus/hdq" - "testing" -) - -const outTestNew = `{ - "Repos": [ - { - "Repo": "/xushiwei/linguist", - "ForkedFrom": "/github/linguist", - "Title": "Language Savant. 
If your repository's language is being reported incorrectly, send us a pull request!\n", - "Language": "Ruby", - "UpdateTime": "2021-08-08T17:39:30Z", - "Forks": 3221 - }, - { - "Repo": "/xushiwei/x", - "ForkedFrom": "/qiniu/x", - "Title": "Extension of go standard library\n", - "Language": "Go", - "UpdateTime": "2021-08-04T16:37:08Z", - "Forks": 16 - }, - { - "Repo": "/xushiwei/fyne", - "ForkedFrom": "/fyne-io/fyne", - "Title": "Cross platform GUI in Go based on Material Design\n", - "Language": "Go", - "UpdateTime": "2021-07-27T11:26:17Z", - "Forks": 726 - }, - { - "Repo": "/xushiwei/qlang", - "ForkedFrom": "/goplus/gop", - "Title": "Q Language - A script language for Go\n", - "Language": "Go", - "UpdateTime": "2021-07-20T22:00:43Z", - "Forks": 384 - }, - { - "Repo": "/xushiwei/winfsp", - "ForkedFrom": "/billziss-gh/winfsp", - "Title": "Windows File System Proxy - FUSE for Windows\n", - "Language": "C", - "UpdateTime": "2021-02-03T00:51:42Z", - "Forks": 320 - }, - { - "Repo": "/xushiwei/embeddedgo", - "ForkedFrom": "/embeddedgo/go", - "Title": "The Go programming language with support for bare-matal programing\n", - "Language": "Go", - "UpdateTime": "2020-12-31T16:37:34Z", - "Forks": 13037 - }, - { - "Repo": "/xushiwei/oak", - "ForkedFrom": "/oakmound/oak", - "Title": "A pure Go game engine\n", - "Language": "Go", - "UpdateTime": "2020-08-04T01:28:32Z", - "Forks": 58 - }, - { - "Repo": "/xushiwei/GhostDB", - "ForkedFrom": "/jakekgrog/GhostDB", - "Title": "GhostDB is a distributed, in-memory, general purpose key-value data store that delivers microsecond performance at any scale.\n", - "Language": "Go", - "UpdateTime": "2020-08-01T10:58:55Z", - "Forks": 35 - }, - { - "Repo": "/xushiwei/DeepLearning-500-questions", - "ForkedFrom": "/scutan90/DeepLearning-500-questions", - "Title": "深度学习500问,以问答形式对常用的概率知识、线性代数、机器学习、深度学习、计算机视觉等热点问题进行阐述,以帮助自己及有需要的读者。 全书分为18个章节,50余万字。由于水平有限,书中不妥之处恳请广大读者批评指正。 未完待续............ 
如有意合作,联系scutjy2015@163.com 版权所有,违权必究 Tan 2018.06\n", - "Language": "", - "UpdateTime": "2020-07-20T12:06:41Z", - "Forks": 14227 - }, - { - "Repo": "/xushiwei/simdjson-go", - "ForkedFrom": "/minio/simdjson-go", - "Title": "Golang port of simdjson: parsing gigabytes of JSON per second\n", - "Language": "Go", - "UpdateTime": "2020-06-26T22:23:56Z", - "Forks": 59 - }, - { - "Repo": "/xushiwei/Paddle", - "ForkedFrom": "/PaddlePaddle/Paddle", - "Title": "PArallel Distributed Deep LEarning: Machine Learning Framework from Industrial Practice (『飞桨』核心框架,深度学习\u0026机器学习高性能单机、分布式训练和跨平台部署)\n", - "Language": "C++", - "UpdateTime": "2020-06-21T15:35:31Z", - "Forks": 3953 - }, - { - "Repo": "/xushiwei/gorgonia", - "ForkedFrom": "/gorgonia/gorgonia", - "Title": "Gorgonia is a library that helps facilitate machine learning in Go.\n", - "Language": "Go", - "UpdateTime": "2020-06-20T08:32:36Z", - "Forks": 359 - }, - { - "Repo": "/xushiwei/caire", - "ForkedFrom": "/esimov/caire", - "Title": "Content aware image resize library\n", - "Language": "Go", - "UpdateTime": "2020-06-18T08:10:21Z", - "Forks": 360 - }, - { - "Repo": "/xushiwei/goplus-play", - "ForkedFrom": "/visualfc/goplus-play", - "Title": "Playground of the Go+ language\n", - "Language": "JavaScript", - "UpdateTime": "2020-06-17T15:40:34Z", - "Forks": 2 - }, - { - "Repo": "/xushiwei/bpl", - "ForkedFrom": "/qiniu/bpl", - "Title": "Binary Processing Language\n", - "Language": "Go", - "UpdateTime": "2020-06-07T14:40:37Z", - "Forks": 28 - }, - { - "Repo": "/xushiwei/c2goasm", - "ForkedFrom": "/minio/c2goasm", - "Title": "C to Go Assembly\n", - "Language": "Go", - "UpdateTime": "2020-06-04T02:52:47Z", - "Forks": 92 - }, - { - "Repo": "/xushiwei/asm2plan9s", - "ForkedFrom": "/minio/asm2plan9s", - "Title": "Tool to generate BYTE sequences for Go assembly as generated by YASM\n", - "Language": "Go", - "UpdateTime": "2020-06-04T00:32:38Z", - "Forks": 31 - }, - { - "Repo": "/xushiwei/scipy", - "ForkedFrom": "/scipy/scipy", - "Title": 
"Scipy library main repository\n", - "Language": "Python", - "UpdateTime": "2020-05-24T16:58:25Z", - "Forks": 3826 - }, - { - "Repo": "/xushiwei/sympy", - "ForkedFrom": "/sympy/sympy", - "Title": "A computer algebra system written in pure Python\n", - "Language": "Python", - "UpdateTime": "2020-05-24T12:35:57Z", - "Forks": 3435 - }, - { - "Repo": "/xushiwei/matplotlib", - "ForkedFrom": "/matplotlib/matplotlib", - "Title": "matplotlib: plotting with Python\n", - "Language": "Python", - "UpdateTime": "2020-05-24T00:49:40Z", - "Forks": 5961 - }, - { - "Repo": "/xushiwei/notebook", - "ForkedFrom": "/jupyter/notebook", - "Title": "Jupyter Interactive Notebook\n", - "Language": "JavaScript", - "UpdateTime": "2020-05-23T00:20:16Z", - "Forks": 3473 - }, - { - "Repo": "/xushiwei/jax", - "ForkedFrom": "/google/jax", - "Title": "Composable transformations of Python+NumPy programs: differentiate, vectorize, JIT to GPU/TPU, and more\n", - "Language": "Python", - "UpdateTime": "2020-05-22T06:56:01Z", - "Forks": 1288 - }, - { - "Repo": "/xushiwei/flax", - "ForkedFrom": "/google/flax", - "Title": "Flax is a neural network library for JAX that is designed for flexibility.\n", - "Language": "Python", - "UpdateTime": "2020-05-21T22:57:17Z", - "Forks": 249 - }, - { - "Repo": "/xushiwei/liner", - "ForkedFrom": "/peterh/liner", - "Title": "Pure Go line editor with history, inspired by linenoise\n", - "Language": "Go", - "UpdateTime": "2020-05-16T16:24:21Z", - "Forks": 105 - }, - { - "Repo": "/xushiwei/query", - "ForkedFrom": "/couchbase/query", - "Title": "Query engine.\n", - "Language": "Go", - "UpdateTime": "2020-05-15T23:14:38Z", - "Forks": 41 - }, - { - "Repo": "/xushiwei/pandas", - "ForkedFrom": "/pandas-dev/pandas", - "Title": "Flexible and powerful data analysis / manipulation library for Python, providing labeled data structures similar to R data.frame objects, statistical functions, and much more\n", - "Language": "Python", - "UpdateTime": "2020-05-13T20:43:46Z", - "Forks": 
12866 - }, - { - "Repo": "/xushiwei/hugo", - "ForkedFrom": "/gohugoio/hugo", - "Title": "The world’s fastest framework for building websites.\n", - "Language": "Go", - "UpdateTime": "2020-05-13T19:44:45Z", - "Forks": 6086 - }, - { - "Repo": "/xushiwei/netlify-cms", - "ForkedFrom": "/netlify/netlify-cms", - "Title": "A Git-based CMS for Static Site Generators\n", - "Language": "JavaScript", - "UpdateTime": "2020-05-13T16:47:59Z", - "Forks": 2446 - }, - { - "Repo": "/xushiwei/presto", - "ForkedFrom": "/prestodb/presto", - "Title": "The official home of the Presto distributed SQL query engine for big data\n", - "Language": "Java", - "UpdateTime": "2020-05-13T01:19:32Z", - "Forks": 4234 - }, - { - "Repo": "/xushiwei/gonum", - "ForkedFrom": "/gonum/gonum", - "Title": "Gonum is a set of numeric libraries for the Go programming language. It contains libraries for matrices, statistics, optimization, and more\n", - "Language": "Go", - "UpdateTime": "2020-05-12T11:15:37Z", - "Forks": 409 - } - ], - "Next": "https://github.com/xushiwei?after=Y3Vyc29yOnYyOpK0MjAyMC0wNS0xMlQxMToxNTozN1rOD7hDgA%3D%3D\u0026tab=repositories" -}` -//line tutorial/02-GithubRepos/repos_test.gop:256:1 -func TestNew(t *testing.T) { -//line tutorial/02-GithubRepos/repos_test.gop:257:1 - doc := hdq1.Source("zip://data.zip#index.htm") -//line tutorial/02-GithubRepos/repos_test.gop:258:1 - ret := New(doc) -//line tutorial/02-GithubRepos/repos_test.gop:259:1 - b, err := json.MarshalIndent(ret, "", "\t") -//line tutorial/02-GithubRepos/repos_test.gop:260:1 - if err != nil { -//line tutorial/02-GithubRepos/repos_test.gop:261:1 - t.Fatal("json.MarshalIndent:", err) - } -//line tutorial/02-GithubRepos/repos_test.gop:263:1 - out := string(b) -//line tutorial/02-GithubRepos/repos_test.gop:264:1 - if out != outTestNew { -//line tutorial/02-GithubRepos/repos_test.gop:265:1 - t.Fatal("TestNew failed:", out) - } -}