Skip to content

Commit

Permalink
add piece attr output
Browse files Browse the repository at this point in the history
  • Loading branch information
himcc committed Jan 7, 2023
1 parent 5f14a39 commit 7461e0c
Show file tree
Hide file tree
Showing 4 changed files with 137 additions and 14 deletions.
7 changes: 5 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ Options:
-t, --text Text output for none-block selection mode
-R, --Raw Raw text output, no trimming of leading and trailing white space
-p, --piece sub CSS selectors within -css to split that block up into pieces
format: PieceName=[RAW:]selector_string
RAW: will return the selected as-is; else the text will be returned
format: PieceName=[OutputStyle:]selector_string
OutputStyle:
RAW : will return the selected HTML as-is
attr[xxx] : will return the value of an attribute named xxx
else the text will be returned
-d, --delimiter delimiter for pieces csv output [= ]
-w, --wrap-html wrap up the output with html tags
-y, --style style component within the wrapped html head
Expand Down
2 changes: 1 addition & 1 deletion cascadia_cliDef.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ type rootT struct {
CSS []string `cli:"*c,css" usage:"CSS selectors (can provide more if not using --piece)"`
TextOut bool `cli:"t,text" usage:"Text output for none-block selection mode"`
TextRaw bool `cli:"R,Raw" usage:"Raw text output, no trimming of leading and trailing white space"`
Piece MapStringString `cli:"p,piece" usage:"sub CSS selectors within -css to split that block up into pieces\n\t\t\tformat: PieceName=[RAW:]selector_string\n\t\t\tRAW: will return the selected as-is; else the text will be returned"`
Piece MapStringString `cli:"p,piece" usage:"sub CSS selectors within -css to split that block up into pieces\n\t\t\tformat: PieceName=[OutputStyle:]selector_string\n\t\t\tOutputStyle:\n\t\t\t\tRAW : will return the selected as-is\n\t\t\t\tattr[xxx] : will return the value of an attribute named xxx \n\t\t\telse the text will be returned"`
Deli string `cli:"d,delimiter" usage:"delimiter for pieces csv output" dft:"\t"`
WrapHTML bool `cli:"w,wrap-html" usage:"wrap up the output with html tags"`
Style string `cli:"y,style" usage:"style component within the wrapped html head"`
Expand Down
45 changes: 35 additions & 10 deletions cascadia_main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,23 @@ import (
// Constant and data type/structure definitions

const (
IsRaw = "RAW:"
IsRaw = "RAW"
WrapHTMLEnd = `</body>`
)

// OutputStyle identifies how the content matched by a piece selector is
// written to the output.
type OutputStyle int

// Output styles that a piece definition can select. The values are assigned
// by iota in declaration order, so OutputStyleRAW is the zero value.
const (
// OutputStyleRAW renders the selected node as-is.
OutputStyleRAW OutputStyle = iota
// OutputStyleATTR outputs the value of a named attribute of the selection.
OutputStyleATTR
// OutputStyleTEXT outputs the text of the selection.
OutputStyleTEXT
)

type MapStringString struct {
Keys []string
Values map[string]string
Raw map[string]bool
Keys []string
Values map[string]string
OutputStyles map[string]OutputStyle
AttrName map[string]string
}

// The OptsT type defines all the configurable options from cli.
Expand Down Expand Up @@ -174,10 +183,14 @@ func Cascadia(bi io.Reader, bw io.Writer, Opts OptsT) error {
//fmt.Printf("] #%d: %s\n", index, item.Text())
for _, key := range piece.Keys {
//fmt.Printf("] %s: %s\n", key, piece.Values[key])
if piece.Raw[key] {
switch piece.OutputStyles[key] {
case OutputStyleRAW:
html.Render(bw, item.Find(piece.Values[key]).Get(0))
fmt.Fprintf(bw, deli)
} else {
case OutputStyleATTR:
fmt.Fprintf(bw, "%s%s",
item.Find(piece.Values[key]).AttrOr(piece.AttrName[key], ""), deli)
case OutputStyleTEXT:
fmt.Fprintf(bw, "%s%s",
item.Find(piece.Values[key]).Contents().Text(), deli)
}
Expand All @@ -202,17 +215,29 @@ func (MapStringString) DecodeSlice() {}
func (m *MapStringString) Decode(s string) error {
if (m.Values) == nil {
m.Values = make(map[string]string)
m.Raw = make(map[string]bool)
m.OutputStyles = make(map[string]OutputStyle)
m.AttrName = make(map[string]string)
}
matches := regexp.MustCompile("(.*)=(.*)").FindStringSubmatch(s)
if len(matches) < 2 {
return errors.New("format error. To get help, run: " + progname)
}
key := matches[1]
val := matches[2]
if len(val) >= 4 && val[:4] == IsRaw {
m.Raw[key] = true
val = val[4:]
index := strings.Index(val, ":")
if index > 0 {
style := val[:index]
val = val[index+1:]
if style == IsRaw {
m.OutputStyles[key] = OutputStyleRAW
} else if strings.HasPrefix(style, "attr[") && strings.HasSuffix(style, "]") {
m.OutputStyles[key] = OutputStyleATTR
m.AttrName[key] = style[5 : len(style)-1]
} else {
m.OutputStyles[key] = OutputStyleTEXT
}
} else {
m.OutputStyles[key] = OutputStyleTEXT
}
m.Keys = append(m.Keys, key)
m.Values[key] = val
Expand Down
97 changes: 96 additions & 1 deletion cascadia_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ func TestSelectors(t *testing.T) {
}
}

////////////////////////////////////////////////////////////////////////////
// //////////////////////////////////////////////////////////////////////////
// The following is taken from
// https://github.com/andybalholm/cascadia/blob/c56252c33997d9b9991f4c1e3b5fbc82d6d656b3/selector_test.go
type selectorTest struct {
Expand Down Expand Up @@ -549,3 +549,98 @@ var selectorTests = []selectorTest{
},
},
}

// PieceAttrTest describes one table-driven case for TestPieceAttr.
type PieceAttrTest struct {
// HTML is the input document fed to Cascadia; selector is the single CSS
// selector placed in Opts.CSS for the run.
HTML, selector string
// results holds the expected output lines, in order; the test joins them
// with "\n" and appends a trailing "\n" before comparing.
results []string
// piece is the piece definition assigned to Opts.Piece for the run.
piece MapStringString
}

// PieceAttrTests is the case table consumed by TestPieceAttr. Every case runs
// the `li` selector over the same four-item list and asks for one attribute of
// the `a` element inside each item. The first expected line of each case is
// the piece name followed by the delimiter.
var PieceAttrTests = []PieceAttrTest{
	// The id attribute, which every anchor carries.
	{
		HTML: `<ul>
<li><a id="a1" href="http://www.google.com/finance"/>
<li><a id="a2" href="http://finance.yahoo.com/"/>
<li><a id="a3" href="https://www.google.com/news"></a>
<li><a id="a4" href="http://news.yahoo.com"/>
</ul>`,
		selector: "li",
		results: []string{
			"id,",
			"a1,",
			"a2,",
			"a3,",
			"a4,",
		},
		piece: MapStringString{
			Keys:         []string{"id"},
			Values:       map[string]string{"id": "a"},
			OutputStyles: map[string]OutputStyle{"id": OutputStyleATTR},
			AttrName:     map[string]string{"id": "id"},
		},
	},
	// An attribute that none of the anchors has: every row is expected to be
	// empty apart from the delimiter.
	{
		HTML: `<ul>
<li><a id="a1" href="http://www.google.com/finance"/>
<li><a id="a2" href="http://finance.yahoo.com/"/>
<li><a id="a3" href="https://www.google.com/news"></a>
<li><a id="a4" href="http://news.yahoo.com"/>
</ul>`,
		selector: "li",
		results: []string{
			"href2,",
			",",
			",",
			",",
			",",
		},
		piece: MapStringString{
			Keys:         []string{"href2"},
			Values:       map[string]string{"href2": "a"},
			OutputStyles: map[string]OutputStyle{"href2": OutputStyleATTR},
			AttrName:     map[string]string{"href2": "href2"},
		},
	},
	// The href attribute, whose values contain ':' and '/' characters.
	{
		HTML: `<ul>
<li><a id="a1" href="http://www.google.com/finance"/>
<li><a id="a2" href="http://finance.yahoo.com/"/>
<li><a id="a3" href="https://www.google.com/news"></a>
<li><a id="a4" href="http://news.yahoo.com"/>
</ul>`,
		selector: "li",
		results: []string{
			"href,",
			"http://www.google.com/finance,",
			"http://finance.yahoo.com/,",
			"https://www.google.com/news,",
			"http://news.yahoo.com,",
		},
		piece: MapStringString{
			Keys:         []string{"href"},
			Values:       map[string]string{"href": "a"},
			OutputStyles: map[string]OutputStyle{"href": OutputStyleATTR},
			AttrName:     map[string]string{"href": "href"},
		},
	},
}

// TestPieceAttr feeds every case in PieceAttrTests through Cascadia with the
// case's selector and piece definition, using "," as the piece delimiter, and
// compares everything written to the output with the expected lines.
//
// The test assigns to the shared Opts variable before each run.
func TestPieceAttr(t *testing.T) {
	for i, test := range PieceAttrTests {
		var buf bytes.Buffer
		Opts.CSS, Opts.Piece, Opts.Deli = []string{test.selector}, test.piece, ","
		Opts.WrapHTML, Opts.TextOut, Opts.TextRaw, Opts.Quiet = false, false, false, false

		// A failed run must fail the case rather than be compared as if it
		// had produced ordinary output.
		if err := Cascadia(strings.NewReader(test.HTML), &buf, Opts); err != nil {
			t.Errorf("case %d (selector %q): Cascadia returned error: %v", i, test.selector, err)
			continue
		}

		got := buf.String()
		if len(got) == 0 && len(test.results) == 0 {
			// Nothing expected and nothing produced.
			continue
		}
		// Every expected line, including the last one, is newline-terminated.
		want := strings.Join(test.results, "\n") + "\n"
		if got != want {
			// %q makes whitespace and newline differences visible.
			t.Errorf("case %d (selector %q): wanted %q, got %q instead", i, test.selector, want, got)
		}
	}
}

0 comments on commit 7461e0c

Please sign in to comment.