Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
jakopako committed Sep 13, 2022
1 parent 8b7497e commit a07aa37
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 10 deletions.
65 changes: 58 additions & 7 deletions generate/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,18 @@ import (
"strconv"
"strings"

"github.com/gdamore/tcell/v2"
"github.com/jakopako/goskyr/scraper"
"github.com/jakopako/goskyr/utils"
"github.com/rivo/tview"
"golang.org/x/net/html"
)

// locationProps bundles one candidate field location with the data shown to
// the user when choosing which fields to keep.
type locationProps struct {
// loc is the element location this candidate refers to.
loc scraper.ElementLocation
// count is a counter kept per location.
// NOTE(review): presumably the number of occurrences found on the page — confirm against the code that fills it.
count int
// examples holds sample strings for this location; showFieldsTable renders
// them in the example columns of the table.
examples []string
// selected is toggled by the table's selection handler in showFieldsTable.
selected bool
}

// locationManager is a list of pointers to candidate field locations.
type locationManager []*locationProps
Expand Down Expand Up @@ -152,7 +155,7 @@ outer:
}
}

func GetDynamicFieldsConfig(s *scraper.Scraper, minOcc int) error {
func GetDynamicFieldsConfig(s *scraper.Scraper, minOcc int, showDetails bool) error {
if s.URL == "" {
return errors.New("URL field cannot be empty")
}
Expand Down Expand Up @@ -262,12 +265,7 @@ parse:
return locMan[p].loc.Selector > locMan[q].loc.Selector
})

colorReset := "\033[0m"
colorGreen := "\033[32m"
colorBlue := "\033[34m"
for i, e := range locMan {
fmt.Printf("%sfield [%d]%s\n %slocation:%s %+v\n %sexamples:%s\n\t%s\n\n", colorGreen, i, colorReset, colorBlue, colorReset, e.loc, colorBlue, colorReset, strings.Join(e.examples, "\n\t"))
}
showFieldsTable(locMan, showDetails)

reader := bufio.NewReader(os.Stdin)
fmt.Println("please select one or more of the suggested fields by typing the according numbers separated by spaces:")
Expand All @@ -294,3 +292,56 @@ parse:
}
return fmt.Errorf("no fields found")
}

// showFieldsTable renders the candidate fields in locMan as an interactive
// terminal table and blocks until the tview application stops.
//
// Layout: the top-left corner cell is empty, the remaining header cells are
// labelled "example [n]" (blue), the first column is labelled "field [n]"
// (green), and every body cell shows one example string of the field,
// shortened to 50 bytes. Fields with fewer examples than there are example
// columns get empty cells instead of causing an out-of-range panic.
//
// Key handling: Escape stops the application, Enter switches the table to
// row selection. Selecting a field row toggles its selected flag and recolors
// the row label (red when selected, green otherwise). Selecting the header
// row is ignored.
//
// showDetails is currently not used by this function.
//
// The function panics if the tview application fails to run.
func showFieldsTable(locMan locationManager, showDetails bool) {
	app := tview.NewApplication()
	table := tview.NewTable().SetBorders(true)
	cols, rows := 5, len(locMan)+1
	for r := 0; r < rows; r++ {
		for c := 0; c < cols; c++ {
			text := ""
			color := tcell.ColorWhite
			switch {
			case r == 0 && c == 0:
				// Corner cell stays empty.
			case r == 0:
				// Header row: one label per example column.
				color = tcell.ColorBlue
				text = fmt.Sprintf("example [%d]", c-1)
			case c == 0:
				// First column: one label per field.
				color = tcell.ColorGreen
				text = fmt.Sprintf("field [%d]", r-1)
			default:
				// Body cell: a field may have fewer examples than columns,
				// so only index into examples when the entry exists.
				examples := locMan[r-1].examples
				if c-1 < len(examples) {
					text = utils.ShortenString(examples[c-1], 50)
				}
			}
			table.SetCell(r, c,
				tview.NewTableCell(text).
					SetTextColor(color).
					SetAlign(tview.AlignCenter))
		}
	}
	table.Select(1, 1).SetFixed(1, 1).SetDoneFunc(func(key tcell.Key) {
		if key == tcell.KeyEscape {
			app.Stop()
		}
		if key == tcell.KeyEnter {
			table.SetSelectable(true, false)
		}
	}).SetSelectedFunc(func(row int, column int) {
		// Row 0 is the header and has no backing entry in locMan; ignore it
		// (and anything past the end) rather than indexing out of range.
		if row < 1 || row > len(locMan) {
			return
		}
		field := locMan[row-1]
		field.selected = !field.selected
		if field.selected {
			table.GetCell(row, 0).SetTextColor(tcell.ColorRed)
		} else {
			table.GetCell(row, 0).SetTextColor(tcell.ColorGreen)
		}
	})
	if err := app.SetRoot(table, true).SetFocus(table).Run(); err != nil {
		panic(err)
	}
}
9 changes: 9 additions & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,15 @@ require (
require (
github.com/BurntSushi/toml v1.1.0 // indirect
github.com/andybalholm/cascadia v1.3.1 // indirect
github.com/gdamore/encoding v1.0.0 // indirect
github.com/gdamore/tcell/v2 v2.4.1-0.20210905002822-f057f0a857a1 // indirect
github.com/joho/godotenv v1.4.0 // indirect
github.com/lucasb-eyer/go-colorful v1.2.0 // indirect
github.com/mattn/go-runewidth v0.0.13 // indirect
github.com/rivo/tview v0.0.0-20220911190240-55965cf21d8e // indirect
github.com/rivo/uniseg v0.4.2 // indirect
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e // indirect
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
golang.org/x/text v0.3.7 // indirect
olympos.io/encoding/edn v0.0.0-20201019073823-d3554ca0b0a3 // indirect
)
18 changes: 18 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,40 @@ github.com/PuerkitoBio/goquery v1.8.0 h1:PJTF7AmFCFKk1N6V6jmKfrNH9tV5pNE6lZMkG0g
github.com/PuerkitoBio/goquery v1.8.0/go.mod h1:ypIiRMtY7COPGk+I/YbZLbxsxn9g5ejnI2HSMtkjZvI=
github.com/andybalholm/cascadia v1.3.1 h1:nhxRkql1kdYCc8Snf7D5/D3spOX+dBgjA6u8x004T2c=
github.com/andybalholm/cascadia v1.3.1/go.mod h1:R4bJ1UQfqADjvDa4P6HZHLh/3OxWWEqc0Sk8XGwHqvA=
github.com/gdamore/encoding v1.0.0 h1:+7OoQ1Bc6eTm5niUzBa0Ctsh6JbMW6Ra+YNuAtDBdko=
github.com/gdamore/encoding v1.0.0/go.mod h1:alR0ol34c49FCSBLjhosxzcPHQbf2trDkoo5dl+VrEg=
github.com/gdamore/tcell/v2 v2.4.1-0.20210905002822-f057f0a857a1 h1:QqwPZCwh/k1uYqq6uXSb9TRDhTkfQbO80v8zhnIe5zM=
github.com/gdamore/tcell/v2 v2.4.1-0.20210905002822-f057f0a857a1/go.mod h1:Az6Jt+M5idSED2YPGtwnfJV0kXohgdCBPmHGSYc1r04=
github.com/goodsign/monday v1.0.0 h1:Yyk/s/WgudMbAJN6UWSU5xAs8jtNewfqtVblAlw0yoc=
github.com/goodsign/monday v1.0.0/go.mod h1:r4T4breXpoFwspQNM+u2sLxJb2zyTaxVGqUfTBjWOu8=
github.com/ilyakaznacheev/cleanenv v1.3.0 h1:RapuLclPPUbmdd5Bi5UXScwMEZA6+ZNLU5OW9itPjj0=
github.com/ilyakaznacheev/cleanenv v1.3.0/go.mod h1:i0owW+HDxeGKE0/JPREJOdSCPIyOnmh6C0xhWAkF/xA=
github.com/joho/godotenv v1.4.0 h1:3l4+N6zfMWnkbPEXKng2o2/MR5mSwTrBih4ZEkkz1lg=
github.com/joho/godotenv v1.4.0/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4=
github.com/lucasb-eyer/go-colorful v1.2.0 h1:1nnpGOrhyZZuNyfu1QjKiUICQ74+3FNCN69Aj6K7nkY=
github.com/lucasb-eyer/go-colorful v1.2.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0=
github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/rivo/tview v0.0.0-20220911190240-55965cf21d8e h1:XsKimyZ6sBbEy3P5Nt1ml4fg4WYeIOuzoc0S0ye5OJ8=
github.com/rivo/tview v0.0.0-20220911190240-55965cf21d8e/go.mod h1:YX2wUZOcJGOIycErz2s9KvDaP0jnWwRCirQMPLPpQ+Y=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.2 h1:YwD0ulJSJytLpiaWua0sBDusfsCZohxjxzVTYjwxfV8=
github.com/rivo/uniseg v0.4.2/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
golang.org/x/net v0.0.0-20210916014120-12bc252f5db8/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/net v0.0.0-20220225172249-27dd8689420f h1:oA4XRj0qtSt8Yo1Zms0CUlsT3KG69V2UGQWPBxujDmc=
golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk=
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e h1:fLOSk5Q00efkSvAm+4xcoXD+RRmLmmulPn5I3Y9F2EM=
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20201210144234-2321bbc49cbf/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 h1:JGgROgKl9N8DuW20oFS5gxc+lE67/N3FcwmBPMe7ArY=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
Expand Down
3 changes: 2 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func main() {
// add flag to pass min nr of items for the generate flag.
generateConfig := flag.String("generate", "", "Needs an additional argument of the url whose config needs to be generated.")
m := flag.Int("min", 20, "The minimum number of events on a page. This is needed to filter out noise.")
d := flag.Bool("details", false, "Show details when presenting the different fields found with the generate flag.")

flag.Parse()

Expand All @@ -48,7 +49,7 @@ func main() {

if *generateConfig != "" {
s := &scraper.Scraper{URL: *generateConfig}
err := automate.GetDynamicFieldsConfig(s, *m)
err := automate.GetDynamicFieldsConfig(s, *m, *d)
if err != nil {
log.Fatal(err)
}
Expand Down
4 changes: 2 additions & 2 deletions scraper/scraper.go
Original file line number Diff line number Diff line change
Expand Up @@ -511,8 +511,8 @@ func getTextString(t *ElementLocation, s *goquery.Selection) (string, error) {
fieldString, err = extractStringRegex(&t.RegexExtract, fieldNode.Data)
if err == nil {
fieldString = strings.TrimSpace(fieldString)
if t.MaxLength > 0 && t.MaxLength < len(fieldString) {
fieldString = fieldString[:t.MaxLength] + "..."
if t.MaxLength > 0 {
fieldString = utils.ShortenString(fieldString, t.MaxLength)
}
return fieldString, nil
} else if t.ChildIndex != -1 {
Expand Down
8 changes: 8 additions & 0 deletions utils/utils.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package utils

import (
"fmt"
"net/http"
)

Expand All @@ -21,3 +22,10 @@ func FetchUrl(url string, userAgent string) (*http.Response, error) {
req.Header.Set("Accept", "*/*")
return client.Do(req)
}

// ShortenString limits s to at most l bytes.
//
// If s already fits into l bytes it is returned unchanged. Otherwise s is cut
// so that the result, including a trailing "...", is no longer than l bytes.
// When l is smaller than 3 there is no room for the ellipsis, so the result is
// just the leading part of s that fits; for l <= 0 the result is empty.
//
// The cut never falls inside a multi-byte UTF-8 sequence: it is moved back to
// the start of the rune, which may make the result slightly shorter than l.
func ShortenString(s string, l int) string {
	if len(s) <= l {
		return s
	}
	if l <= 0 {
		return ""
	}
	// Reserve three bytes for the ellipsis when there is room for it.
	cut, suffix := l-3, "..."
	if l < 3 {
		cut, suffix = l, ""
	}
	// cut < len(s) holds here, so s[cut] is a valid index. Step back over
	// UTF-8 continuation bytes (bit pattern 10xxxxxx) so a rune is never split.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return fmt.Sprintf("%s%s", s[:cut], suffix)
}

0 comments on commit a07aa37

Please sign in to comment.