Skip to content

Commit

Permalink
extractors/pixivision: Add support
Browse files Browse the repository at this point in the history
  • Loading branch information
iawia002 committed Mar 6, 2018
1 parent f3822ed commit b750e9b
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 29 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ Site | URL | Videos | Images
抖音 | <https://www.douyin.com> | ✓ | |
哔哩哔哩 | <https://www.bilibili.com> | ✓ | |
半次元 | <https://bcy.net> | | ✓ |
pixivision | <https://www.pixivision.net> | | ✓ |


## Development
Expand Down
10 changes: 0 additions & 10 deletions downloader/downloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,8 @@ import (
"fmt"
"io"
"log"
"net/url"
"os"
"os/exec"
"strings"
"sync"
"time"

Expand Down Expand Up @@ -37,14 +35,6 @@ type VideoData struct {
Type string
}

// GetNameAndExt returns the file name and the extension (without the leading
// dot) taken from the last path segment of data.URL.
//
// The extension is whatever follows the final "." of that segment, so
// "archive.tar.gz" yields ("archive.tar", "gz"). When the segment contains no
// dot the extension is empty. When data.URL cannot be parsed by
// url.ParseRequestURI both results are empty.
func (data URLData) GetNameAndExt() (string, string) {
	u, err := url.ParseRequestURI(data.URL)
	if err != nil {
		// u is nil on error; there is no path to inspect.
		return "", ""
	}
	// LastIndex returns -1 when there is no "/", which makes the slice start
	// at 0 and keeps the whole path as the segment.
	segment := u.Path[strings.LastIndex(u.Path, "/")+1:]
	dot := strings.LastIndex(segment, ".")
	if dot < 0 {
		return segment, ""
	}
	return segment[:dot], segment[dot+1:]
}

func (data VideoData) printInfo() {
fmt.Println()
fmt.Println(" Site: ", data.Site)
Expand Down
24 changes: 5 additions & 19 deletions extractors/bcy.go
Original file line number Diff line number Diff line change
@@ -1,36 +1,22 @@
package extractors

import (
"log"
"strings"

"github.com/PuerkitoBio/goquery"

"github.com/iawia002/annie/downloader"
"github.com/iawia002/annie/parser"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)

// Bcy download function
func Bcy(url string) downloader.VideoData {
html := request.Get(url)
doc, err := goquery.NewDocumentFromReader(strings.NewReader(html))
if err != nil {
log.Fatal(err)
}
title := strings.TrimSpace(doc.Find("h1").First().Text())
urls := []downloader.URLData{}
urlData := downloader.URLData{}
doc.Find("img[class=\"detail_std detail_clickable\"]").Each(
func(i int, s *goquery.Selection) {
urlData.URL, _ = s.Attr("src")
title, urls := parser.GetImages(
url, html, "detail_std detail_clickable", func(u string) string {
// https://img9.bcyimg.com/drawer/15294/post/1799t/1f5a87801a0711e898b12b640777720f.jpg/w650
urlData.URL = urlData.URL[:len(urlData.URL)-5]
urlData.Size = request.Size(urlData.URL, url)
_, urlData.Ext = urlData.GetNameAndExt()
urls = append(urls, urlData)
return u[:len(u)-5]
},
)

data := downloader.VideoData{
Site: "半次元 bcy.net",
Title: utils.FileName(title),
Expand Down
23 changes: 23 additions & 0 deletions extractors/pixivision.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
package extractors

import (
"github.com/iawia002/annie/downloader"
"github.com/iawia002/annie/parser"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)

// Pixivision fetches the page at url, gathers the illustration images found
// on it, starts the download and returns the assembled VideoData.
func Pixivision(url string) downloader.VideoData {
	page := request.Get(url)

	// The class string is passed verbatim, trailing space included:
	// parser.GetImages compares it against the full class attribute value.
	pageTitle, images := parser.GetImages(url, page, "am__work__illust ", nil)

	var result downloader.VideoData
	result.Site = "pixivision pixivision.net"
	result.Title = utils.FileName(pageTitle)
	result.Type = "image"
	result.URLs = images
	result.Size = 0

	result.Download(url)
	return result
}
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ func main() {
extractors.Bilibili(videoURL)
case "bcy":
extractors.Bcy(videoURL)
case "pixivision":
extractors.Pixivision(videoURL)
default:
fmt.Println("unsupported URL")
}
Expand Down
44 changes: 44 additions & 0 deletions parser/parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
package parser

import (
"fmt"
"log"
"strings"

"github.com/PuerkitoBio/goquery"

"github.com/iawia002/annie/downloader"
"github.com/iawia002/annie/request"
"github.com/iawia002/annie/utils"
)

// GetDoc parses the given HTML string and returns the resulting goquery
// Document. A parse error is reported through log.Fatal.
func GetDoc(html string) *goquery.Document {
	reader := strings.NewReader(html)
	document, parseErr := goquery.NewDocumentFromReader(reader)
	if parseErr == nil {
		return document
	}
	log.Fatal(parseErr)
	return document
}

// GetImages returns the page title and the list of images found in html.
//
// The title is the trimmed text of the first <h1> element. An image is every
// <img> element whose class attribute is exactly imgClass. For each image the
// src attribute is read, optionally rewritten by urlHandler (skipped when
// urlHandler is nil), and then its size and extension are looked up. url is
// forwarded as the second argument of request.Size.
//
// Images without a src attribute, or with an empty one, are skipped: there is
// nothing to download, and an empty URL would otherwise reach urlHandler and
// the extension lookup.
func GetImages(
	url, html, imgClass string, urlHandler func(string) string,
) (string, []downloader.URLData) {
	doc := GetDoc(html)
	title := strings.TrimSpace(doc.Find("h1").First().Text())
	// Kept as a non-nil empty slice so the result is unchanged for callers
	// when no image matches.
	urls := []downloader.URLData{}
	doc.Find(fmt.Sprintf("img[class=\"%s\"]", imgClass)).Each(
		func(i int, s *goquery.Selection) {
			src, ok := s.Attr("src")
			if !ok || src == "" {
				return
			}
			if urlHandler != nil {
				src = urlHandler(src)
			}
			// A fresh value per image, so no state leaks between iterations.
			urlData := downloader.URLData{URL: src}
			urlData.Size = request.Size(src, url)
			_, urlData.Ext = utils.GetNameAndExt(src)
			urls = append(urls, urlData)
		},
	)
	return title, urls
}
9 changes: 9 additions & 0 deletions utils/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package utils

import (
"fmt"
"net/url"
"os"
"regexp"
"strings"
Expand Down Expand Up @@ -59,3 +60,11 @@ func StringInSlice(str string, list []string) bool {
}
return false
}

// GetNameAndExt returns the file name and the extension (without the leading
// dot) taken from the last path segment of uri.
//
// The extension is whatever follows the final "." of that segment, so
// "archive.tar.gz" yields ("archive.tar", "gz"). When the segment contains no
// dot the extension is empty. When uri cannot be parsed by
// url.ParseRequestURI both results are empty.
func GetNameAndExt(uri string) (string, string) {
	u, err := url.ParseRequestURI(uri)
	if err != nil {
		// u is nil on error; there is no path to inspect.
		return "", ""
	}
	// LastIndex returns -1 when there is no "/", which makes the slice start
	// at 0 and keeps the whole path as the segment.
	segment := u.Path[strings.LastIndex(u.Path, "/")+1:]
	dot := strings.LastIndex(segment, ".")
	if dot < 0 {
		return segment, ""
	}
	return segment[:dot], segment[dot+1:]
}

0 comments on commit b750e9b

Please sign in to comment.