readcomiconline fixes
Girbons committed Aug 21, 2022
1 parent 42a880c commit 9142efa
Showing 4 changed files with 63 additions and 30 deletions.
2 changes: 1 addition & 1 deletion internal/version/version.go
@@ -9,7 +9,7 @@ import (

// Tag specifies the current release tag.
// It needs to be manually updated.
const Tag = "v0.33.3"
const Tag = "v0.33.4-alpha"

// IsNewAvailable will fetch the latest project releases
// and will compare the latest release Tag against the current Tag.
1 change: 0 additions & 1 deletion pkg/http/client.go
@@ -20,7 +20,6 @@ func NewComicClient() *ComicClient {
func (c *ComicClient) PrepareRequest(link string) (*http.Request, error) {
req, err := http.NewRequest("GET", link, nil)
req.Header.Add("Referer", link)

return req, err
}

2 changes: 1 addition & 1 deletion pkg/sites/loader.go
@@ -94,7 +94,7 @@ func LoadComicFromSource(options *config.Options) ([]*core.Comic, error) {
)

switch options.Source {
case "readcomiconline.li":
case "readcomiconline.li", "readcomicsonline.ru":
base = NewReadComiconline(options)
case "www.comicextra.com":
base = NewComicextra(options)
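A minimal usage sketch for the new alias, assuming this repository's package layout; the import paths below and the exact field set of config.Options are inferred rather than shown in this diff, so treat them as assumptions:

package main

import (
    "log"

    // assumed import paths, based on the pkg/ layout visible in this commit
    "github.com/Girbons/comics-downloader/pkg/config"
    "github.com/Girbons/comics-downloader/pkg/sites"
)

func main() {
    // Both readcomiconline.li and readcomicsonline.ru now resolve to the
    // ReadComicOnline scraper in LoadComicFromSource.
    opts := &config.Options{
        Source: "readcomicsonline.ru",
        URL:    "https://readcomicsonline.ru/comic/example-comic", // hypothetical URL
        // real runs may need further fields (logger, output format, ...),
        // which are outside the scope of this sketch
    }

    comics, err := sites.LoadComicFromSource(opts)
    if err != nil {
        log.Fatal(err)
    }
    log.Printf("retrieved %d issue(s)", len(comics))
}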
88 changes: 61 additions & 27 deletions pkg/sites/readcomiconline.go
@@ -12,6 +12,7 @@ import (
)

var baseUrl = "https://readcomiconline.li"
var highQuality = "?quality=hd&readType=1"

// ReadComicOnline represents a readcomiconline instance.
type ReadComicOnline struct {
@@ -20,28 +21,63 @@ type ReadComicOnline struct {

// NewReadComiconline returns a readcomiconline instance.
func NewReadComiconline(options *config.Options) *ReadComicOnline {
return &ReadComicOnline{
options: options,
}
return &ReadComicOnline{options: options}
}

func (c *ReadComicOnline) retrieveImageLinks(comic *core.Comic) ([]string, error) {
var links []string
var (
links []string
pages []string
)

comic.URLSource = strings.Split(comic.URLSource, "?")[0]

response, err := soup.Get(comic.URLSource + "?quality=hd&readType=1")
response, err := soup.Get(comic.URLSource + highQuality)
if err != nil {
return nil, err
}

re := regexp.MustCompile(`push\(\"(.*?)\"\)`)
match := re.FindAllStringSubmatch(response, -1)
document := soup.HTMLParse(response)

for i := range match {
url := match[i][1]
if util.IsURLValid(url) {
links = append(links, url)
if strings.HasSuffix(c.options.Source, ".ru") {
results := document.Find("select", "id", "page-list")
// extract pages
for _, el := range results.FindAll("option") {
pages = append(pages, el.Attrs()["value"])
}

for _, page := range pages {
url := comic.URLSource

if !strings.HasSuffix(url, "/") {
url += "/"
}
url += page + highQuality

resp, _ := soup.Get(url)
inner_document := soup.HTMLParse(resp)

for _, l := range inner_document.FindAll("img") {
image_link := strings.Replace(l.Attrs()["src"], " ", "", -1)

if image_link != "" && strings.Contains(image_link, "chapters") {
links = append(links, image_link)
}
}

}
} else {
fmt.Println(response)
re := regexp.MustCompile(`push\(\'(.*?)\'\)`)
match := re.FindAllStringSubmatch(response, -1)

baseImageUrl := "https://2.bp.blogspot.com/"

for i := range match {
url := baseImageUrl + match[i][1]
if util.IsURLValid(url) {
links = append(links, url)
}
}
}

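The branch above splits image extraction by source: readcomicsonline.ru pages list their pages in a select element, while readcomiconline.li still embeds image paths in script push() calls, now single-quoted and relative to a blogspot host. Below is a self-contained sketch of both paths run against invented sample markup; the soup import path is assumed to be github.com/anaskhan96/soup, and only calls already used in this diff are relied on (the real code additionally filters with util.IsURLValid):

package main

import (
    "fmt"
    "regexp"

    "github.com/anaskhan96/soup" // assumed import path for the soup package used above
)

func main() {
    // readcomicsonline.ru path: page numbers live in <select id="page-list">.
    sampleRu := `<select id="page-list"><option value="1">1</option><option value="2">2</option></select>`
    document := soup.HTMLParse(sampleRu)

    var pages []string
    for _, el := range document.Find("select", "id", "page-list").FindAll("option") {
        pages = append(pages, el.Attrs()["value"])
    }
    fmt.Println("pages:", pages) // pages: [1 2]

    // readcomiconline.li path: single-quoted paths pushed from a script,
    // joined with the blogspot base URL.
    sampleLi := `lstImages.push('example-folder/page-001.jpg');` // invented fragment
    re := regexp.MustCompile(`push\(\'(.*?)\'\)`)
    baseImageUrl := "https://2.bp.blogspot.com/"

    var links []string
    for _, m := range re.FindAllStringSubmatch(sampleLi, -1) {
        links = append(links, baseImageUrl+m[1])
    }
    fmt.Println("links:", links)
}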
@@ -75,54 +111,52 @@ func (c *ReadComicOnline) retrieveLastIssue(url string) (string, error) {

// RetrieveIssueLinks gets a slice of urls for all issues in a comic
func (c *ReadComicOnline) RetrieveIssueLinks() ([]string, error) {
var links []string

url := c.options.URL

if c.options.Last {
issue, err := c.retrieveLastIssue(url)
return []string{issue}, err
}

if c.options.All && c.isSingleIssue(url) {
url = baseUrl + "/Comic/" + util.TrimAndSplitURL(url)[3]
} else if c.isSingleIssue(url) {
return []string{url}, nil
if c.options.All {
url = "https://" + c.options.Source + "/Comic/" + util.TrimAndSplitURL(url)[4]
}

name := util.TrimAndSplitURL(url)[4]
var (
pages []string
links []string
)

response, err := soup.Get(url)
if err != nil {
return nil, err
}

pages = append(pages, url)
name := util.TrimAndSplitURL(url)[4]

re := regexp.MustCompile("<a[^>]+href=\"([^\">]+" + "/" + name + "/.+)\"")
match := re.FindAllStringSubmatch(response, -1)

for i := range match {
url := match[i][1]
if !util.IsValueInSlice(url, pages) {

if !strings.HasPrefix(url, ".ru") {
url = baseUrl + strings.Split(url, "?")[0]
if util.IsURLValid(url) && !util.IsValueInSlice(url, links) {
links = append(links, url)
}
}

if util.IsURLValid(url) && !util.IsValueInSlice(url, links) {
links = append(links, url)
}
}

if c.options.Debug {
c.options.Logger.Debug(fmt.Sprintf("Issues Links retrieved: %s", strings.Join(links, " ")))
}

return links, err
return links, nil
}

// GetInfo extracts the basic info from the given url.
func (c *ReadComicOnline) GetInfo(url string) (string, string) {
parts := util.TrimAndSplitURL(url)

name := parts[4]
issueNumber := strings.Split(strings.Replace(parts[5], "Issue-", "", -1), "?")[0]

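RetrieveIssueLinks still collects issue links by matching anchors whose href contains the comic name, then stripping the query string and prepending the site base. A small stdlib-only sketch of that regexp on a hypothetical listing fragment; the comic name and hrefs are invented, and the real code also deduplicates with util.IsValueInSlice and validates with util.IsURLValid:

package main

import (
    "fmt"
    "regexp"
    "strings"
)

func main() {
    baseUrl := "https://readcomiconline.li"
    name := "Example-Comic" // in the real code this comes from util.TrimAndSplitURL(url)[4]

    // invented fragment of a series page listing two issues
    page := `<a href="/Comic/Example-Comic/Issue-1?id=1001">Issue #1</a>
<a href="/Comic/Example-Comic/Issue-2?id=1002">Issue #2</a>`

    re := regexp.MustCompile("<a[^>]+href=\"([^\">]+" + "/" + name + "/.+)\"")

    var links []string
    for _, m := range re.FindAllStringSubmatch(page, -1) {
        // mirror the diff: drop the query string and prepend the site base
        links = append(links, baseUrl+strings.Split(m[1], "?")[0])
    }
    fmt.Println(links)
    // [https://readcomiconline.li/Comic/Example-Comic/Issue-1 https://readcomiconline.li/Comic/Example-Comic/Issue-2]
}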
