Skip to content

Commit

Permalink
[foolfuuka] improve 'board' pattern & support pages (#5408)
Browse files (browse the repository at this point in the history)
  • Loading branch information
mikf committed Apr 1, 2024
1 parent 0c17884 commit 64948f2
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 10 deletions.
24 changes: 14 additions & 10 deletions gallery_dl/extractor/foolfuuka.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ class FoolfuukaThreadExtractor(FoolfuukaExtractor):

def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
self.board = match.group(match.lastindex-1)
self.thread = match.group(match.lastindex)
self.board = self.groups[-2]
self.thread = self.groups[-1]
self.data = None

def metadata(self):
Expand All @@ -140,20 +140,22 @@ def posts(self):
class FoolfuukaBoardExtractor(FoolfuukaExtractor):
"""Base extractor for FoolFuuka based boards/archives"""
subcategory = "board"
pattern = BASE_PATTERN + r"/([^/?#]+)/\d*$"
pattern = BASE_PATTERN + r"/([^/?#]+)(?:/(?:page/)?(\d*))?$"
example = "https://archived.moe/a/"

def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
self.board = match.group(match.lastindex)
self.board = self.groups[-2]
self.page = self.groups[-1]

def items(self):
index_base = "{}/_/api/chan/index/?board={}&page=".format(
self.root, self.board)
thread_base = "{}/{}/thread/".format(self.root, self.board)

for page in itertools.count(1):
with self.request(index_base + format(page)) as response:
page = self.page
for pnum in itertools.count(text.parse_int(page, 1)):
with self.request(index_base + format(pnum)) as response:
try:
threads = response.json()
except ValueError:
Expand All @@ -167,6 +169,9 @@ def items(self):
thread["_extractor"] = FoolfuukaThreadExtractor
yield Message.Queue, thread["url"], thread

if page:
return


class FoolfuukaSearchExtractor(FoolfuukaExtractor):
"""Base extractor for search results on FoolFuuka based boards/archives"""
Expand All @@ -179,17 +184,16 @@ class FoolfuukaSearchExtractor(FoolfuukaExtractor):
def __init__(self, match):
FoolfuukaExtractor.__init__(self, match)
self.params = params = {}
args = match.group(match.lastindex).split("/")
key = None

for arg in args:
key = None
for arg in self.groups[-1].split("/"):
if key:
params[key] = text.unescape(arg)
key = None
else:
key = arg

board = match.group(match.lastindex-1)
board = self.groups[-2]
if board != "_":
params["boards"] = board

Expand Down
20 changes: 20 additions & 0 deletions test/results/desuarchive.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,32 @@
"#sha1_url": "e7d624aded15a069194e38dc731ec23217a422fb",
},

{
"#url" : "https://desuarchive.org/a",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
},

{
"#url" : "https://desuarchive.org/a/",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
},

{
"#url" : "https://desuarchive.org/a/2",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
},

{
"#url" : "https://desuarchive.org/a/page/2",
"#category": ("foolfuuka", "desuarchive", "board"),
"#class" : foolfuuka.FoolfuukaBoardExtractor,
"#pattern" : foolfuuka.FoolfuukaThreadExtractor.pattern,
"#count" : 10,
},

{
"#url" : "https://desuarchive.org/_/search/text/test/",
"#category": ("foolfuuka", "desuarchive", "search"),
Expand Down

0 comments on commit 64948f2

Please sign in to comment.