From 36547ffa3e9b9db2a8822762fa2ac4bc066b741c Mon Sep 17 00:00:00 2001
From: sokomishalov
Date: Wed, 2 Jun 2021 13:09:16 +0300
Subject: [PATCH] pikabu pagination implemented

---
Review notes (everything between the "---" marker above and the first
"diff --git" line is commentary and does not become part of the commit):

* getPosts() now requests page 1, 2, 3, ... in order and stops at the first
  page that yields no `article` elements. A null document is treated the same
  way as an empty page and also ends the flow.
* getPage() gains a `page` parameter (default 1) which is sent as the `page`
  query parameter of the request.
* NOTE(review): the loop terminates only when a page comes back without any
  `article` elements. Confirm that out-of-range page numbers do not simply
  repeat the last page, otherwise the flow never completes on its own.
* The stray `;` after `break` was dropped, so the post-image blob id on the
  "index" line below no longer matches byte-for-byte.
* NOTE(review): line breaks, 4-space indentation and the `Flow<Post>` type
  argument were reconstructed from a whitespace-collapsed copy of this patch.
  Verify the context lines against the target file, or apply with
  `git apply --ignore-whitespace`.

 .../skraper/provider/pikabu/PikabuSkraper.kt  | 53 ++++++++++---------
 1 file changed, 29 insertions(+), 24 deletions(-)

diff --git a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/pikabu/PikabuSkraper.kt b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/pikabu/PikabuSkraper.kt
index 128aa780..5bf1114b 100644
--- a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/pikabu/PikabuSkraper.kt
+++ b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/pikabu/PikabuSkraper.kt
@@ -42,33 +42,38 @@ open class PikabuSkraper @JvmOverloads constructor(
 ) : Skraper {
 
     override fun getPosts(path: String): Flow<Post> = flow {
-        val page = getPage(path = path)
+        var page = 0
+        while (true) {
+            val document = getPage(path = path, page = ++page)
 
-        val rawPosts = page
-            ?.getElementsByTag("article")
-            .orEmpty()
+            val rawPosts = document
+                ?.getElementsByTag("article")
+                .orEmpty()
 
-        emitBatch(rawPosts) {
-            val storyBlocks = getElementsByClass("story-block")
+            if (rawPosts.isEmpty()) break
 
-            val title = extractPostTitle()
-            val text = storyBlocks.parseText()
+            emitBatch(rawPosts) {
+                val storyBlocks = getElementsByClass("story-block")
 
-            val caption = when {
-                text.isBlank() -> title
-                else -> "${title}\n\n${text}"
-            }
+                val title = extractPostTitle()
+                val text = storyBlocks.parseText()
+
+                val caption = when {
+                    text.isBlank() -> title
+                    else -> "${title}\n\n${text}"
+                }
 
-            Post(
-                id = extractPostId(),
-                text = String(caption.toByteArray(UTF_8)),
-                publishedAt = extractPostPublishDate(),
-                statistics = PostStatistics(
-                    likes = extractPostLikes(),
-                    comments = extractPostCommentsCount(),
-                ),
-                media = storyBlocks.extractPostMediaItems()
-            )
+                Post(
+                    id = extractPostId(),
+                    text = String(caption.toByteArray(UTF_8)),
+                    publishedAt = extractPostPublishDate(),
+                    statistics = PostStatistics(
+                        likes = extractPostLikes(),
+                        comments = extractPostCommentsCount(),
+                    ),
+                    media = storyBlocks.extractPostMediaItems()
+                )
+            }
         }
     }
 
@@ -118,9 +123,9 @@ open class PikabuSkraper @JvmOverloads constructor(
         }
     }
 
-    private suspend fun getPage(path: String): Document? {
+    private suspend fun getPage(path: String, page: Int = 1): Document? {
         return client.fetchDocument(
-            request = HttpRequest(url = baseUrl.buildFullURL(path = path)),
+            request = HttpRequest(url = baseUrl.buildFullURL(path = path, queryParams = mapOf("page" to page))),
             charset = Charset.forName("windows-1251")
         )
     }