Skip to content

Commit

Permalink
pikabu pagination implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
sokomishalov committed Jun 2, 2021
1 parent 84ad94c commit 36547ff
Showing 1 changed file with 29 additions and 24 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -42,33 +42,38 @@ open class PikabuSkraper @JvmOverloads constructor(
) : Skraper {

/**
 * Emits, as a [Flow], the posts built from the `article` elements fetched for [path].
 *
 * NOTE(review): this span is a rendered diff without +/- markers. The pre-change body
 * (one page fetch) and the post-change body (a page loop) appear interleaved, so the
 * text does not compile as shown. The "presumably removed/added" notes below are
 * inferred from the duplicated declarations — confirm against the actual commit.
 *
 * @param path path passed through to `getPage`.
 */
override fun getPosts(path: String): Flow<Post> = flow {
// Presumably removed: a single fetch with no page argument.
val page = getPage(path = path)
// Presumably added: page counter, pre-incremented so the first request asks for page 1.
var page = 0
while (true) {
val document = getPage(path = path, page = ++page)

// Presumably removed: article elements read from the single fetched page.
val rawPosts = page
?.getElementsByTag("article")
.orEmpty()
// Presumably added: the same extraction from the current page's document;
// orEmpty() supplies the fallback when getPage returned null.
val rawPosts = document
?.getElementsByTag("article")
.orEmpty()

emitBatch(rawPosts) {
val storyBlocks = getElementsByClass("story-block")
// The only loop exit visible here: stop at the first page that yields no article elements.
if (rawPosts.isEmpty()) break;

val title = extractPostTitle()
val text = storyBlocks.parseText()
// NOTE(review): emitBatch is defined elsewhere; presumably it runs this lambda per raw post
// and emits the resulting Post — verify against its definition.
emitBatch(rawPosts) {
val storyBlocks = getElementsByClass("story-block")

// Caption is the title alone when the text is blank, otherwise title, blank line, text.
val caption = when {
text.isBlank() -> title
else -> "${title}\n\n${text}"
}
val title = extractPostTitle()
val text = storyBlocks.parseText()

val caption = when {
text.isBlank() -> title
else -> "${title}\n\n${text}"
}

Post(
id = extractPostId(),
text = String(caption.toByteArray(UTF_8)),
publishedAt = extractPostPublishDate(),
statistics = PostStatistics(
likes = extractPostLikes(),
comments = extractPostCommentsCount(),
),
media = storyBlocks.extractPostMediaItems()
)
Post(
id = extractPostId(),
// Encodes the caption to UTF-8 bytes and builds a String back from them.
// NOTE(review): the reason for this round trip is not visible here.
text = String(caption.toByteArray(UTF_8)),
publishedAt = extractPostPublishDate(),
statistics = PostStatistics(
likes = extractPostLikes(),
comments = extractPostCommentsCount(),
),
media = storyBlocks.extractPostMediaItems()
)
}
}
}

Expand Down Expand Up @@ -118,9 +123,9 @@ open class PikabuSkraper @JvmOverloads constructor(
}
}

/**
 * Fetches a document for [path] through `client.fetchDocument`, passing the
 * windows-1251 charset.
 *
 * NOTE(review): this span is a rendered diff without +/- markers. The first signature
 * and first `request =` line are presumably the removed variant (no paging); the second
 * of each is presumably the added variant, which sends [page] as the `page` query
 * parameter. Confirm against the actual commit.
 *
 * @param path path resolved against `baseUrl` via `buildFullURL`.
 * @param page value sent as the `page` query parameter; defaults to 1.
 * @return the result of `client.fetchDocument`, which is nullable.
 */
private suspend fun getPage(path: String): Document? {
private suspend fun getPage(path: String, page: Int = 1): Document? {
return client.fetchDocument(
request = HttpRequest(url = baseUrl.buildFullURL(path = path)),
request = HttpRequest(url = baseUrl.buildFullURL(path = path, queryParams = mapOf("page" to page))),
charset = Charset.forName("windows-1251")
)
}
Expand Down

0 comments on commit 36547ff

Please sign in to comment.