-
Notifications
You must be signed in to change notification settings - Fork 42
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
345e12c
commit 7cde60f
Showing
4 changed files
with
195 additions
and
0 deletions.
There are no files selected for viewing
18 changes: 18 additions & 0 deletions
18
skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/tiktok/DefaultTiktokSigner.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
package ru.sokomishalov.skraper.provider.tiktok | ||
|
||
import com.fasterxml.jackson.databind.JsonNode | ||
import ru.sokomishalov.skraper.SkraperClient | ||
import ru.sokomishalov.skraper.model.URLString | ||
|
||
|
||
/**
 * Default implementation of the tiktok signature generator.
 *
 * The signature and the verification fingerprint are fixed values (see the todo below);
 * they are not derived from the url or from the metadata.
 */
object DefaultTiktokSigner : TiktokSigner {

    // todo: replace the hard-coded signature with a real computation
    private const val SIGNATURE = "n8-4gAAgEBAnfAa1UkRDL5.P-ZAAMEd"
    private const val VERIFY_FP = "verify_kb1fb81s_VMXJ7vcj_SitV_4NvW_BM1h_kQs5FFRI0BSx"

    /**
     * Appends the `_signature` and `verifyFp` query parameters to [url].
     *
     * @param url request url, with or without an existing query string
     * @param metadata user/trend info; not used by this implementation
     * @return [url] with both parameters appended
     */
    override suspend fun SkraperClient.sign(url: URLString, metadata: JsonNode?): URLString {
        // Open a query string when the url has none, otherwise continue the existing one.
        val separator = if ('?' in url) "&" else "?"

        return "${url}${separator}_signature=${SIGNATURE}&verifyFp=${VERIFY_FP}"
    }
}
141 changes: 141 additions & 0 deletions
141
skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/tiktok/TikTokSkraper.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
package ru.sokomishalov.skraper.provider.tiktok | ||
|
||
import com.fasterxml.jackson.databind.JsonNode | ||
import com.fasterxml.jackson.databind.node.ObjectNode | ||
import ru.sokomishalov.skraper.Skraper | ||
import ru.sokomishalov.skraper.SkraperClient | ||
import ru.sokomishalov.skraper.client.jdk.DefaultBlockingSkraperClient | ||
import ru.sokomishalov.skraper.fetchDocument | ||
import ru.sokomishalov.skraper.fetchJson | ||
import ru.sokomishalov.skraper.internal.number.div | ||
import ru.sokomishalov.skraper.internal.serialization.* | ||
import ru.sokomishalov.skraper.model.* | ||
import ru.sokomishalov.skraper.model.MediaSize.* | ||
import java.time.Duration | ||
|
||
|
||
/**
 * [Skraper] implementation for TikTok.
 *
 * User data is read from the `__NEXT_DATA__` element of the user page under [baseUrl];
 * posts are then requested from the `/item_list` endpoint under [apiBaseUrl]
 * using a url that has been passed through [signer].
 */
class TikTokSkraper @JvmOverloads constructor(
    override val client: SkraperClient = DefaultBlockingSkraperClient,
    override val baseUrl: String = "https://tiktok.com",
    private val apiBaseUrl: String = "https://m.tiktok.com/api",
    private val signer: TiktokSigner = DefaultTiktokSigner
) : Skraper {

    /**
     * Fetches up to [limit] posts of the user page at [path].
     *
     * @param path user page path, appended to [baseUrl]
     * @param limit value sent as the `count` query parameter
     * @return the mapped posts, or an empty list when the user data or the item list is unavailable
     */
    override suspend fun getPosts(path: String, limit: Int): List<Post> {
        // Without user data there is no secUid/id to query with, so skip the signed API call entirely.
        val userData = getUser(path = path) ?: return emptyList()

        val url = apiBaseUrl.buildFullURL(
            path = "/item_list",
            queryParams = mapOf(
                "secUid" to userData.getString("secUid").orEmpty(),
                "id" to userData.getString("userId").orEmpty(),
                "count" to limit,
                "minCursor" to 0,
                "maxCursor" to 0,
                "lang" to "en",
                "region" to "US",
                "appId" to "1233",
                "sourceType" to 8,
                "type" to 1
            )
        )

        // `sign` is declared as an extension on SkraperClient inside TiktokSigner,
        // so the signer has to be brought into scope as a receiver.
        val signedUrl = with(signer) { client.sign(url = url, metadata = userData) }

        val data = client.fetchJson(
            url = signedUrl,
            headers = mapOf(
                "Referer" to "$baseUrl${path}",
                "Origin" to baseUrl,
                "User-Agent" to USER_AGENT
            )
        )

        return data
            ?.getByPath("body.itemListData")
            ?.mapNotNull { it.getByPath("itemInfos") }
            ?.map { it.toPost() }
            .orEmpty()
    }

    /**
     * Returns [media] unchanged.
     */
    override suspend fun resolve(media: Media): Media {
        return media
    }

    /**
     * Builds the page info of the user page at [path].
     *
     * @param path user page path, appended to [baseUrl]
     * @return the page info, or `null` when no user data could be read from the page
     */
    override suspend fun getPageInfo(path: String): PageInfo? {
        val user = getUser(path = path) ?: return null

        return PageInfo(
            nick = user.getString("uniqueId").orEmpty(),
            name = user.getString("nickName"),
            description = user.getString("signature"),
            followersCount = user.getInt("fans"),
            // Larger sizes fall back to the next smaller cover list when their own list is absent.
            avatarsMap = mapOf(
                SMALL to user.getFirstAvatar("covers").toImage(),
                MEDIUM to user.getFirstAvatar("coversMedium", "covers").toImage(),
                LARGE to user.getFirstAvatar("coversLarge", "coversMedium", "covers").toImage()
            )
        )
    }

    /**
     * Loads the user page and extracts the `props.pageProps.userData` node from its `__NEXT_DATA__` json.
     *
     * When the page also contains a script starting with `tac=`, its value is stored on the
     * returned node under the `tac` key.
     */
    private suspend fun getUser(path: String): JsonNode? {
        val document = client.fetchDocument(url = "${baseUrl}${path}")

        val userData = document
            ?.getElementById("__NEXT_DATA__")
            ?.html()
            ?.readJsonNodes()
            ?.getByPath("props.pageProps.userData")

        val tac = document
            ?.getElementsByTag("script")
            ?.firstOrNull { it.html().startsWith("tac=") }
            ?.html()
            ?.removeSurrounding("tac='", "'")

        if (tac != null) {
            (userData as? ObjectNode)?.put("tac", tac)
        }

        return userData
    }

    /**
     * Maps a single `itemInfos` node to a [Post].
     */
    private fun JsonNode.toPost(): Post {
        return Post(
            id = getString("id").orEmpty(),
            text = getString("text"),
            rating = getInt("diggCount"),
            commentsCount = getInt("commentCount"),
            viewsCount = getInt("playCount"),
            media = getByPath("video")?.run {
                listOf(Video(
                    url = get("urls")?.firstOrNull()?.asText().orEmpty(),
                    // NOTE(review): `/` presumably resolves to the `div` helper imported from internal.number - confirm
                    aspectRatio = getDouble("videoMeta.width") / getDouble("videoMeta.height"),
                    duration = getLong("videoMeta.duration")?.let { sec -> Duration.ofSeconds(sec) }
                ))
            }.orEmpty()
        )
    }

    /**
     * Returns the text of the first element of the first field among [names] that yields one,
     * or an empty string when none does.
     */
    private fun JsonNode?.getFirstAvatar(vararg names: String): String {
        return names
            .asSequence() // stop at the first field that yields a value
            .mapNotNull { name -> this?.get(name)?.firstOrNull()?.asText() }
            .firstOrNull()
            .orEmpty()
    }

    companion object {
        private const val USER_AGENT = "Mozilla/5.0 (iPhone; CPU iPhone OS 11_0 like Mac OS X) AppleWebKit/604.1.38 (KHTML, like Gecko) Version/11.0 Mobile/15A372 Safari/604.1"
    }
}
27 changes: 27 additions & 0 deletions
27
skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/tiktok/TiktokSigner.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
package ru.sokomishalov.skraper.provider.tiktok | ||
|
||
import com.fasterxml.jackson.databind.JsonNode | ||
import ru.sokomishalov.skraper.SkraperClient | ||
import ru.sokomishalov.skraper.model.URLString | ||
|
||
|
||
/**
 * Contract for the algorithm that signs tiktok request urls.
 *
 * Tiktok changes this algorithm often, which is why the interface is exposed as part of the public library API:
 * if it changes again, you can reimplement (or modify) it yourself.
 *
 * The current js-implementation looks like
 * [signature.js](https://github.com/drawrowfly/tiktok-scraper/blob/master/lib/helpers/signature.js).
 *
 * A simple way to avoid porting it is to write a small NodeJS web-app that generates the signature
 * and a JVM implementation of this interface that requests it over HTTP.
 */
interface TiktokSigner {

    /**
     * @param url request url
     * @param metadata user/trend info
     * @return url
     */
    suspend fun SkraperClient.sign(url: URLString, metadata: JsonNode?): URLString

}
9 changes: 9 additions & 0 deletions
9
skrapers/src/test/kotlin/ru/sokomishalov/skraper/provider/tiktok/TikTokSkraperTest.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package ru.sokomishalov.skraper.provider.tiktok | ||
|
||
import ru.sokomishalov.skraper.Skraper | ||
import ru.sokomishalov.skraper.provider.SkraperTck | ||
|
||
/**
 * Supplies a [TikTokSkraper] and the `/@meme` page path to [SkraperTck].
 */
class TikTokSkraperTest : SkraperTck() {
    override val path: String = "/@meme"
    override val skraper: Skraper = TikTokSkraper(client = client)
}