From e44d1f6118c4c9c73d3e9510a14e137edbc778fd Mon Sep 17 00:00:00 2001 From: sokomishalov Date: Thu, 26 Mar 2020 14:39:45 +0300 Subject: [PATCH] new clients --- .../ru/sokomishalov/skraper/SkraperClient.kt | 9 ++- .../skraper/SkraperClientExtensions.kt | 34 +++++--- .../skraper/client/HttpMethodType.kt | 32 ++++++++ .../jdk/DefaultBlockingSkraperClient.kt | 14 +++- .../skraper/client/ktor/KtorSkraperClient.kt | 34 ++++++-- .../client/okhttp3/OkHttp3SkraperClient.kt | 49 +++++++----- .../reactornetty/ReactorNettySkraperClient.kt | 22 +++++- .../spring/SpringReactiveSkraperClient.kt | 14 +++- .../skraper/internal/net/UrlExtensions.kt | 27 +++++-- .../serialization/JacksonExtensions.kt | 77 ++++++++++--------- .../facebook/FacebookSkraperExtensions.kt | 2 +- .../provider/instagram/InstagramSkraper.kt | 11 +-- .../skraper/client/SkraperClientTck.kt | 39 +++++++--- .../skraper/provider/SkraperTck.kt | 9 ++- 14 files changed, 267 insertions(+), 106 deletions(-) create mode 100644 skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/HttpMethodType.kt diff --git a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/SkraperClient.kt b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/SkraperClient.kt index 82f3666c..b9f5fe0e 100644 --- a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/SkraperClient.kt +++ b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/SkraperClient.kt @@ -15,6 +15,8 @@ */ package ru.sokomishalov.skraper +import ru.sokomishalov.skraper.client.HttpMethodType +import ru.sokomishalov.skraper.client.HttpMethodType.GET import ru.sokomishalov.skraper.model.URLString /** @@ -22,6 +24,11 @@ import ru.sokomishalov.skraper.model.URLString */ interface SkraperClient { - suspend fun fetch(url: URLString, headers: Map = emptyMap()): ByteArray? + suspend fun fetch( + url: URLString, + method: HttpMethodType = GET, + headers: Map = emptyMap(), + body: ByteArray? = null + ): ByteArray? } \ No newline at end of file diff --git a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/SkraperClientExtensions.kt b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/SkraperClientExtensions.kt index a018c36f..5182f94d 100644 --- a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/SkraperClientExtensions.kt +++ b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/SkraperClientExtensions.kt @@ -18,6 +18,8 @@ package ru.sokomishalov.skraper import com.fasterxml.jackson.databind.JsonNode import org.jsoup.Jsoup import org.jsoup.nodes.Document +import ru.sokomishalov.skraper.client.HttpMethodType +import ru.sokomishalov.skraper.client.HttpMethodType.GET import ru.sokomishalov.skraper.internal.serialization.readJsonNodes import ru.sokomishalov.skraper.model.URLString import java.nio.charset.Charset @@ -28,24 +30,36 @@ import kotlin.text.Charsets.UTF_8 * @author sokomishalov */ -suspend fun SkraperClient.fetchBytes(url: URLString, headers: Map = emptyMap()): ByteArray? { +suspend fun SkraperClient.fetchBytes( + url: URLString, + method: HttpMethodType = GET, + headers: Map = emptyMap(), + body: ByteArray? = null +): ByteArray? { return runCatching { - fetch(url = url, headers = headers) + fetch(url, method, headers, body) }.getOrNull() } -suspend fun SkraperClient.fetchJson(url: URLString, headers: Map = emptyMap()): JsonNode? { +suspend fun SkraperClient.fetchJson( + url: URLString, + method: HttpMethodType = GET, + headers: Map = emptyMap(), + body: ByteArray? = null +): JsonNode? { return runCatching { - fetch(url = url, headers = headers)?.run { - readJsonNodes() - } + fetch(url, method, headers, body)?.run { readJsonNodes() } }.getOrNull() } -suspend fun SkraperClient.fetchDocument(url: URLString, headers: Map = emptyMap(), charset: Charset = UTF_8): Document? { +suspend fun SkraperClient.fetchDocument( + url: URLString, + method: HttpMethodType = GET, + headers: Map = emptyMap(), + body: ByteArray? = null, + charset: Charset = UTF_8 +): Document? { return runCatching { - fetch(url = url, headers = headers)?.run { - Jsoup.parse(toString(charset)) - } + fetch(url, method, headers, body)?.run { Jsoup.parse(toString(charset)) } }.getOrNull() } \ No newline at end of file diff --git a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/HttpMethodType.kt b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/HttpMethodType.kt new file mode 100644 index 00000000..956f7089 --- /dev/null +++ b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/HttpMethodType.kt @@ -0,0 +1,32 @@ +/** + * Copyright (c) 2019-present Mikhael Sokolov + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +@file:Suppress("unused") + +package ru.sokomishalov.skraper.client + +/** + * @author sokomishalov + */ +enum class HttpMethodType { + GET, + HEAD, + POST, + PUT, + PATCH, + DELETE, + OPTIONS, + TRACE +} \ No newline at end of file diff --git a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/jdk/DefaultBlockingSkraperClient.kt b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/jdk/DefaultBlockingSkraperClient.kt index 516530a9..dd4cb752 100644 --- a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/jdk/DefaultBlockingSkraperClient.kt +++ b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/jdk/DefaultBlockingSkraperClient.kt @@ -15,9 +15,10 @@ */ package ru.sokomishalov.skraper.client.jdk -import kotlinx.coroutines.Dispatchers +import kotlinx.coroutines.Dispatchers.IO import kotlinx.coroutines.withContext import ru.sokomishalov.skraper.SkraperClient +import ru.sokomishalov.skraper.client.HttpMethodType import ru.sokomishalov.skraper.internal.net.openStreamForRedirectable import ru.sokomishalov.skraper.model.URLString import java.net.URL @@ -29,9 +30,14 @@ import java.net.URL */ object DefaultBlockingSkraperClient : SkraperClient { - override suspend fun fetch(url: URLString, headers: Map): ByteArray? { - return withContext(Dispatchers.IO) { - URL(url).openStreamForRedirectable(headers = headers) + override suspend fun fetch( + url: URLString, + method: HttpMethodType, + headers: Map, + body: ByteArray? + ): ByteArray? { + return withContext(IO) { + URL(url).openStreamForRedirectable(method, headers, body) }.use { it.readBytes() } diff --git a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/ktor/KtorSkraperClient.kt b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/ktor/KtorSkraperClient.kt index 69101589..e72f5f85 100644 --- a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/ktor/KtorSkraperClient.kt +++ b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/ktor/KtorSkraperClient.kt @@ -16,23 +16,47 @@ package ru.sokomishalov.skraper.client.ktor import io.ktor.client.HttpClient -import io.ktor.client.request.get import io.ktor.client.request.header +import io.ktor.client.request.request +import io.ktor.content.ByteArrayContent +import io.ktor.http.ContentType +import io.ktor.http.HttpHeaders.UnsafeHeadersList +import io.ktor.http.HttpMethod +import io.ktor.http.takeFrom import ru.sokomishalov.skraper.SkraperClient +import ru.sokomishalov.skraper.client.HttpMethodType import ru.sokomishalov.skraper.model.URLString class KtorSkraperClient( private val client: HttpClient = DEFAULT_CLIENT ) : SkraperClient { - override suspend fun fetch(url: URLString, headers: Map): ByteArray? { - return client.get(url) { - headers.forEach { (k, v) -> header(k, v) } + override suspend fun fetch( + url: URLString, + method: HttpMethodType, + headers: Map, + body: ByteArray? + ): ByteArray? { + return client.request { + this.url.takeFrom(url) + this.method = HttpMethod.parse(method.name) + headers + .filterKeys { it !in UnsafeHeadersList } + .forEach { (k, v) -> + header(k, v) + } + body?.let { + this.body = ByteArrayContent( + bytes = it, + contentType = headers["Content-Type"]?.let { t -> ContentType.parse(t) } + ) + } } } companion object { - val DEFAULT_CLIENT = HttpClient { + @JvmStatic + val DEFAULT_CLIENT: HttpClient = HttpClient { followRedirects = true } } diff --git a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/okhttp3/OkHttp3SkraperClient.kt b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/okhttp3/OkHttp3SkraperClient.kt index 1d0c47f4..d3c2e97e 100644 --- a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/okhttp3/OkHttp3SkraperClient.kt +++ b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/okhttp3/OkHttp3SkraperClient.kt @@ -18,11 +18,15 @@ package ru.sokomishalov.skraper.client.okhttp3 import kotlinx.coroutines.ExperimentalCoroutinesApi import kotlinx.coroutines.suspendCancellableCoroutine import okhttp3.* +import okhttp3.MediaType.Companion.toMediaType +import okio.BufferedSink import ru.sokomishalov.skraper.SkraperClient +import ru.sokomishalov.skraper.client.HttpMethodType import ru.sokomishalov.skraper.model.URLString import java.io.IOException import kotlin.coroutines.resumeWithException + /** * Huge appreciation to my russian colleague * @see link @@ -34,11 +38,17 @@ class OkHttp3SkraperClient( ) : SkraperClient { @Suppress("BlockingMethodInNonBlockingContext") - override suspend fun fetch(url: URLString, headers: Map): ByteArray? { + override suspend fun fetch( + url: URLString, + method: HttpMethodType, + headers: Map, + body: ByteArray? + ): ByteArray? { val request = Request .Builder() .url(url) .headers(Headers.headersOf(*(headers.flatMap { listOf(it.key, it.value) }.toTypedArray()))) + .method(method = method.name, body = body?.createRequest(contentType = headers["Content-Type"])) .build() return client @@ -48,31 +58,34 @@ class OkHttp3SkraperClient( ?.bytes() } - companion object { - val DEFAULT_CLIENT: OkHttpClient = OkHttpClient - .Builder() - .followRedirects(true) - .followSslRedirects(true) - .build() - } - @OptIn(ExperimentalCoroutinesApi::class) private suspend fun Call.await(): Response { return suspendCancellableCoroutine { continuation -> enqueue(object : Callback { - override fun onResponse(call: Call, response: Response) { - continuation.resume(response) {} - } - - override fun onFailure(call: Call, e: IOException) { - if (continuation.isCancelled) return - continuation.resumeWithException(e) - } + override fun onResponse(call: Call, response: Response) = continuation.resume(response) { Unit } + override fun onFailure(call: Call, e: IOException) = if (continuation.isCancelled.not()) continuation.resumeWithException(e) else Unit }) continuation.invokeOnCancellation { - runCatching { cancel() } + runCatching { cancel() }.getOrNull() } } } + + private fun ByteArray.createRequest(contentType: String?): RequestBody? { + return object : RequestBody() { + override fun contentType(): MediaType? = contentType?.toMediaType() + override fun contentLength(): Long = this@createRequest.size.toLong() + override fun writeTo(sink: BufferedSink) = sink.write(this@createRequest).run { Unit } + } + } + + companion object { + @JvmStatic + val DEFAULT_CLIENT: OkHttpClient = OkHttpClient + .Builder() + .followRedirects(true) + .followSslRedirects(true) + .build() + } } \ No newline at end of file diff --git a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/reactornetty/ReactorNettySkraperClient.kt b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/reactornetty/ReactorNettySkraperClient.kt index 316006f1..868ba552 100644 --- a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/reactornetty/ReactorNettySkraperClient.kt +++ b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/reactornetty/ReactorNettySkraperClient.kt @@ -13,14 +13,22 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +@file:Suppress("ReactorUnusedPublisher") + package ru.sokomishalov.skraper.client.reactornetty +import io.netty.handler.codec.http.HttpMethod import io.netty.handler.ssl.SslContextBuilder import io.netty.handler.ssl.util.InsecureTrustManagerFactory import kotlinx.coroutines.reactive.awaitFirstOrNull +import reactor.core.publisher.Mono +import reactor.netty.ByteBufFlux +import reactor.netty.ByteBufMono import reactor.netty.http.client.HttpClient import ru.sokomishalov.skraper.SkraperClient +import ru.sokomishalov.skraper.client.HttpMethodType import ru.sokomishalov.skraper.model.URLString +import kotlin.text.Charsets.UTF_8 /** * @author sokomishalov @@ -29,16 +37,26 @@ class ReactorNettySkraperClient( private val client: HttpClient = DEFAULT_CLIENT ) : SkraperClient { - override suspend fun fetch(url: URLString, headers: Map): ByteArray? { + override suspend fun fetch( + url: URLString, + method: HttpMethodType, + headers: Map, + body: ByteArray? + ): ByteArray? { return client .headers { headers.forEach { (k, v) -> it[k] = v } } - .get() + .request(HttpMethod.valueOf(method.name)) .uri(url) + .send(when (body) { + null -> ByteBufMono.empty() + else -> ByteBufFlux.fromString(Mono.just(body.toString(UTF_8))) + }) .responseSingle { _, u -> u.asByteArray() } .awaitFirstOrNull() } companion object { + @JvmStatic val DEFAULT_CLIENT: HttpClient = HttpClient .create() .followRedirect(true) diff --git a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/spring/SpringReactiveSkraperClient.kt b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/spring/SpringReactiveSkraperClient.kt index e845969a..e51bc0e2 100644 --- a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/spring/SpringReactiveSkraperClient.kt +++ b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/client/spring/SpringReactiveSkraperClient.kt @@ -17,6 +17,8 @@ package ru.sokomishalov.skraper.client.spring import io.netty.handler.ssl.SslContextBuilder import io.netty.handler.ssl.util.InsecureTrustManagerFactory +import org.springframework.http.HttpMethod +import org.springframework.http.HttpMethod.GET import org.springframework.http.client.reactive.ReactorClientHttpConnector import org.springframework.web.reactive.function.client.ExchangeStrategies import org.springframework.web.reactive.function.client.WebClient @@ -24,6 +26,7 @@ import org.springframework.web.reactive.function.client.awaitBodyOrNull import org.springframework.web.reactive.function.client.awaitExchange import reactor.netty.http.client.HttpClient import ru.sokomishalov.skraper.SkraperClient +import ru.sokomishalov.skraper.client.HttpMethodType import ru.sokomishalov.skraper.model.URLString /** @@ -33,16 +36,23 @@ class SpringReactiveSkraperClient( private val webClient: WebClient = DEFAULT_CLIENT ) : SkraperClient { - override suspend fun fetch(url: URLString, headers: Map): ByteArray? { + override suspend fun fetch( + url: URLString, + method: HttpMethodType, + headers: Map, + body: ByteArray? + ): ByteArray? { return webClient - .get() + .method(HttpMethod.resolve(method.name) ?: GET) .uri(url) .headers { headers.forEach { (k, v) -> it[k] = v } } + .apply { body?.let { bodyValue(it) } } .awaitExchange() .awaitBodyOrNull() } companion object { + @JvmStatic val DEFAULT_CLIENT: WebClient = WebClient .builder() .clientConnector( diff --git a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/internal/net/UrlExtensions.kt b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/internal/net/UrlExtensions.kt index 0d8d0729..51649061 100644 --- a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/internal/net/UrlExtensions.kt +++ b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/internal/net/UrlExtensions.kt @@ -17,21 +17,29 @@ package ru.sokomishalov.skraper.internal.net import kotlinx.coroutines.Dispatchers.IO import kotlinx.coroutines.withContext +import ru.sokomishalov.skraper.client.HttpMethodType +import ru.sokomishalov.skraper.client.HttpMethodType.GET +import java.io.DataOutputStream import java.io.InputStream import java.net.HttpURLConnection import java.net.HttpURLConnection.* import java.net.URL + /** * @author sokomishalov */ @PublishedApi -internal suspend fun URL.openStreamForRedirectable(headers: Map = emptyMap()): InputStream { +internal suspend fun URL.openStreamForRedirectable( + method: HttpMethodType = GET, + headers: Map = emptyMap(), + body: ByteArray? = null +): InputStream { return withContext(IO) { val conn = openConnection() as HttpURLConnection - conn.applyDefaultHeaders(headers = headers) + conn.applyData(method, headers, body) val status = conn.responseCode @@ -40,7 +48,7 @@ internal suspend fun URL.openStreamForRedirectable(headers: Map val newConn = URL(conn.getHeaderField("Location")).openConnection() as HttpURLConnection newConn.apply { setRequestProperty("Cookie", conn.getHeaderField("Set-Cookie")) - applyDefaultHeaders(headers) + applyData(method, headers, body) } newConn.inputStream } @@ -49,8 +57,17 @@ internal suspend fun URL.openStreamForRedirectable(headers: Map } } -private fun HttpURLConnection.applyDefaultHeaders(headers: Map = emptyMap()) { +private fun HttpURLConnection.applyData( + method: HttpMethodType, + headers: Map, + body: ByteArray? +) { + requestMethod = method.name + headers.forEach { (k, v) -> addRequestProperty(k, v) } + body?.let { + doOutput = true + DataOutputStream(outputStream).use { wr -> wr.write(it) } + } connectTimeout = 5_000 readTimeout = 5_000 - headers.forEach { (k, v) -> addRequestProperty(k, v) } } \ No newline at end of file diff --git a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/internal/serialization/JacksonExtensions.kt b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/internal/serialization/JacksonExtensions.kt index e3f21c45..c5100c1f 100644 --- a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/internal/serialization/JacksonExtensions.kt +++ b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/internal/serialization/JacksonExtensions.kt @@ -18,13 +18,12 @@ package ru.sokomishalov.skraper.internal.serialization import com.fasterxml.jackson.annotation.JsonInclude.Include.NON_NULL -import com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_SINGLE_QUOTES -import com.fasterxml.jackson.core.JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES +import com.fasterxml.jackson.core.json.JsonReadFeature.* import com.fasterxml.jackson.databind.DeserializationFeature.* import com.fasterxml.jackson.databind.JsonNode import com.fasterxml.jackson.databind.MapperFeature.ACCEPT_CASE_INSENSITIVE_ENUMS -import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.databind.SerializationFeature.* +import com.fasterxml.jackson.databind.json.JsonMapper import com.fasterxml.jackson.databind.node.MissingNode @@ -34,12 +33,12 @@ import com.fasterxml.jackson.databind.node.MissingNode @PublishedApi internal inline fun ByteArray?.readJsonNodes(): JsonNode? { - return OBJECT_MAPPER.readTree(this) + return JSON_MAPPER.readTree(this) } @PublishedApi internal inline fun String?.readJsonNodes(): JsonNode? { - return OBJECT_MAPPER.readTree(this) + return JSON_MAPPER.readTree(this) } internal fun JsonNode.getByKeyContaining(keyPart: String): JsonNode? { @@ -77,37 +76,39 @@ internal inline fun JsonNode.getDouble(path: String, delimiter: String = "."): D } @PublishedApi -internal val OBJECT_MAPPER: ObjectMapper by lazy { - ObjectMapper().apply { - enable( - ALLOW_SINGLE_QUOTES, - ALLOW_UNQUOTED_FIELD_NAMES - ) - enable( - READ_UNKNOWN_ENUM_VALUES_USING_DEFAULT_VALUE, - READ_ENUMS_USING_TO_STRING, - ACCEPT_SINGLE_VALUE_AS_ARRAY, - ACCEPT_EMPTY_STRING_AS_NULL_OBJECT, - ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT - ) - enable( - ACCEPT_CASE_INSENSITIVE_ENUMS - ) - enable( - WRITE_ENUMS_USING_TO_STRING - ) - disable( - FAIL_ON_EMPTY_BEANS, - WRITE_DATES_AS_TIMESTAMPS - ) - disable( - ADJUST_DATES_TO_CONTEXT_TIME_ZONE, - FAIL_ON_UNKNOWN_PROPERTIES, - FAIL_ON_INVALID_SUBTYPE, - FAIL_ON_IGNORED_PROPERTIES, - FAIL_ON_UNRESOLVED_OBJECT_IDS, - FAIL_ON_TRAILING_TOKENS - ) - setSerializationInclusion(NON_NULL) - } +internal val JSON_MAPPER: JsonMapper by lazy { + JsonMapper + .builder() + .enable( + ALLOW_UNESCAPED_CONTROL_CHARS, + ALLOW_SINGLE_QUOTES, + ALLOW_UNQUOTED_FIELD_NAMES + ) + .enable( + READ_UNKNOWN_ENUM_VALUES_USING_DEFAULT_VALUE, + READ_ENUMS_USING_TO_STRING, + ACCEPT_SINGLE_VALUE_AS_ARRAY, + ACCEPT_EMPTY_STRING_AS_NULL_OBJECT, + ACCEPT_EMPTY_ARRAY_AS_NULL_OBJECT + ) + .enable( + ACCEPT_CASE_INSENSITIVE_ENUMS + ) + .enable( + WRITE_ENUMS_USING_TO_STRING + ) + .disable( + FAIL_ON_EMPTY_BEANS, + WRITE_DATES_AS_TIMESTAMPS + ) + .disable( + ADJUST_DATES_TO_CONTEXT_TIME_ZONE, + FAIL_ON_UNKNOWN_PROPERTIES, + FAIL_ON_INVALID_SUBTYPE, + FAIL_ON_IGNORED_PROPERTIES, + FAIL_ON_UNRESOLVED_OBJECT_IDS, + FAIL_ON_TRAILING_TOKENS + ) + .serializationInclusion(NON_NULL) + .build() } \ No newline at end of file diff --git a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/facebook/FacebookSkraperExtensions.kt b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/facebook/FacebookSkraperExtensions.kt index 46556c6c..c869b356 100644 --- a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/facebook/FacebookSkraperExtensions.kt +++ b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/facebook/FacebookSkraperExtensions.kt @@ -33,7 +33,7 @@ suspend fun FacebookSkraper.getCommunityInfo(community: String): PageInfo? { } suspend fun FacebookSkraper.getUserPosts(username: String, limit: Int = DEFAULT_POSTS_LIMIT): List { - return getPosts(path = "/pg/${username}/posts", limit = limit) + return getPosts(path = "/${username}/posts", limit = limit) } suspend fun FacebookSkraper.getUserInfo(username: String): PageInfo? { diff --git a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/instagram/InstagramSkraper.kt b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/instagram/InstagramSkraper.kt index 1ec55804..aab7771e 100644 --- a/skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/instagram/InstagramSkraper.kt +++ b/skrapers/src/main/kotlin/ru/sokomishalov/skraper/provider/instagram/InstagramSkraper.kt @@ -31,6 +31,7 @@ import ru.sokomishalov.skraper.model.MediaSize.* */ class InstagramSkraper @JvmOverloads constructor( override val client: SkraperClient = DefaultBlockingSkraperClient, + private val apiQueryId: String = "17888483320059182", override val baseUrl: URLString = "https://instagram.com" ) : Skraper { @@ -69,7 +70,11 @@ class InstagramSkraper @JvmOverloads constructor( internal suspend fun getPostsByUserId(userId: Long?, limit: Int): List { val data = client.fetchJson(url = baseUrl.buildFullURL( path = "/graphql/query/", - queryParams = mapOf("query_id" to QUERY_ID, "id" to userId, "first" to limit) + queryParams = mapOf( + "query_id" to apiQueryId, + "id" to userId, + "first" to limit + ) )) val postsNodes = data @@ -135,8 +140,4 @@ class InstagramSkraper @JvmOverloads constructor( } ) } - - companion object { - private const val QUERY_ID = "17888483320059182" - } } \ No newline at end of file diff --git a/skrapers/src/test/kotlin/ru/sokomishalov/skraper/client/SkraperClientTck.kt b/skrapers/src/test/kotlin/ru/sokomishalov/skraper/client/SkraperClientTck.kt index 1e5a866c..fac1759d 100644 --- a/skrapers/src/test/kotlin/ru/sokomishalov/skraper/client/SkraperClientTck.kt +++ b/skrapers/src/test/kotlin/ru/sokomishalov/skraper/client/SkraperClientTck.kt @@ -18,19 +18,23 @@ package ru.sokomishalov.skraper.client import kotlinx.coroutines.runBlocking import org.junit.Test import ru.sokomishalov.skraper.SkraperClient +import ru.sokomishalov.skraper.client.HttpMethodType.POST import ru.sokomishalov.skraper.fetchBytes import ru.sokomishalov.skraper.fetchDocument import ru.sokomishalov.skraper.fetchJson +import ru.sokomishalov.skraper.internal.serialization.getString +import kotlin.test.assertEquals import kotlin.test.assertNotNull import kotlin.test.assertNull import kotlin.test.assertTrue +import kotlin.text.Charsets.UTF_8 abstract class SkraperClientTck { protected abstract val client: SkraperClient @Test - fun `Fetch byte array assertions`() = runBlocking { + fun `Fetch byte array`() = runBlocking { val bytes = client.fetch("https://www.wikipedia.org/") assertTrue { bytes != null } @@ -38,15 +42,15 @@ abstract class SkraperClientTck { } @Test - fun `Redirect to https assertion`() = runBlocking { + fun `Redirect to https`() = runBlocking { val bytes = client.fetch("http://twitter.com/") - assertTrue { bytes != null } - assertTrue { bytes!!.isNotEmpty() } + assertNotNull(bytes) + assertTrue { bytes.isNotEmpty() } } @Test - fun `Fetch document assertion`() = runBlocking { + fun `Fetch document`() = runBlocking { val document = client.fetchDocument("https://facebook.com") assertTrue { document != null } @@ -54,17 +58,28 @@ abstract class SkraperClientTck { } @Test - fun `Fetch json example`() = runBlocking { - val user = "sokomishalov" - val reposJson = client.fetchJson("https://api.github.com/users/$user/repos") + fun `Fetch complex json`() = runBlocking { + val echoJson = client.fetchJson( + url = "https://postman-echo.com/post", + method = POST, + headers = mapOf( + "foo" to "bar", + "Content-Type" to "application/json" + ), + body = """ + { + "bar": "foo" + } + """.trimIndent().toByteArray(UTF_8) + ) - assertNotNull(reposJson) - assertTrue { reposJson.isArray } - assertTrue { reposJson[0]["owner"]["login"].asText().toLowerCase() == user } + assertNotNull(echoJson) + assertEquals("bar", echoJson.getString("headers.foo")) + assertEquals("foo", echoJson.getString("data.bar")) } @Test - fun `Bad pages errors`() = runBlocking { + fun `Bad url`() = runBlocking { assertNull(client.fetchBytes("https://very-badurl.badurl")) } } \ No newline at end of file diff --git a/skrapers/src/test/kotlin/ru/sokomishalov/skraper/provider/SkraperTck.kt b/skrapers/src/test/kotlin/ru/sokomishalov/skraper/provider/SkraperTck.kt index 0158512e..7c750c6c 100644 --- a/skrapers/src/test/kotlin/ru/sokomishalov/skraper/provider/SkraperTck.kt +++ b/skrapers/src/test/kotlin/ru/sokomishalov/skraper/provider/SkraperTck.kt @@ -41,8 +41,11 @@ import kotlin.test.assertTrue abstract class SkraperTck { companion object { - private val LOGGER: Logger = LoggerFactory.getLogger(SkraperTck::class.java) - private val MAPPER: ObjectMapper = ObjectMapper().apply { + @JvmStatic + private val log: Logger = LoggerFactory.getLogger(SkraperTck::class.java) + + @JvmStatic + private val mapper: ObjectMapper = ObjectMapper().apply { registerModule(JavaTimeModule()) registerModule(Jdk8Module()) setSerializationInclusion(NON_NULL) @@ -103,7 +106,7 @@ abstract class SkraperTck { private suspend fun logAction(action: suspend Skraper.() -> T): T { return skraper.action().also { - LOGGER.info(MAPPER.writerWithDefaultPrettyPrinter().writeValueAsString(it)) + log.info(mapper.writerWithDefaultPrettyPrinter().writeValueAsString(it)) } } } \ No newline at end of file