Skip to content

Commit

Permalink
feat: use okhttp caching http client fetch rss feed.
Browse files Browse the repository at this point in the history
avoid rss article updates repeated.
  • Loading branch information
lcomplete committed Aug 29, 2024
1 parent c876a5b commit 366e682
Show file tree
Hide file tree
Showing 8 changed files with 111 additions and 40 deletions.
3 changes: 2 additions & 1 deletion app/server/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -110,4 +110,5 @@ fabric.properties
# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser

lucene/
lucene/
feed_cache/
5 changes: 5 additions & 0 deletions app/server/huntly-server/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,11 @@
<groupId>cn.shenyanchao.ik-analyzer</groupId>
<artifactId>ik-analyzer</artifactId>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.12.0</version>
</dependency>

<dependency>
<groupId>org.springframework.boot</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,76 @@
import com.rometools.rome.io.FeedException;
import com.rometools.rome.io.SyndFeedInput;
import lombok.experimental.UtilityClass;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;

import java.io.IOException;
import java.io.StringReader;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.charset.Charset;
import java.time.Duration;

/**
* Utility methods related to feed handling
* code from <a href="https://github.com/Athou/commafeed">commafeed</a> project
*/
@UtilityClass
public class FeedUtils {
public static SyndFeed parseFeedUrl(String feedUrl, HttpClient client) {
HttpRequest request = HttpRequest.newBuilder().GET().uri(URI.create(feedUrl))
public static SyndFeed parseFeedUrl(String feedUrl, OkHttpClient client) {
Request request = new Request.Builder()
.url(feedUrl)
.build();
HttpResponse<byte[]> response = null;
try {
response = client.send(request, HttpResponse.BodyHandlers.ofByteArray());
try(Response response = client.newCall(request).execute()) {
byte[] xmlBytes = null;
if (response.body() == null) {
throw new ConnectorFetchException("xml response null for url: " + feedUrl);
}

xmlBytes = response.body().bytes();
Charset encoding = FeedUtils.guessEncoding(xmlBytes);
String xmlString = XmlUtils.removeInvalidXmlCharacters(new String(xmlBytes, encoding));
if (xmlString == null) {
throw new ConnectorFetchException("xml fetch failed for url: " + feedUrl);
}
return new SyndFeedInput().build(new StringReader(xmlString));
} catch (IOException e) {
throw new RuntimeException(e);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
var xmlBytes = response.body();
Charset encoding = FeedUtils.guessEncoding(xmlBytes);
String xmlString = XmlUtils.removeInvalidXmlCharacters(new String(xmlBytes, encoding));
if (xmlString == null) {
throw new ConnectorFetchException("xml fetch failed for url: " + feedUrl);
}

try {
SyndFeed feed = new SyndFeedInput().build(new StringReader(xmlString));
return feed;
} catch (FeedException e) {
throw new RuntimeException(e);
}
}

public static SyndFeed parseFeedUrl(String feedUrl) {
var client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(60))
.followRedirects(HttpClient.Redirect.ALWAYS).build();
return parseFeedUrl(feedUrl, client);
}
// public static SyndFeed parseFeedUrl(String feedUrl, HttpClient client) {
// HttpRequest request = HttpRequest.newBuilder().GET().uri(URI.create(feedUrl))
// .build();
// HttpResponse<byte[]> response = null;
// try {
// response = client.send(request, HttpResponse.BodyHandlers.ofByteArray());
// } catch (IOException e) {
// throw new RuntimeException(e);
// } catch (InterruptedException e) {
// throw new RuntimeException(e);
// }
// var xmlBytes = response.body();
// Charset encoding = FeedUtils.guessEncoding(xmlBytes);
// String xmlString = XmlUtils.removeInvalidXmlCharacters(new String(xmlBytes, encoding));
// if (xmlString == null) {
// throw new ConnectorFetchException("xml fetch failed for url: " + feedUrl);
// }
//
// try {
// SyndFeed feed = new SyndFeedInput().build(new StringReader(xmlString));
// return feed;
// } catch (FeedException e) {
// throw new RuntimeException(e);
// }
// }

// public static SyndFeed parseFeedUrl(String feedUrl) {
// var client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(60))
// .followRedirects(HttpClient.Redirect.ALWAYS).build();
// return parseFeedUrl(feedUrl, client);
// }

public static Charset guessEncoding(byte[] bytes) {
String extracted = extractDeclaredEncoding(bytes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,14 @@
import com.huntly.server.connector.ConnectorProperties;
import com.huntly.server.connector.InfoConnector;
import com.huntly.server.domain.exceptions.ConnectorFetchException;
import com.huntly.server.util.HttpUtils;
import com.huntly.server.util.SiteUtils;
import com.rometools.rome.feed.synd.SyndCategory;
import com.rometools.rome.feed.synd.SyndContent;
import com.rometools.rome.feed.synd.SyndEntry;
import com.rometools.rome.feed.synd.SyndFeed;
import lombok.extern.slf4j.Slf4j;
import okhttp3.OkHttpClient;
import org.apache.commons.lang3.ObjectUtils;
import org.apache.commons.lang3.StringUtils;

Expand All @@ -28,18 +30,16 @@
public class RSSConnector extends InfoConnector {
private final ConnectorProperties connectorProperties;

private final OkHttpClient okClient;

private final HttpClient client;

public RSSConnector(ConnectorProperties connectorProperties) {
this.connectorProperties = connectorProperties;
this.okClient = HttpUtils.buildFeedOkHttpClient(connectorProperties.getProxySetting());
this.client = buildHttpClient(connectorProperties);
}

public RSSConnector(ConnectorProperties connectorProperties, HttpClient httpClient) {
this.connectorProperties = connectorProperties;
this.client = httpClient;
}

@Override
public List<CapturePage> fetchAllPages() {
return fetchNewestPages();
Expand All @@ -52,7 +52,7 @@ public List<CapturePage> fetchNewestPages() {
}

try {
SyndFeed feed = FeedUtils.parseFeedUrl(connectorProperties.getSubscribeUrl(), client);
SyndFeed feed = FeedUtils.parseFeedUrl(connectorProperties.getSubscribeUrl(), okClient);
var entries = feed.getEntries();
List<CapturePage> pages = new ArrayList<>();
for (var entry : entries) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
public class AppConstants {
public static final String DEFAULT_LUCENE_DIR = "lucene";

public static final String HTTP_FEED_CACHE_DIR = "feed_cache";

public static final Long HTTP_FEED_CACHE_MAXSIZE = 50L * 1024L * 1024L; // 50 MB

public static final Integer DEFAULT_FETCH_INTERVAL_SECONDS = 600;

public static final Integer DEFAULT_COLD_DATA_KEEP_DAYS = 60;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,13 @@ private void fetchPages(Connector connector) {
}
}

var savedPage = capturePageService.save(page);
Page savedPage = null;
//Avoid frequent updates of RSS articles.
if (isRssFetch && existPage != null && Objects.equals(existPage.getTitle(), page.getTitle()) && Objects.equals(existPage.getConnectedAt(), page.getConnectedAt())) {
savedPage = existPage;
} else {
savedPage = capturePageService.save(page);
}

if (isRssFetch && isExecuteFetch) {
pageArticleContentService.saveContent(savedPage.getId(), rawContent, ArticleContentCategory.RAW_CONTENT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,10 @@ public Connector followFeed(String subscribeUrl) {
public PreviewFeedsInfo previewFeeds(String subscribeUrl) {
PreviewFeedsInfo feedsInfo = new PreviewFeedsInfo();
feedsInfo.setFeedUrl(subscribeUrl);
var httpClient = HttpUtils.buildHttpClient(globalSettingService.getProxySetting());
SyndFeed syndFeed = FeedUtils.parseFeedUrl(subscribeUrl, httpClient);
var proxySetting = globalSettingService.getProxySetting();
var httpClient = HttpUtils.buildHttpClient(proxySetting);
var feedClient = HttpUtils.buildFeedOkHttpClient(proxySetting);
SyndFeed syndFeed = FeedUtils.parseFeedUrl(subscribeUrl, feedClient);
if (syndFeed != null) {
feedsInfo.setSiteLink(syndFeed.getLink());
feedsInfo.setTitle(syndFeed.getTitle());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,27 +1,59 @@
package com.huntly.server.util;

import com.huntly.server.domain.constant.AppConstants;
import com.huntly.server.domain.model.ProxySetting;
import lombok.experimental.UtilityClass;
import okhttp3.Cache;
import okhttp3.ConnectionSpec;
import okhttp3.OkHttpClient;
import org.apache.commons.lang3.StringUtils;

import javax.net.ssl.KeyManager;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.io.File;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.ProxySelector;
import java.net.http.HttpClient;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.CertificateException;
import java.security.cert.X509Certificate;
import java.time.Duration;
import java.util.Arrays;

/**
* @author lcomplete
*/
@UtilityClass
public class HttpUtils {
/**
 * Base client that owns the single on-disk feed cache.
 * <p>
 * OkHttp requires that only one {@link Cache} instance accesses a given cache directory at a
 * time, so the cache is created exactly once here. Per-call clients are derived from this
 * instance with {@link OkHttpClient#newBuilder()}, which lets them share the cache, the
 * connection pool and the dispatcher instead of allocating new ones on every call.
 */
private static final OkHttpClient FEED_BASE_CLIENT = new OkHttpClient.Builder()
        .cache(new Cache(
                new File(AppConstants.HTTP_FEED_CACHE_DIR), AppConstants.HTTP_FEED_CACHE_MAXSIZE
        ))
        .connectionSpecs(Arrays.asList(ConnectionSpec.MODERN_TLS, ConnectionSpec.COMPATIBLE_TLS, ConnectionSpec.CLEARTEXT))
        .followRedirects(true)
        .build();

/**
 * Builds an OkHttp client for fetching feeds, backed by the shared HTTP response cache.
 *
 * @param proxySetting   optional proxy configuration; an HTTP proxy is applied only when the
 *                       setting is non-null and its host is not blank
 * @param timeoutSeconds optional timeout, in seconds, for the complete call; when {@code null}
 *                       no call timeout is configured
 * @return a client that shares its cache and connection resources with every other client
 * returned by this method
 */
public static OkHttpClient buildFeedOkHttpClient(ProxySetting proxySetting, Integer timeoutSeconds) {
    // Derive from the shared base client rather than creating a second Cache on the same directory.
    var builder = FEED_BASE_CLIENT.newBuilder();
    if (proxySetting != null && StringUtils.isNotBlank(proxySetting.getHost())) {
        builder = builder.proxy(
                new Proxy(
                        Proxy.Type.HTTP,
                        new InetSocketAddress(proxySetting.getHost(), proxySetting.getPort())
                )
        );
    }
    if (timeoutSeconds != null) {
        builder = builder.callTimeout(Duration.ofSeconds(timeoutSeconds));
    }
    return builder.build();
}

/**
 * Builds a feed-fetching OkHttp client using a call timeout of 30 seconds.
 *
 * @param proxySetting optional proxy configuration, forwarded unchanged to the two-argument overload
 * @return the client produced by {@code buildFeedOkHttpClient(proxySetting, 30)}
 */
public static OkHttpClient buildFeedOkHttpClient(ProxySetting proxySetting) {
    final Integer defaultTimeoutSeconds = 30;
    return buildFeedOkHttpClient(proxySetting, defaultTimeoutSeconds);
}

public static HttpClient buildHttpClient(ProxySetting proxySetting, Integer timeoutSeconds) {
// Configure SSLContext with a TrustManager that accepts any certificate
SSLContext sslContext = null;
Expand All @@ -31,7 +63,7 @@ public static HttpClient buildHttpClient(ProxySetting proxySetting, Integer time
} catch (NoSuchAlgorithmException | KeyManagementException e) {
throw new RuntimeException(e);
}

var clientBuilder = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(timeoutSeconds))
.sslContext(sslContext)
.followRedirects(HttpClient.Redirect.ALWAYS);
Expand Down

0 comments on commit 366e682

Please sign in to comment.