diff --git a/.gitignore b/.gitignore
index 2e33739ec64..e3abb530fc8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,8 +3,11 @@ src/main/gen/
src/main/generated/
src-gen/
+
.lycheecache
+jcef-bundle/
+
javafx/javafx-sdk-*
javafx/javafx-jmods-*
javafx/javafx.html
diff --git a/build.gradle b/build.gradle
index 87bed09a28c..912ec1edcb3 100644
--- a/build.gradle
+++ b/build.gradle
@@ -256,7 +256,7 @@ dependencies {
implementation 'org.controlsfx:controlsfx:11.2.1'
// region HTTP clients
- implementation 'org.htmlunit:htmlunit:4.4.0' // used for web scraping
+ implementation 'me.friwi:jcefmaven:126.2.0' // used for web scraping
implementation 'org.jsoup:jsoup:1.18.1'
implementation 'com.konghq:unirest-java-core:4.4.4'
implementation 'com.konghq:unirest-modules-gson:4.4.4'
diff --git a/src/main/java/module-info.java b/src/main/java/module-info.java
index dc18d9dd3f2..34224f33dc2 100644
--- a/src/main/java/module-info.java
+++ b/src/main/java/module-info.java
@@ -90,7 +90,7 @@
requires org.glassfish.hk2.api;
// region: http clients
- requires htmlunit;
+ requires jcefmaven;
requires org.apache.httpcomponents.core5.httpcore5;
requires org.jsoup;
requires unirest.java.core;
@@ -184,5 +184,6 @@
requires mslinks;
requires org.antlr.antlr4.runtime;
requires org.libreoffice.uno;
+ requires jcef;
// endregion
}
diff --git a/src/main/java/org/jabref/logic/importer/fetcher/ACS.java b/src/main/java/org/jabref/logic/importer/fetcher/ACS.java
index 581810d0479..1f6d8d5be33 100644
--- a/src/main/java/org/jabref/logic/importer/fetcher/ACS.java
+++ b/src/main/java/org/jabref/logic/importer/fetcher/ACS.java
@@ -10,17 +10,21 @@
import org.jabref.model.entry.field.StandardField;
import org.jabref.model.entry.identifier.DOI;
-import org.htmlunit.BrowserVersion;
-import org.htmlunit.WebClient;
-import org.htmlunit.html.HtmlPage;
-import org.jsoup.Jsoup;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
+import me.friwi.jcefmaven.CefAppBuilder;
+import me.friwi.jcefmaven.MavenCefAppHandlerAdapter;
+import org.cef.CefApp;
+import org.cef.CefClient;
+import org.cef.CefSettings;
+import org.cef.browser.CefBrowser;
+import org.cef.browser.CefFrame;
+import org.cef.callback.CefStringVisitor;
+import org.cef.handler.CefDisplayHandlerAdapter;
+import org.cef.handler.CefLoadHandlerAdapter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
- * FulltextFetcher implementation that attempts to find a PDF URL at ACS.
+ * FulltextFetcher implementation that attempts to find a PDF URL at ACS.
*/
public class ACS implements FulltextFetcher {
private static final Logger LOGGER = LoggerFactory.getLogger(ACS.class);
@@ -42,24 +46,51 @@ public Optional findFullText(BibEntry entry) throws IOException {
String source = SOURCE.formatted(doi.get().getDOI());
- try (final WebClient webClient = new WebClient(BrowserVersion.CHROME)) {
- webClient.getOptions().setSSLClientProtocols("TLSv1.3", "TLSv1.2");
- // inspired by https://www.innoq.com/en/blog/2016/01/webscraping/
- webClient.getCookieManager().setCookiesEnabled(true);
- webClient.getOptions().setJavaScriptEnabled(true);
- webClient.getOptions().setTimeout(10_000);
- webClient.waitForBackgroundJavaScript(5000);
- webClient.getOptions().setThrowExceptionOnScriptError(false);
- webClient.getOptions().setPrintContentOnFailingStatusCode(true);
+ CefAppBuilder builder = new CefAppBuilder();
+ builder.setAppHandler(new MavenCefAppHandlerAdapter(){}); // empty anonymous subclass: no adapter callback is overridden
+ CefApp cefApp;
+ try {
+ cefApp = builder.build();
+ } catch (Exception e) { // any failure of build() ends up here, whatever its type
+ LOGGER.error("Could not initialize CEF", e);
+ throw new IOException(e); // reported as IOException, keeping the original exception as cause
+ }
+
+ CefClient client = cefApp.createClient();
+ CefBrowser browser = client.createBrowser(source, false, false);
+
+ // After a load finishes, hand the rendered markup of the main frame to the console listener.
+ client.addLoadHandler(new CefLoadHandlerAdapter() {
+ @Override
+ public void onLoadEnd(CefBrowser browser, CefFrame frame, int httpStatusCode) {
+ LOGGER.debug("Finished loading {} (HTTP status {})", frame.getURL(), httpStatusCode);
+ if (frame.isMain()) {
+ // The executeJavaScript call yields no value here, so a bare expression would be lost;
+ // console.log makes the markup show up as a console message that a display
+ // handler's onConsoleMessage can pick up.
+ frame.executeJavaScript("console.log(document.documentElement.outerHTML);", frame.getURL(), 0);
+ }
+ }
+ });
- HtmlPage page = webClient.getPage(source);
- boolean pdfButtonExists = page.querySelectorAll("a[title=\"PDF\"].article__btn__secondary").isEmpty();
- if (pdfButtonExists) {
- LOGGER.info("Fulltext PDF found at ACS.");
- // We "guess" the URL instead of parsing the HTML for the actual link
- return Optional.of(new URL(source.replaceFirst("/abs/", "/pdf/")));
+ client.addDisplayHandler(new CefDisplayHandlerAdapter() {
+ @Override
+ public boolean onConsoleMessage(CefBrowser browser, CefSettings.LogSeverity level, String message, String source, int line) {
+ // Console messages are the intended channel for the page markup; log them through SLF4J rather than stdout.
+ LOGGER.debug("Page HTML content:\n{}", message);
+ return true; // NOTE(review): presumably marks the message as handled so CEF does not print it itself — confirm
}
+ });
+
+ browser.loadURL(source);
+
+ try {
+ Thread.sleep(5000); // fixed 5 s pause after loadURL; presumably gives the load/console callbacks time to fire
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt(); // restore the interrupt flag so callers can still observe the interruption
+ throw new IOException("Interrupted while waiting for the ACS page to load", e);
}
+
return Optional.empty();
}