3

huzpsb · Oct 3, 2023 · e6ca88a · e6ca88a
1 parent 5e8c4e9
commit e6ca88a
Show file tree

Hide file tree

Showing 5 changed files with 12 additions and 7 deletions.
diff --git a/README.MD b/README.MD
@@ -25,20 +25,23 @@ Crawl4j可能不会去考虑：
 - 热插拔：Crawl4j的索引数据库不支持热插拔。包括Token持久化在内的模块全部不会被考虑实现。
 
 ## 使用
-Crawl4j需要Java 21或以上版本。  
+
+Crawl4j需要Java 8或以上版本。  
 对于普通用户，只需要从[Release](https://github.com/huzpsb/crawl4j/releases)页面下载最新的jar包并运行即可。  
 我们提供交互式的指南，让你可以轻松地使用Crawl4j。
 
 ## 开发与TODO
+
 Crawl4j的开发目前处于早期阶段，因此我们欢迎任何形式的贡献。   
 你可以通过提交Issue来提出你的想法，或者直接提交Pull Request。  
 目前而言，我们正在着手完成：
 
-- [ ] 网页版搜索引擎 
+- [ ] 网页版搜索引擎
 - [ ] 更好的Spider.txt合规性支持
 - [x] 交互式引导
 
 ## 注意事项
+
 1，Crawl4j目前处于早期阶段，因此可能会有很多问题。并且设计上Crawl4j就不是为企业准备的。  
 如果有因为Crawl4j的缺陷导致的任何损失，我无法承担任何责任。如果不能接受这一点，请不要使用Crawl4j。
 

diff --git a/src/org/eu/huzpsb/crawl4j/protocol/Fetcher.java b/src/org/eu/huzpsb/crawl4j/protocol/Fetcher.java
@@ -34,7 +34,7 @@ public static String getPage(String url) {
             while ((length = inputStream.read(buffer)) != -1) {
                 result.write(buffer, 0, length);
             }
-            String str = result.toString(StandardCharsets.UTF_8);
+            String str = result.toString("UTF-8");
             if (str.contains("\ufffd")) {
                 str = result.toString("GBK");
             }

diff --git a/src/org/eu/huzpsb/crawl4j/reporter/Collector.java b/src/org/eu/huzpsb/crawl4j/reporter/Collector.java
@@ -10,7 +10,7 @@ public class Collector {
 
     static {
         try {
-            writer = new PrintWriter("c4j.db", StandardCharsets.UTF_8);
+            writer = new PrintWriter("c4j.db", "UTF-8");
         } catch (Exception e) {
             throw new RuntimeException(e);
         }

diff --git a/src/org/eu/huzpsb/crawl4j/search/Indexer.java b/src/org/eu/huzpsb/crawl4j/search/Indexer.java
@@ -27,7 +27,7 @@ public static void doIndex() {
             Fetcher.UA = "Mozilla/5.0 (compatible; Like Baiduspider; Crawl4j/1.0; +https://huzpsb.eu.org/crawl4j/)";
         }
         for (int i = 0; i < 50; i++) {
-            Thread.ofVirtual().name("T-" + i).start(new Worker());
+            new Thread(new Worker()).start();
         }
         try {
             Thread.sleep(time * 60L * 1000L);

diff --git a/src/org/eu/huzpsb/crawl4j/search/SearchCLI.java b/src/org/eu/huzpsb/crawl4j/search/SearchCLI.java
@@ -20,7 +20,7 @@ public static void main(String[] args) throws Exception {
         // token -> (articleId -> weight)
         Map<Integer, String> lines = new HashMap<>();
         Map<Integer, String> titles = new HashMap<>();
-        Scanner scanner = new Scanner(db, StandardCharsets.UTF_8);
+        Scanner scanner = new Scanner(db, "UTF-8");
         int idx = 1000000;
         while (true) {
             try {
@@ -44,9 +44,11 @@ public static void main(String[] args) throws Exception {
             System.exit(0);
         }
         System.out.println("索引建立完成，耗时：" + (System.currentTimeMillis() - start) + "ms");
+        Scanner sc = new Scanner(System.in);
         while (true) {
             System.out.print("请输入关键词：");
-            String keyword = new Scanner(System.in).nextLine();
+            String keyword = sc.nextLine();
+            System.out.println(keyword);
             start = System.currentTimeMillis();
             Map<Integer, Integer> result = new HashMap<>();
             // articleId -> weight