-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathNewsScraper.java
56 lines (43 loc) · 1.47 KB
/
NewsScraper.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
/*
* December 28, 2023
* Author: Mrida Yawale
* Purpose: Finds and stores headlines of the three news items
*/
import java.io.IOException;
import java.util.Scanner;
/**
 * Fetches a page through URLReader and cuts its HTML into up to three
 * NewsItem segments, which are stored in a News object.
 */
public class NewsScraper {
    /** News exposes exactly three slots: first, second and third item. */
    private static final int MAX_ITEMS = 3;

    /**
     * Reads the content behind the given URL.
     *
     * @param url address passed to the URLReader constructor
     * @return whatever {@code URLReader.readURL()} returns for that address
     */
    public String getHTML(String url) {
        URLReader ur = new URLReader(url);
        return ur.readURL();
    }

    /**
     * Splits {@code html} at each occurrence of {@code pattern} and stores the
     * first three segments in a News object.
     *
     * <p>A segment starts at an occurrence of {@code pattern} and runs up to
     * (not including) the next occurrence. The segment that starts at the last
     * occurrence runs to the end of {@code html}, so a page holding exactly
     * three occurrences still yields three items.
     *
     * @param html    page source to search; {@code null} yields an empty News
     * @param pattern marker that begins every news item; {@code null} or an
     *                empty string yields an empty News
     * @return a News whose first/second/third items are set for as many
     *         segments as were found (at most three)
     * @throws IOException never thrown by this method itself; kept in the
     *                     signature so existing callers keep compiling
     */
    public News getHeadlines(String html, String pattern) throws IOException {
        News news = new News();

        // An empty pattern matches at every index and would never let the
        // search terminate, so it is rejected together with null input.
        if (html == null || pattern == null || pattern.isEmpty()) {
            return news;
        }

        int beginIndex = html.indexOf(pattern);
        int count = 0;

        // Stop as soon as all three slots are filled; later segments would
        // only be built and thrown away.
        while (beginIndex >= 0 && count < MAX_ITEMS) {
            // The start of the next item marks the end of the current one.
            int nextIndex = html.indexOf(pattern, beginIndex + 1);
            int endIndex = (nextIndex >= 0) ? nextIndex : html.length();

            NewsItem newsItem = new NewsItem(html.substring(beginIndex, endIndex));
            count++;
            storeItem(news, newsItem, count);

            // A negative nextIndex ends the loop after the trailing segment.
            beginIndex = nextIndex;
        }
        return news;
    }

    /** Puts the item into the slot matching its 1-based position. */
    private static void storeItem(News news, NewsItem newsItem, int position) {
        if (position == 1) {
            news.setFirstItem(newsItem);
        } else if (position == 2) {
            news.setSecondItem(newsItem);
        } else if (position == 3) {
            news.setThirdItem(newsItem);
        }
    }
}