-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWebScrap.java
69 lines (66 loc) · 2.21 KB
/
WebScrap.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import java.util.ArrayList;
import java.util.List;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlAnchor;
import com.gargoylesoftware.htmlunit.html.HtmlDivision;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlListItem;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
/**
 * Scrapes the listing "placards" from an apartments.com search-results page
 * and prints each one to standard output as a single-line JSON object.
 *
 * <p>For every {@code <li>} under the element with class
 * {@code placardContainer}, the scraper records the title and link of the
 * {@code a.placardTitle} anchor and the {@code data-img} attribute of every
 * {@code div.item} found inside that listing.
 */
public class WebScrap {

    /** Search-results page whose listing placards are scraped. */
    private static final String SEARCH_URL =
            "https://www.apartments.com/dallas-tx/?bb=6_4xqq6l1J8v6pgivB";

    /**
     * Fetches {@link #SEARCH_URL} with CSS and JavaScript processing disabled,
     * extracts the listings and prints them as JSON, one per line.
     *
     * <p>Any failure (network, parsing, serialization) is reported by printing
     * the stack trace; the method does not rethrow.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // try-with-resources guarantees the client is closed even when the page
        // has no placards or an exception is thrown part-way through.
        try (WebClient client = new WebClient()) {
            client.getOptions().setCssEnabled(false);
            client.getOptions().setJavaScriptEnabled(false);

            HtmlPage page = client.getPage(SEARCH_URL);
            List<Item> itemList = extractItems(page);

            ObjectMapper mapper = new ObjectMapper();
            for (Item item : itemList) {
                System.out.println(mapper.writeValueAsString(item));
            }
        } catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Builds one {@code Item} per listing element found on the page.
     *
     * @param page the parsed search-results page
     * @return the extracted items in document order; empty when the page has
     *         no placards
     */
    private static List<Item> extractItems(HtmlPage page) {
        List<Item> items = new ArrayList<>();
        List<?> placards = page.getByXPath("//*[@class=\"placardContainer\"]/li");
        for (Object node : placards) {
            items.add(toItem((HtmlElement) node));
        }
        return items;
    }

    /**
     * Converts a single listing element into an {@code Item}.
     *
     * <p>The title and href are only set when the listing contains an
     * {@code a.placardTitle} anchor, and the image list is only set when at
     * least one {@code div.item} is present.
     *
     * @param placard the {@code <li>} element of one listing
     * @return the populated item
     */
    private static Item toItem(HtmlElement placard) {
        Item item = new Item();

        // The leading "." keeps the search inside this listing; a bare "//"
        // is evaluated from the document root and would match the first
        // anchor on the whole page for every listing.
        HtmlAnchor anchor =
                (HtmlAnchor) placard.getFirstByXPath(".//a[@class='placardTitle']");
        if (anchor != null) {
            item.setPlacardTitle(anchor.getAttribute("title"));
            item.setPlacardHref(anchor.getHrefAttribute());
        }

        List<?> imageDivs = placard.getByXPath(".//div[@class='item']");
        if (!imageDivs.isEmpty()) {
            List<String> imageUrls = new ArrayList<>(imageDivs.size());
            for (Object node : imageDivs) {
                HtmlDivision imageDiv = (HtmlDivision) node;
                imageUrls.add(imageDiv.getAttribute("data-img"));
            }
            item.setImageUrl(imageUrls);
        }
        return item;
    }
}