-
Notifications
You must be signed in to change notification settings - Fork 0
/
index.php
153 lines (122 loc) · 3.42 KB
/
index.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
<?
/**
 * Recursive iterator over the child nodes of a DOMNode.
 *
 * Wraps the node's `childNodes` list so that a DOM tree can be walked with
 * RecursiveIteratorIterator. Each level of the tree gets its own instance
 * (see getChildren()).
 *
 * The methods carry #[\ReturnTypeWillChange] so that PHP 8.1+ does not emit
 * "return type should be compatible" deprecation notices for the Iterator /
 * RecursiveIterator contract. On older PHP versions the attribute line is
 * parsed as an ordinary comment, so the class stays loadable there.
 */
class RecursiveDOMIterator implements RecursiveIterator
{
    /**
     * Current position in the DOMNodeList.
     * @var int
     */
    protected $_position;

    /**
     * The DOMNodeList with all children to iterate over.
     * @var DOMNodeList
     */
    protected $_nodeList;

    /**
     * @param DOMNode $domNode Node whose direct children are iterated.
     */
    public function __construct(DOMNode $domNode)
    {
        $this->_position = 0;
        $this->_nodeList = $domNode->childNodes;
    }

    /**
     * Returns the DOMNode at the current position.
     *
     * @return DOMNode|null Null when the position is past the end of the list.
     */
    #[\ReturnTypeWillChange]
    public function current()
    {
        return $this->_nodeList->item($this->_position);
    }

    /**
     * Returns an iterator over the children of the current node.
     *
     * @return RecursiveDOMIterator
     */
    #[\ReturnTypeWillChange]
    public function getChildren()
    {
        return new self($this->current());
    }

    /**
     * Tells whether the current node has child nodes to descend into.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function hasChildren()
    {
        return $this->current()->hasChildNodes();
    }

    /**
     * Returns the current position (zero-based index into the node list).
     *
     * @return int
     */
    #[\ReturnTypeWillChange]
    public function key()
    {
        return $this->_position;
    }

    /**
     * Moves the current position to the next sibling.
     *
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function next()
    {
        $this->_position++;
    }

    /**
     * Rewinds the iterator to the first child.
     *
     * @return void
     */
    #[\ReturnTypeWillChange]
    public function rewind()
    {
        $this->_position = 0;
    }

    /**
     * Checks whether the current position is inside the node list.
     *
     * @return bool
     */
    #[\ReturnTypeWillChange]
    public function valid()
    {
        return $this->_position < $this->_nodeList->length;
    }
}
// Fetch a remote page, parse it as HTML and dump a flat listing of its
// elements: tag name, a few attributes of interest, then the text content.

// Page to scrape. The scheme is spelled out explicitly; cURL would otherwise
// fall back to http:// on its own.
$url = 'http://www.investing.com/equities/apple-computer-inc?cid=23227';

// Everything echoed below originates from the remote page, so it is escaped
// before being written into this script's own HTML output.
$escape = function ($text) {
    return htmlspecialchars((string) $text, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
// Body only: with CURLOPT_HEADER enabled the raw HTTP response headers would
// be prepended to $html and end up being parsed as document text.
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// CURLOPT_ENCODING controls the Accept-Encoding header (gzip, deflate, ...),
// not the character set. An empty string advertises every encoding that
// cURL supports and enables transparent decompression.
curl_setopt($ch, CURLOPT_ENCODING, '');
curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.2) Gecko/20090729 Firefox/3.5.2 GTB5');
$html = curl_exec($ch);
// Read the error text before the handle is released.
$fetchError = curl_error($ch);
curl_close($ch);

// curl_exec() returns false on failure; an empty body cannot be parsed either
// (DOMDocument::loadHTML() rejects an empty string on PHP 8).
if ($html === false || $html === '') {
    echo 'Unable to fetch ' . $escape($url) . ': ' . $escape($fetchError) . "<br>";
    return;
}

// Real-world markup is rarely valid; collect libxml's parse diagnostics
// internally instead of emitting a PHP warning for each of them.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$dom = new DOMDocument();
$dom->loadHTML($html);
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

$dit = new RecursiveIteratorIterator(
    new RecursiveDOMIterator($dom),
    RecursiveIteratorIterator::SELF_FIRST
);

// Elements that are not listed themselves. Their descendants are still
// visited, because skipping only suppresses the output for that one node.
$skippedTags = ['html', 'body', 'script', 'style', 'div', 'textarea'];

foreach ($dit as $node) {
    if ($node->nodeType !== XML_ELEMENT_NODE) {
        continue;
    }

    $tag = $node->nodeName;
    if (in_array($tag, $skippedTags, true)) {
        continue;
    }

    echo $escape($tag) . "<br>";

    if ($tag === 'link') {
        echo $escape($node->getAttribute('rel')) . "<br>";
        echo $escape($node->getAttribute('href')) . "<br>";
    }
    if ($tag === 'meta') {
        echo $escape($node->getAttribute('name')) . "<br>";
        echo $escape($node->getAttribute('content')) . "<br>";
    }
    if ($tag === 'a') {
        echo $escape($node->getAttribute('href')) . "<br>";
    }

    echo $escape($node->nodeValue) . "<br><hr>";
}
?>