-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEdxUrlsParser.php
58 lines (50 loc) · 1.34 KB
/
EdxUrlsParser.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
<?php
/**
* Class for extracting course urls from edx.org
*
* @author Tatiana Braginets
*/
require_once 'AbstractUrlsParser.php';
require_once 'simple_html_dom.php';
class EdxUrlsParser extends AbstractUrlsParser
{
    /**
     * Value returned by file_get_html() for the edx.org front page.
     * parse() treats a falsy value as "nothing to parse".
     */
    private $html_data;

    /**
     * Short description of each course, keyed by the course url built in parse().
     */
    private $courseShortDesc;

    /**
     * Inits parser by getting web content of edx.org
     *
     * Performs the HTTP request immediately; no parsing happens until parse() is called.
     */
    public function __construct()
    {
        $this->urls = array();
        // NOTE(review): parse() writes image urls to $this->courseImages, while this
        // constructor initialises $this->urlsToImages. The parent class is not visible
        // from this file, so both are left as they were -- confirm which property
        // AbstractUrlsParser actually reads.
        $this->urlsToImages = array();
        $this->courseShortDesc = array();
        $this->html_data = file_get_html('http://www.edx.org');
    }

    /**
     * Extracts course urls, images and short descriptions for edx.org
     *
     * For every <article class="course"> on the page this:
     *  - appends "https://www.edx.org/courses/<article id>/about" to $this->urls;
     *  - stores the absolute cover image url in $this->courseImages[<url>]
     *    (null when the article has no "div.cover-image img");
     *  - stores the first text node of "div.desc p" in $this->courseShortDesc[<url>]
     *    (null when there is none).
     *
     * Articles without a non-empty id attribute are skipped, because no valid
     * course url can be built for them.
     */
    public function parse()
    {
        // Marked as parsed up front, even if the page could not be loaded.
        $this->isParsed = true;

        if (!$this->html_data) {
            return;
        }

        foreach ($this->html_data->find('article.course') as $article) {
            $id = $article->id;
            // simple_html_dom yields a boolean for a missing or valueless attribute.
            if (!is_string($id) || $id === '') {
                continue;
            }

            $url = "https://www.edx.org/courses/" . $id . '/about';
            $this->urls[] = $url;

            // Search inside the article itself instead of re-querying the whole
            // document by id for every course.
            $image = $article->find('div.cover-image img', 0);
            $desc = $article->find('div.desc p text', 0);

            $this->courseImages[$url] = $image ? "https://www.edx.org" . $image->src : null;
            $this->courseShortDesc[$url] = $desc ? $desc : null;
        }
    }

    /**
     * Get course short description
     *
     * Calls checkState() first (inherited; presumably verifies that parse() has
     * been run -- confirm against AbstractUrlsParser).
     *
     * @param string $url course url as built by parse()
     * @return mixed short description for a course, exactly as stored by parse()
     *               (the text node returned by simple_html_dom; presumably
     *               string-castable -- confirm), or null if the url is unknown
     *               or the course had no description
     */
    public function getCourseShortDesc($url)
    {
        $this->checkState();
        return isset($this->courseShortDesc[$url]) ? $this->courseShortDesc[$url] : null;
    }
}
?>