This repository has been archived by the owner on Feb 4, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Tom Janssens
committed
Apr 28, 2015
1 parent
84a175f
commit f9ca357
Showing
21 changed files
with
1,050 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
<?php | ||
/** | ||
* Belgian Police Web Platform - Links Component | ||
* | ||
* @copyright Copyright (C) 2012 - 2013 Timble CVBA. (http://www.timble.net) | ||
* @license GNU GPLv3 <http://www.gnu.org/licenses/gpl.html> | ||
* @link https://github.com/belgianpolice/internet-platform | ||
*/ | ||
|
||
use Nooku\Library; | ||
|
||
/**
 * Link controller.
 *
 * Declares no members of its own; all behaviour is inherited from
 * Library\ControllerModel.
 */
class LinksControllerLink extends Library\ControllerModel
{
}
15 changes: 15 additions & 0 deletions
15
application/admin/component/links/controller/permission/link.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
<?php | ||
/** | ||
* Belgian Police Web Platform - Links Component | ||
* | ||
* @copyright Copyright (C) 2012 - 2013 Timble CVBA. (http://www.timble.net) | ||
* @license GNU GPLv3 <http://www.gnu.org/licenses/gpl.html> | ||
* @link https://github.com/belgianpolice/internet-platform | ||
*/ | ||
|
||
use Nooku\Library; | ||
|
||
/**
 * Link controller permission.
 *
 * Declares no members of its own; all permission checks are inherited from
 * ApplicationControllerPermissionAbstract.
 */
class LinksControllerPermissionLink extends ApplicationControllerPermissionAbstract
{
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
<?php | ||
/** | ||
* Belgian Police Web Platform - Links Component | ||
* | ||
* @copyright Copyright (C) 2012 - 2013 Timble CVBA. (http://www.timble.net) | ||
* @license GNU GPLv3 <http://www.gnu.org/licenses/gpl.html> | ||
* @link https://github.com/belgianpolice/internet-platform | ||
*/ | ||
|
||
use Nooku\Library; | ||
|
||
/** | ||
* Link HTML View | ||
* | ||
* @author Tom Janssens <http://nooku.assembla.com/profile/tomjanssens> | ||
* @package Component\Links | ||
*/ | ||
class LinksViewLinkHtml extends Library\ViewHtml
{
    /**
     * Renders the view after attaching the relations of the current link.
     *
     * Fetches the row from the view's model, asks the
     * `com:links.model.relations` object for the rowset matching that row's
     * id (via `links_link_id()`), and stores it on the view as `childs`
     * before delegating to the parent renderer.
     *
     * @return mixed Whatever the parent `render()` returns.
     */
    public function render()
    {
        $link = $this->getModel()->getRow();

        $this->childs = $this->getObject('com:links.model.relations')
            ->links_link_id($link->id)
            ->getRowset();

        return parent::render();
    }
}
50 changes: 50 additions & 0 deletions
50
application/admin/component/links/view/link/templates/default.html.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
<?
/**
 * Belgian Police Web Platform - Links Component
 *
 * Read-only detail view of a single link: shows its title and URL, followed
 * by a table of the rows in $childs (title, URL and status of each).
 *
 * Every value printed from $link and $childs goes through escape(): the
 * child_* fields are echoed into HTML and into an href attribute, so they must
 * never be emitted raw.
 *
 * @copyright	Copyright (C) 2012 - 2013 Timble CVBA. (http://www.timble.net)
 * @license		GNU GPLv3 <http://www.gnu.org/licenses/gpl.html>
 * @link		https://github.com/belgianpolice/internet-platform
 */
?>

<script src="assets://js/koowa.js" />

<ktml:module position="actionbar">
    <ktml:toolbar type="actionbar">
</ktml:module>

<form action="" method="post" id="link-form" class="-koowa-form">
    <div class="main">
        <div class="title">
            <input disabled class="required" type="text" name="title" maxlength="255" value="<?= escape($link->title) ?>" placeholder="<?= translate('Title') ?>" />
            <div class="slug">
                <span class="add-on">URL</span>
                <input disabled type="text" name="slug" maxlength="255" value="<?= escape($link->url) ?>" />
            </div>
        </div>

        <div class="scrollable">
            <fieldset>
                <legend><?= translate('Mentioned on') ?></legend>
                <table class="table table--striped">
                    <thead>
                        <tr>
                            <th><?= translate('Title') ?></th>
                            <th><?= translate('Status') ?></th>
                        </tr>
                    </thead>
                    <tbody>
                        <? foreach($childs AS $child) : ?>
                            <tr>
                                <td><?= escape($child->child_title) ?><br />
                                    <? /* rel="noopener noreferrer": the target is an external page opened in a new tab. */ ?>
                                    <small><a target="_blank" rel="noopener noreferrer" href="<?= escape($child->child_url) ?>"><?= escape($child->child_url) ?></a></small></td>
                                <td><?= escape($child->child_status) ?></td>
                            </tr>
                        <? endforeach ?>
                    </tbody>
                </table>
            </fieldset>
        </div>
    </div>
</form>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
<?xml version="1.0" encoding="utf-8"?> | ||
<metadata> | ||
<view title="Links"> | ||
<message><![CDATA[]]></message> | ||
</view> | ||
</metadata> |
66 changes: 66 additions & 0 deletions
66
application/admin/component/links/view/links/templates/default.html.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
<?
/**
 * Belgian Police Web Platform - Links Component
 *
 * Sortable, paginated grid of links. Each row shows the link title and URL
 * (linking to the detail view), the `links` and `status` columns, and the
 * last crawled / last checked dates.
 *
 * Title, URL and status are passed through escape() before being printed so
 * that stored values cannot inject markup into the grid.
 *
 * @copyright	Copyright (C) 2012 - 2013 Timble CVBA. (http://www.timble.net)
 * @license		GNU GPLv3 <http://www.gnu.org/licenses/gpl.html>
 * @link		https://github.com/belgianpolice/internet-platform
 */
?>

<script src="assets://js/koowa.js" />
<style src="assets://css/koowa.css" />

<form action="" method="get" class="-koowa-grid">
    <table>
        <thead>
            <tr>
                <th width="100%">
                    <?= helper('grid.sort', array('column' => 'url', 'title' => 'Link')) ?>
                </th>
                <th>
                    <?= helper('grid.sort', array('column' => 'links')) ?>
                </th>
                <th>
                    <?= helper('grid.sort', array('column' => 'status')) ?>
                </th>
                <th>
                    <?= helper('grid.sort', array('column' => 'last_crawled_on', 'title' => 'Last crawled on')) ?>
                </th>
                <th>
                    <?= helper('grid.sort', array('column' => 'last_checked_on', 'title' => 'Last checked on')) ?>
                </th>
            </tr>
        </thead>
        <tfoot>
            <tr>
                <? /* The grid has five columns; the footer cell spans all of them. */ ?>
                <td colspan="5">
                    <?= helper('com:application.paginator.pagination', array('total' => $total)) ?>
                </td>
            </tr>
        </tfoot>
        <tbody>
            <? foreach ($links as $link) : ?>
                <tr>
                    <td class="ellipsis" style="padding: 8px 10px">
                        <a href="<?= route('view=link&id='.$link->id); ?>"><?= escape($link->title) ?><br /><small><?= escape($link->url) ?></small></a>
                    </td>
                    <td>
                        <?= $link->links ?>
                    </td>
                    <td>
                        <?= escape($link->status) ?>
                    </td>
                    <td>
                        <? /* Falls back to the creation date while the link has not been crawled yet. */ ?>
                        <?= helper('date.format', array('date'=> $link->last_crawled_on ? $link->last_crawled_on : $link->created_on, 'format' => translate('D d.m.Y - G:i'))) ?>
                    </td>
                    <td>
                        <? if($link->last_checked_on) : ?>
                            <?= helper('date.format', array('date'=> $link->last_checked_on, 'format' => translate('D d.m.Y - G:i'))) ?>
                        <? endif ?>
                    </td>
                </tr>
            <? endforeach; ?>
        </tbody>
    </table>
</form>
8 changes: 8 additions & 0 deletions
8
application/admin/component/links/view/links/templates/default.xml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
<?xml version="1.0" encoding="utf-8"?> | ||
<metadata> | ||
<layout title="Default layout"> | ||
<message> | ||
<![CDATA[]]> | ||
</message> | ||
</layout> | ||
</metadata> |
155 changes: 155 additions & 0 deletions
155
application/manager/component/links/controller/Crawler.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,155 @@ | ||
<?php | ||
/** | ||
* @author Oscar Casajuana a.k.a. elboletaire <elboletaire {at} underave {dot} net> | ||
*/ | ||
/* | ||
* This program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License | ||
* along with this program. If not, see <http://www.gnu.org/licenses/> | ||
*/ | ||
|
||
/**
 * Minimal recursive link crawler.
 *
 * Starting from a URL, loads each page with DOMDocument, records the page URL
 * in the result list and follows every <a href> it finds, up to a configurable
 * depth. Optionally restricts the crawl to the host of the start URL.
 */
class Crawler
{
    /** @var int Number of link levels to follow; a depth of 0 crawls nothing. */
    private $depth = 2;

    /** @var string|null Start URL of the crawl. */
    private $url;

    /** @var array List of array('url' => string) entries, one per visited page. */
    private $results = array();

    /** @var bool When true, URLs whose host differs from $host are rejected. */
    private $same_host = false;

    /** @var string|null Host extracted from the start URL by getHostFromUrl(). */
    private $host;

    /**
     * URLs already visited by this instance, used as a set (URL => true).
     *
     * Kept per instance: a function-level static here would leak visited URLs
     * between unrelated Crawler objects in the same process.
     *
     * @var array
     */
    private $seen = array();

    /**
     * @param int $depth Number of link levels to follow. Cast to int so that
     *                   values such as '0' or 2.0 still hit the `=== 0` stop
     *                   condition in _crawl().
     */
    public function setDepth($depth)
    {
        $this->depth = (int) $depth;
    }

    /** @param string|null $host Host that crawled URLs are compared against. */
    public function setHost($host)
    {
        $this->host = $host;
    }

    /** @return array The pages recorded so far, as array('url' => string) entries. */
    public function getResults()
    {
        return $this->results;
    }

    /** @param bool $same_host Whether to reject URLs on a different host. */
    public function setSameHost($same_host)
    {
        $this->same_host = $same_host;
    }

    /**
     * Sets the start URL and derives the reference host from it.
     *
     * @param string $url
     */
    public function setUrl($url)
    {
        $this->url = $url;
        $this->setHost($this->getHostFromUrl($url));
    }

    /**
     * @param string|null $url       Start URL; may also be set later via setUrl().
     * @param int|null    $depth     Crawl depth; null keeps the default.
     * @param bool        $same_host Restrict the crawl to the start URL's host.
     */
    public function __construct($url = null, $depth = null, $same_host = false)
    {
        if (!empty($url)) {
            $this->setUrl($url);
        }
        if (null !== $depth) {
            $this->setDepth($depth);
        }
        $this->setSameHost($same_host);
    }

    /**
     * Runs the crawl from the configured start URL.
     *
     * @return array The accumulated results (see getResults()).
     * @throws Exception When no start URL has been set.
     */
    public function crawl()
    {
        if (empty($this->url)) {
            throw new Exception('URL must be set');
        }

        // Collect libxml parse errors internally while crawling, then restore
        // the caller's setting instead of leaving the global flag switched on.
        $previous = libxml_use_internal_errors(true);
        try {
            $this->_crawl($this->url, $this->depth);
        } catch (Exception $e) {
            libxml_use_internal_errors($previous);
            throw $e;
        }
        libxml_use_internal_errors($previous);

        return $this->results;
    }

    /**
     * Visits one URL and recurses into the links found on it.
     *
     * A negative depth never reaches zero, so in that case the recursion is
     * bounded only by the set of already visited URLs.
     *
     * @param string $url   URL to visit.
     * @param int    $depth Remaining levels; nothing is loaded when it is 0.
     * @return string|null  The normalised URL that was visited, or null when skipped.
     */
    private function _crawl($url, $depth)
    {
        if (empty($url)) {
            return;
        }

        if (!$url = $this->buildUrl($this->url, $url)) {
            return;
        }

        if ($depth === 0 || isset($this->seen[$url])) {
            return;
        }

        $this->seen[$url] = true;

        $dom = new DOMDocument('1.0');
        // Best effort: an unreachable or unparsable page is still recorded
        // below, it simply yields no anchors to follow.
        @$dom->loadHTMLFile($url);
        // Drop the buffered parse errors so they do not pile up across pages.
        libxml_clear_errors();

        $this->results[] = array(
            'url' => $url,
        );

        foreach ($dom->getElementsByTagName('a') as $element) {
            if (!$href = $this->buildUrl($url, $element->getAttribute('href'))) {
                continue;
            }
            $this->_crawl($href, $depth - 1);
        }

        return $url;
    }

    /**
     * Turns an href found on a page into an absolute, crawlable URL.
     *
     * Returns false for links that must not be followed: empty hrefs,
     * fragment-only links, non-HTTP schemes (mailto:, tel:, javascript:, ...),
     * hrefs that cannot be resolved against $url, and - when same-host mode is
     * on - URLs on another host. Trailing slashes are stripped from the result.
     *
     * @param string $url  Absolute URL of the page the href was found on.
     * @param string $href Raw href attribute value.
     * @return string|false
     */
    private function buildUrl($url, $href)
    {
        $href = trim((string) $href);

        if ('' === $href || '#' === $href[0]) {
            return false;
        }

        // Match the full "http(s)://" prefix; a bare 'http' prefix test would
        // mistake relative paths such as "httpdocs/page.html" for absolute URLs.
        if (!preg_match('#^https?://#i', $href)) {
            // Any other explicit scheme is not something this crawler can load.
            if (preg_match('#^[a-z][a-z0-9+.-]*:#i', $href)) {
                return false;
            }

            $parts = parse_url($url);
            if (!is_array($parts) || empty($parts['scheme']) || empty($parts['host'])) {
                return false;
            }

            if (0 === strpos($href, '//')) {
                // Protocol-relative link: reuse the scheme of the current page.
                $href = $parts['scheme'] . ':' . $href;
            } else {
                $base_path = isset($parts['path']) ? $parts['path'] : '/';
                $href = $this->buildUrlFromParts($parts) . $this->resolvePath($base_path, $href);
            }
        }

        $href = rtrim($href, '/');
        if ($this->same_host && $this->host != $this->getHostFromUrl($href)) {
            return false;
        }

        return $href;
    }

    /**
     * Resolves a relative href against the path of the page it appeared on.
     *
     * No dot-segment normalisation is done: "../" is left in the result for
     * the server to resolve.
     *
     * @param string $base_path Path component of the current page URL.
     * @param string $href      Non-empty relative reference (no scheme, no "//").
     * @return string Path (plus any query/fragment carried by $href).
     */
    private function resolvePath($base_path, $href)
    {
        // Root-relative: the href already is the full path.
        if ('/' === $href[0]) {
            return $href;
        }

        // Query-only: same document, different query string.
        if ('?' === $href[0]) {
            return $base_path . $href;
        }

        // Remove exactly one leading "./". ltrim($href, './') would strip a
        // character set and eat the dot of names like ".well-known".
        if (0 === strpos($href, './')) {
            $href = (string) substr($href, 2);
        }

        // Everything up to and including the last slash is the directory.
        $slash = strrpos($base_path, '/');
        $directory = (false === $slash) ? '/' : substr($base_path, 0, $slash + 1);

        return $directory . $href;
    }

    /**
     * Rebuilds "scheme://[user:pass@]host[:port]" from parse_url() output.
     *
     * @param array $parts Result of parse_url(); must contain 'scheme' and 'host'.
     * @return string
     */
    private function buildUrlFromParts($parts)
    {
        $new_href = $parts['scheme'] . '://';
        if (isset($parts['user']) && isset($parts['pass'])) {
            $new_href .= $parts['user'] . ':' . $parts['pass'] . '@';
        }
        $new_href .= $parts['host'];
        if (isset($parts['port'])) {
            $new_href .= ':' . $parts['port'];
        }
        return $new_href;
    }

    /**
     * Extracts the trailing "name.tld" (or "name.tld.cc") part of a URL's host.
     *
     * @param string $url
     * @return string|null Lower-cased match, or null when the URL has no host
     *                     or the host does not match the pattern (e.g. "localhost").
     */
    private function getHostFromUrl($url)
    {
        $parts = parse_url($url);
        if (!is_array($parts) || empty($parts['host'])) {
            return null;
        }

        // Host names are case-insensitive; normalise before comparing.
        $name = strtolower($parts['host']);
        if (!preg_match("@([^/.]+)\.([^.]{2,6}(?:\.[^.]{2,3})?)$@", $name, $host)) {
            return null;
        }

        return $host[0];
    }
}
Oops, something went wrong.