Skip to content

Commit

Permalink
[HaveIBeenPwnedBridge] Use API to get Data (#2720)
Browse files Browse the repository at this point in the history
  • Loading branch information
yamanq authored May 12, 2022
1 parent 73b1a6a commit e5829d3
Showing 1 changed file with 54 additions and 36 deletions.
90 changes: 54 additions & 36 deletions bridges/HaveIBeenPwnedBridge.php
Original file line number Diff line number Diff line change
@@ -1,4 +1,11 @@
<?php
/**
* Uses the HIBP API as documented here:
* https://haveibeenpwned.com/API/v3#AllBreaches
*
* Fetches all breaches and orders them either by the date of the breach or by
* the date the breach was added to HIBP.
*/
class HaveIBeenPwnedBridge extends BridgeAbstract {
const NAME = 'Have I Been Pwned (HIBP) Bridge';
const URI = 'https://haveibeenpwned.com';
Expand All @@ -21,72 +28,82 @@ class HaveIBeenPwnedBridge extends BridgeAbstract {
'defaultValue' => 20,
)
));
const API_URI = 'https://haveibeenpwned.com/api/v3';

const CACHE_TIMEOUT = 3600;

private $breachDateRegex = '/Breach date: ([0-9]{1,2} [A-Z-a-z]+ [0-9]{4})/';
private $dateAddedRegex = '/Date added to HIBP: ([0-9]{1,2} [A-Z-a-z]+ [0-9]{4})/';
private $accountsRegex = '/Compromised accounts: ([0-9,]+)/';

private $breaches = array();

/**
 * Fetch all breaches from the HIBP API and build the feed from them.
 *
 * Every breach record returned by the `/breaches` endpoint is converted
 * into an intermediate array (title, dateAdded, breachDate, uri, content,
 * uid) and appended to $this->breaches. The collected breaches are then
 * handed to orderBreaches() and createItems().
 *
 * @return void
 */
public function collectData() {
    $data = json_decode(getContents(self::API_URI . '/breaches'), true);

    // json_decode() returns null when the payload is not valid JSON;
    // report that instead of iterating over a non-array.
    if (!is_array($data)) {
        returnServerError('Could not decode breach data');
        return;
    }

    foreach($data as $breach) {
        $item = array();

        $pwnCount = number_format($breach['PwnCount']);
        $item['title'] = $breach['Title'] . ' - '
            . $pwnCount . ' breached accounts';
        $item['dateAdded'] = $breach['AddedDate'];
        $item['breachDate'] = $breach['BreachDate'];
        // The breach name is appended as a URL fragment; without the '#'
        // it would be glued onto the path and produce a broken link.
        $item['uri'] = self::URI . '/PwnedWebsites#' . $breach['Name'];

        // Human-readable dates for the item body, e.g. "4 October 2013".
        $breachDate = date('j F Y', strtotime($breach['BreachDate']));
        $addedDate = date('j F Y', strtotime($breach['AddedDate']));
        $compData = implode(', ', $breach['DataClasses']);

        $item['content'] = '<p>' . $breach['Description'] . '</p>';
        $item['content'] .= '<p>' . $this->breachType($breach) . '</p>';
        $item['content'] .= '<p>'
            . '<strong>Breach date:</strong> ' . $breachDate . '<br>'
            . '<strong>Date added to HIBP:</strong> ' . $addedDate . '<br>'
            . '<strong>Compromised accounts:</strong> ' . $pwnCount . '<br>'
            . '<strong>Compromised data:</strong> ' . $compData . '<br>'
            . '</p>';

        $item['uid'] = $breach['Name'];
        $this->breaches[] = $item;
    }

    $this->orderBreaches();
    $this->createItems();
}

/**
 * Messages describing special breach categories.
 *
 * The outer key is the name of a boolean field on a breach record; the
 * inner array maps the field value that should produce a message to the
 * message text. PHP casts boolean array keys to integers (false => 0,
 * true => 1), so a lookup with the record's boolean value finds the entry
 * only when the value matches the one listed here.
 */
private const BREACH_TYPES = array(
'IsVerified' => array(
// Only the *unverified* state is worth mentioning.
false => 'Unverified breach, may be sourced from elsewhere'
),
'IsFabricated' => array(
true => 'Fabricated breach, likely not legitimate'
),
'IsSensitive' => array(
true => 'Sensitive breach, not publicly searchable'
),
'IsRetired' => array(
true => 'Retired breach, removed from system'
),
'IsSpamList' => array(
true => 'Spam list, used for spam marketing'
),
'IsMalware' => array(
true => 'Malware breach'
),
);

/**
* Extract data breach type(s)
*/
private function breachType($breach) {

$content = '';

if ($breach->find('h3 > i', 0)) {

foreach ($breach->find('h3 > i') as $i) {
$content .= $i->title . '.<br>';
foreach (self::BREACH_TYPES as $type => $message) {
if (isset($message[$breach[$type]])) {
$content .= $message[$breach[$type]] . '.<br>';
}

}

return $content;
Expand Down Expand Up @@ -127,6 +144,7 @@ private function createItems() {
$item['timestamp'] = $breach[$this->getInput('order')];
$item['uri'] = $breach['uri'];
$item['content'] = $breach['content'];
$item['uid'] = $breach['uid'];

$this->items[] = $item;

Expand Down

0 comments on commit e5829d3

Please sign in to comment.