-
Notifications
You must be signed in to change notification settings - Fork 1
/
Heuristic.php
115 lines (93 loc) · 3.24 KB
/
Heuristic.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
<?php
declare(strict_types=1);
namespace Babelfish\Strategy;
use Babelfish\File\SourceFile;
use Babelfish\Language;
use function pathinfo;
use function preg_match;
use function strlen;
use function strtolower;
use function substr;
use const PATHINFO_EXTENSION;
/**
 * Strategy that narrows a set of language candidates by running
 * content-based heuristics, selected by file extension, against the
 * beginning of the file.
 *
 * The heuristics table is loaded from `../Data/Heuristics.php` and is keyed
 * by lowercase extension including the leading dot.
 */
final class Heuristic implements Strategy
{
    /** Maximum number of bytes of file content handed to the regular expressions. */
    private const HEURISTICS_CONSIDER_BYTES = 50 * 1024;

    /**
     * Returns the candidates whose name matches a heuristic entry for the
     * file's extension and whose rules are satisfied by the file content.
     *
     * An empty array is returned when the file has no extension or when no
     * heuristics are registered for that extension.
     *
     * @return Language[]
     */
    public function getLanguages(SourceFile $file, Language ...$language_candidates): array
    {
        // Loaded once and kept for every subsequent call.
        static $heuristics_by_extension = null;
        if ($heuristics_by_extension === null) {
            /** @psalm-var array<string, array<string, mixed>> $heuristics_by_extension */
            $heuristics_by_extension = include __DIR__ . '/../Data/Heuristics.php';
        }

        $raw_extension = pathinfo($file->getName(), PATHINFO_EXTENSION);
        if ($raw_extension === '') {
            return [];
        }

        // Table keys carry the leading dot and are lowercase.
        $extension_key = '.' . strtolower($raw_extension);
        if (! isset($heuristics_by_extension[$extension_key])) {
            return [];
        }

        /**
         * @psalm-var array<string, array{positive: string, negative: string, and: array<string, array{positive: string, negative: string}>}> $rules_by_language
         */
        $rules_by_language = $heuristics_by_extension[$extension_key];
        $content           = $this->getDataToAnalyze($file);
        $matched           = [];

        foreach ($rules_by_language as $language_name => $rules) {
            foreach ($language_candidates as $candidate) {
                // Names are compared case-insensitively; the rules are only
                // evaluated for candidates whose name matches.
                $is_same_language = strtolower($language_name) === strtolower($candidate->getName());
                if ($is_same_language && $this->validateRules($content, $rules)) {
                    $matched[] = $candidate;
                }
            }
        }

        return $matched;
    }

    /**
     * Builds the text the heuristics run against: the file's lines, each
     * followed by a newline, cut to at most HEURISTICS_CONSIDER_BYTES bytes.
     */
    private function getDataToAnalyze(SourceFile $file): string
    {
        $buffer = '';

        foreach ($file->getLines() as $line) {
            $buffer .= $line . "\n";
            if (strlen($buffer) < self::HEURISTICS_CONSIDER_BYTES) {
                continue;
            }

            // Limit reached: stop reading and drop whatever overshoots it.
            return substr($buffer, 0, self::HEURISTICS_CONSIDER_BYTES);
        }

        // Every line was consumed without reaching the limit, so there is
        // nothing to truncate.
        return $buffer;
    }

    /**
     * Checks a rule set against the data. When an `and` entry is present,
     * every rule it lists must hold; otherwise the set is treated as a
     * single simple rule.
     *
     * @param mixed[] $rules
     *
     * @psalm-param array{positive: string, negative: string, and: array<string, array{positive: string, negative: string}>} $rules
     */
    private function validateRules(string $data, array $rules): bool
    {
        $required_rules = $rules['and'] ?? null;
        if ($required_rules === null) {
            return $this->validateSimpleRule($data, $rules);
        }

        foreach ($required_rules as $required_rule) {
            if ($this->validateSimpleRule($data, $required_rule)) {
                continue;
            }

            // One failing rule is enough to reject the whole set.
            return false;
        }

        return true;
    }

    /**
     * Checks a single rule. A `positive` pattern, when present, must match
     * the data and is the only thing evaluated; otherwise a `negative`
     * pattern, when present, must not match. A rule with neither pattern
     * always holds.
     *
     * @param string[] $rule
     *
     * @psalm-param array{positive: string, negative: string} $rule
     */
    private function validateSimpleRule(string $data, array $rule): bool
    {
        $must_match = $rule['positive'] ?? null;
        if ($must_match !== null) {
            return preg_match($must_match, $data) === 1;
        }

        $must_not_match = $rule['negative'] ?? null;
        if ($must_not_match === null) {
            return true;
        }

        return preg_match($must_not_match, $data) === 0;
    }
}