Skip to content

Commit

Permalink
Add linguist-detectable applying + checkDetected option
Browse files Browse the repository at this point in the history
  • Loading branch information
Nixinova committed Sep 14, 2024
1 parent f7dc8ac commit 2403222
Show file tree
Hide file tree
Showing 9 changed files with 35 additions and 14 deletions.
4 changes: 4 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
# Changelog

## Next
- Added application of GitHub-Linguist override rule `linguist-detectable`.
- Added option `checkDetected` to control the application of `linguist-detectable` overrides.

## 2.7.1
*2024-05-08*
- Fixed gitignore file reading not conforming exactly to the spec.
Expand Down
7 changes: 6 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ const { files, languages, unknown } = await linguist(fileNames, { fileContent, .
Whether to display sub-languages instead of their parents when possible (defaults to `false`).
- `quick` (boolean):
Whether to skip complex language analysis such as the checking of heuristics and gitattributes statements (defaults to `false`).
Alias for `checkAttributes:false, checkIgnored:false, checkHeuristics:false, checkShebang:false, checkModeline:false`.
Alias for `checkAttributes:false, checkIgnored:false, checkDetected:false, checkHeuristics:false, checkShebang:false, checkModeline:false`.
- `offline` (boolean):
Whether to use pre-packaged metadata files instead of fetching them from GitHub at runtime (defaults to `false`).
- `keepVendored` (boolean):
Expand All @@ -143,6 +143,8 @@ const { files, languages, unknown } = await linguist(fileNames, { fileContent, .
- `checkIgnored` (boolean):
Force the checking of `.gitignore` files (defaults to `true` unless `quick` is set).
Does nothing when `fileContent` is set.
- `checkDetected` (boolean):
Force files marked with `linguist-detectable` to show up in the output, even if the file is not part of the declared `categories` (defaults to `true` unless `quick` is set).
- `checkHeuristics` (boolean):
Apply heuristics to ambiguous languages (defaults to `true` unless `quick` is set).
- `checkShebang` (boolean):
Expand Down Expand Up @@ -199,6 +201,9 @@ linguist --version
- `--checkIgnored`:
Force the checking of `.gitignore` files.
Use alongside `--quick` to override it disabling this option.
- `--checkDetected`:
Force files marked with `linguist-detectable` to show up in the output, even if the file is not part of the declared `--categories`.
Use alongside `--quick` to override it disabling this option.
- `--checkHeuristics`:
Apply heuristics to ambiguous languages.
Use alongside `--quick` to override it disabling this option.
Expand Down
1 change: 1 addition & 0 deletions src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ program
.option('-r|--relativePaths [bool]', 'Convert absolute file paths to relative', false)
.option('-A|--checkAttributes [bool]', 'Force the checking of gitattributes files', true)
.option('-I|--checkIgnored [bool]', 'Force the checking of gitignore files', true)
.option('-D|--checkDetected [bool]', 'Force files marked with linguist-detectable to always appear in output', true)
.option('-H|--checkHeuristics [bool]', 'Apply heuristics to ambiguous languages', true)
.option('-S|--checkShebang [bool]', 'Check shebang lines for explicit classification', true)
.option('-M|--checkModeline [bool]', 'Check modelines for explicit classification', true)
Expand Down
9 changes: 6 additions & 3 deletions src/helpers/parse-gitattributes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ export type FlagAttributes = {
'vendored': boolean | null,
'generated': boolean | null,
'documentation': boolean | null,
'detectable': boolean | null,
'binary': boolean | null,
'language': T.LanguageResult;
};
Expand Down Expand Up @@ -34,11 +35,13 @@ export default function parseAttributes(content: string, folderRoot: string = '.
const falseParts = (str: string) => attrParts.filter(part => part.includes(str) && isFalse(part));
const hasTrueParts = (str: string) => trueParts(str).length > 0;
const hasFalseParts = (str: string) => falseParts(str).length > 0;
const boolOrNullVal = (str: string) => hasTrueParts(str) ? true : hasFalseParts(str) ? false : null;

const attrs = {
'generated': hasTrueParts('linguist-generated') ? true : hasFalseParts('linguist-generated') ? false : null,
'vendored': hasTrueParts('linguist-vendored') ? true : hasFalseParts('linguist-vendored') ? false : null,
'documentation': hasTrueParts('linguist-documentation') ? true : hasFalseParts('linguist-documentation') ? false : null,
'generated': boolOrNullVal('linguist-generated'),
'vendored': boolOrNullVal('linguist-vendored'),
'documentation': boolOrNullVal('linguist-documentation'),
'detectable': boolOrNullVal('linguist-detectable'),
'binary': hasTrueParts('binary') || hasFalseParts('text') ? true : hasFalseParts('binary') || hasTrueParts('text') ? false : null,
'language': trueParts('linguist-language').at(-1)?.split('=')[1] ?? null,
}
Expand Down
14 changes: 11 additions & 3 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom
// Normalise input option arguments
opts = {
checkIgnored: !opts.quick,
checkDetected: !opts.quick,
checkAttributes: !opts.quick,
checkHeuristics: !opts.quick,
checkShebang: !opts.quick,
Expand Down Expand Up @@ -372,17 +373,24 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom
}

// Skip specified categories
// todo linguist-detectable
if (opts.categories?.length) {
const categories: T.Category[] = ['data', 'markup', 'programming', 'prose'];
const hiddenCategories = categories.filter(cat => !opts.categories!.includes(cat));
for (const [file, lang] of Object.entries(results.files.results)) {
if (!hiddenCategories.some(cat => lang && langData[lang]?.type === cat)) {
// Skip if language is not hidden
if (!hiddenCategories.some(cat => lang && langData[lang]?.type === cat))
continue;
// Skip if language is forced as detectable
if (opts.checkDetected) {
const detectable = ignore().add(getFlaggedGlobs('detectable', true));
if (detectable.ignores(relPath(file)))
continue;
}
// Delete result otherwise
delete results.files.results[file];
if (lang) {
if (lang)
delete results.languages.results[lang];
}
}
for (const category of hiddenCategories) {
for (const [lang, { type }] of Object.entries(results.languages.results)) {
Expand Down
1 change: 1 addition & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ export interface Options {
quick?: boolean
offline?: boolean
checkIgnored?: boolean
checkDetected?: boolean
checkAttributes?: boolean
checkHeuristics?: boolean
checkShebang?: boolean
Expand Down
11 changes: 4 additions & 7 deletions test/expected.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"files": {
"count": 10,
"bytes": 65,
"count": 11,
"bytes": 69,
"results": {
"~/al.al": "Perl",
"~/alternatives.asc": "AGS Script",
Expand All @@ -11,6 +11,7 @@
"~/hashbang": "JavaScript",
"~/modeline.txt": "C++",
"~/package-lock.json": "JSON",
"~/detected.json": "JSON",
"~/Pipfile": "TOML",
"~/unknown": null
},
Expand All @@ -25,11 +26,7 @@
"Perl": { "type": "programming", "bytes": 0, "color": "#0298c3" },
"AGS Script": { "type": "programming", "bytes": 14, "color": "#B9D9FF" },
"JavaScript": { "type": "programming", "bytes": 23, "color": "#f1e05a" },
"JSON": {
"type": "data",
"bytes": 4,
"color": "#292929"
},
"JSON": { "type": "data", "bytes": 6, "color": "#292929"},
"Text": { "type": "prose", "bytes": 0 },
"C++": { "type": "programming", "bytes": 15, "color": "#f34b7d" },
"TOML": { "type": "data", "bytes": 0, "color": "#9c4221" }
Expand Down
1 change: 1 addition & 0 deletions test/samples/.gitattributes
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
file.txt linguist-language=JS
vendored linguist-vendored
package-lock.json -linguist-generated
detected.json linguist-detectable
1 change: 1 addition & 0 deletions test/samples/detected.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}

0 comments on commit 2403222

Please sign in to comment.