From fef9713de4fe310b74d1671bcb6c13c7bcac10a8 Mon Sep 17 00:00:00 2001 From: Nixinova Date: Tue, 23 Jul 2024 21:16:30 +1200 Subject: [PATCH 1/3] Add `minSize` option which adds to `other` result An implementation of a `minSize` option (proposed in #32). This allows filtering the output to only show languages with a resulting size greater than what the user inputs. This is done by moving the results of these too-small languages to an `other` key and deleting the original results. This may be too ad-hoc - for instance, maybe this should be a CLI-only option and the actual data isn't changed? Is the `other` key needed, or not? For this reason this feature is not being committed straight to main and will sit as a pull request for proposal. --- src/cli.ts | 1 + src/index.ts | 24 ++++++++++++++++++++++++ src/types.ts | 3 ++- 3 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/cli.ts b/src/cli.ts index 0a76c12..5652320 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -22,6 +22,7 @@ program .option('-j|--json [bool]', 'Display the output as JSON', false) .option('-t|--tree ', 'Which part of the output JSON to display (dot-delimited)') .option('-F|--listFiles [bool]', 'Whether to list every matching file under the language results', false) + .option('-m|--minSize ', 'Minimum file size to show language results for (must have a unit: b, kb, mb, %)') .option('-q|--quick [bool]', 'Skip complex language analysis (alias for -{A|I|H|S}=false)', false) .option('-o|--offline [bool]', 'Use packaged data files instead of fetching latest from GitHub', false) .option('-V|--keepVendored [bool]', 'Prevent skipping over vendored/generated files', false) diff --git a/src/index.ts b/src/index.ts index 8ff2768..c83c421 100644 --- a/src/index.ts +++ b/src/index.ts @@ -431,6 +431,30 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom results.languages.bytes += fileSize; } + // Ignore languages with a bytes/% size less than the declared min size + if (opts.minSize) { + const totalSize = results.languages.bytes; + const minSizeAmt = parseFloat(opts.minSize.replace(/[a-z]+$/i, '')); // '2KB' -> 2 + const minSizeUnit = opts.minSize.replace(/^\d+/, '').toLowerCase(); // '2KB' -> 'kb' + const conversionFactors: Record number> = { + 'b': n => n, + 'kb': n => n * 1e3, + 'mb': n => n * 1e6, + '%': n => n * totalSize / 100, + }; + const minBytesSize = conversionFactors[minSizeUnit](+minSizeAmt); + // Apply specified minimums: delete language results that do not reach the threshold + for (const [lang, data] of Object.entries(results.languages.results)) { + if (data.bytes < minBytesSize) { + // Add data to 'Other' result + results.languages.results.other ??= { type: 'data' /*arbitrary*/, bytes: 0 }; + results.languages.results.other.bytes += data.bytes; + // Remove language result + delete results.languages.results[lang]; + } + } + } + // Set counts results.files.count = Object.keys(results.files.results).length; results.languages.count = Object.keys(results.languages.results).length; diff --git a/src/types.ts b/src/types.ts index 6876fd6..b747069 100644 --- a/src/types.ts +++ b/src/types.ts @@ -18,6 +18,7 @@ export interface Options { keepVendored?: boolean keepBinary?: boolean relativePaths?: boolean + minSize?: `${number}${Lowercase<'B' | 'KB' | 'MB' | '%'>}` childLanguages?: boolean quick?: boolean offline?: boolean @@ -39,7 +40,7 @@ export interface Results { languages: { count: Integer bytes: Bytes - results: Record Date: Sat, 8 Feb 2025 17:17:38 +1300 Subject: [PATCH 2/3] Rework minSize option to be CLI-only --- src/cli.ts | 28 +++++++++++++++++++++++++++- src/index.ts | 24 ------------------------ src/types.ts | 3 +-- 3 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/cli.ts b/src/cli.ts index 5652320..3976cf4 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -60,12 +60,38 @@ if (args.analyze) (async () => { const { files, languages, unknown } = data; // Print output if (!args.json) { - const sortedEntries = Object.entries(languages.results).sort((a, b) => a[1].bytes < b[1].bytes ? +1 : -1); + // Ignore languages with a bytes/% size less than the declared min size + if (args.minSize) { + let otherBytes = 0; + const totalSize = languages.bytes; + const minSizeAmt = parseFloat(args.minSize.replace(/[a-z]+$/i, "")); // '2KB' -> 2 + const minSizeUnit = args.minSize.replace(/^\d+/, "").toLowerCase(); // '2KB' -> 'kb' + const conversionFactors: Record number> = { + b: (n) => n, + kb: (n) => n * 1e3, + mb: (n) => n * 1e6, + "%": (n) => (n * totalSize) / 100, + }; + const minBytesSize = conversionFactors[minSizeUnit](+minSizeAmt); + // Apply specified minimums: delete language results that do not reach the threshold + for (const [lang, data] of Object.entries(languages.results)) { + if (data.bytes < minBytesSize) { + // Add to other bytes count + otherBytes += data.bytes; + // Remove language result + delete languages.results[lang]; + } + } + languages.results['Other'] = { type: null!, bytes: otherBytes, color: undefined }; + } + + const sortedEntries = Object.entries(languages.results).sort((a, b) => (a[1].bytes < b[1].bytes ? +1 : -1)); const totalBytes = languages.bytes; console.log(`\n Analysed ${files.bytes.toLocaleString()} B from ${files.count} files with linguist-js`); console.log(`\n Language analysis results: \n`); let count = 0; if (sortedEntries.length === 0) console.log(` None`); + // Collate files per language const filesPerLanguage: Record = {}; if (args.listFiles) { diff --git a/src/index.ts b/src/index.ts index c83c421..8ff2768 100644 --- a/src/index.ts +++ b/src/index.ts @@ -431,30 +431,6 @@ async function analyse(rawPaths?: string | string[], opts: T.Options = {}): Prom results.languages.bytes += fileSize; } - // Ignore languages with a bytes/% size less than the declared min size - if (opts.minSize) { - const totalSize = results.languages.bytes; - const minSizeAmt = parseFloat(opts.minSize.replace(/[a-z]+$/i, '')); // '2KB' -> 2 - const minSizeUnit = opts.minSize.replace(/^\d+/, '').toLowerCase(); // '2KB' -> 'kb' - const conversionFactors: Record number> = { - 'b': n => n, - 'kb': n => n * 1e3, - 'mb': n => n * 1e6, - '%': n => n * totalSize / 100, - }; - const minBytesSize = conversionFactors[minSizeUnit](+minSizeAmt); - // Apply specified minimums: delete language results that do not reach the threshold - for (const [lang, data] of Object.entries(results.languages.results)) { - if (data.bytes < minBytesSize) { - // Add data to 'Other' result - results.languages.results.other ??= { type: 'data' /*arbitrary*/, bytes: 0 }; - results.languages.results.other.bytes += data.bytes; - // Remove language result - delete results.languages.results[lang]; - } - } - } - // Set counts results.files.count = Object.keys(results.files.results).length; results.languages.count = Object.keys(results.languages.results).length; diff --git a/src/types.ts b/src/types.ts index b747069..6876fd6 100644 --- a/src/types.ts +++ b/src/types.ts @@ -18,7 +18,6 @@ export interface Options { keepVendored?: boolean keepBinary?: boolean relativePaths?: boolean - minSize?: `${number}${Lowercase<'B' | 'KB' | 'MB' | '%'>}` childLanguages?: boolean quick?: boolean offline?: boolean @@ -40,7 +39,7 @@ export interface Results { languages: { count: Integer bytes: Bytes - results: Record Date: Sat, 8 Feb 2025 17:24:21 +1300 Subject: [PATCH 3/3] Update with 'other' LOC too --- src/cli.ts | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/src/cli.ts b/src/cli.ts index bba6841..e496cfd 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -64,27 +64,30 @@ if (args.analyze) (async () => { if (!args.json) { // Ignore languages with a bytes/% size less than the declared min size if (args.minSize) { - let otherBytes = 0; const totalSize = languages.bytes; - const minSizeAmt = parseFloat(args.minSize.replace(/[a-z]+$/i, "")); // '2KB' -> 2 - const minSizeUnit = args.minSize.replace(/^\d+/, "").toLowerCase(); // '2KB' -> 'kb' + const minSizeAmt = parseFloat(args.minSize.replace(/[a-z]+$/i, '')); // '2KB' -> 2 + const minSizeUnit = args.minSize.replace(/^\d+/, '').toLowerCase(); // '2KB' -> 'kb' const conversionFactors: Record number> = { - b: (n) => n, - kb: (n) => n * 1e3, - mb: (n) => n * 1e6, - "%": (n) => (n * totalSize) / 100, + 'b': n => n, + 'kb': n => n * 1e3, + 'mb': n => n * 1e6, + '%': n => n * totalSize / 100, }; const minBytesSize = conversionFactors[minSizeUnit](+minSizeAmt); + const other = { bytes: 0, lines: { total: 0, content: 0, code: 0 } }; // Apply specified minimums: delete language results that do not reach the threshold for (const [lang, data] of Object.entries(languages.results)) { if (data.bytes < minBytesSize) { - // Add to other bytes count - otherBytes += data.bytes; + // Add to 'other' count + other.bytes += data.bytes; + other.lines.total += data.lines.total; + other.lines.content += data.lines.content; + other.lines.code += data.lines.code; // Remove language result delete languages.results[lang]; } } - languages.results['Other'] = { type: null!, bytes: otherBytes, color: undefined }; + languages.results['Other'] = { ...other, type: null! }; } const sortedEntries = Object.entries(languages.results).sort((a, b) => (a[1].bytes < b[1].bytes ? +1 : -1));