Skip to content

Commit

Permalink
Add support for linguist-detectable and linguist-documentation (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
KN4CK3R authored Feb 23, 2024
1 parent 7d0903b commit 2a278b9
Show file tree
Hide file tree
Showing 4 changed files with 363 additions and 69 deletions.
23 changes: 22 additions & 1 deletion modules/git/repo_attribute.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"os"

"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/optional"
)

// CheckAttributeOpts represents the possible options to CheckAttribute
Expand Down Expand Up @@ -291,7 +292,7 @@ func (repo *Repository) CheckAttributeReader(commitID string) (*CheckAttributeRe
}

checker := &CheckAttributeReader{
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language", "linguist-documentation", "linguist-detectable"},
Repo: repo,
IndexFile: indexFilename,
WorkTree: worktree,
Expand All @@ -316,3 +317,23 @@ func (repo *Repository) CheckAttributeReader(commitID string) (*CheckAttributeRe

return checker, deferable
}

// attributeToBool interprets the git attribute called name as a tri-state flag.
// It yields Some(true) for "set"/"true", Some(false) for "unset"/"false", and
// None when the attribute is absent, "unspecified" or holds any other value.
func attributeToBool(attr map[string]string, name string) optional.Option[bool] {
	// A missing key reads as "", which lands in the default branch just like
	// "unspecified" and every other unrecognised value.
	switch attr[name] {
	case "set", "true":
		return optional.Some(true)
	case "unset", "false":
		return optional.Some(false)
	default:
		return optional.None[bool]()
	}
}

// attributeToString returns the raw value of the git attribute called name,
// or None when the attribute is absent or reported as "unspecified".
// An empty value that is present in the map is returned as Some("").
func attributeToString(attr map[string]string, name string) optional.Option[string] {
	raw, found := attr[name]
	if !found || raw == "unspecified" {
		return optional.None[string]()
	}
	return optional.Some(raw)
}
75 changes: 41 additions & 34 deletions modules/git/repo_language_stats_gogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"strings"

"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/optional"

"github.com/go-enry/go-enry/v2"
"github.com/go-git/go-git/v5"
Expand Down Expand Up @@ -57,25 +58,47 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil
}

notVendored := false
notGenerated := false
isVendored := optional.None[bool]()
isGenerated := optional.None[bool]()
isDocumentation := optional.None[bool]()
isDetectable := optional.None[bool]()

if checker != nil {
attrs, err := checker.CheckPath(f.Name)
if err == nil {
if vendored, has := attrs["linguist-vendored"]; has {
if vendored == "set" || vendored == "true" {
return nil
}
notVendored = vendored == "false"
isVendored = attributeToBool(attrs, "linguist-vendored")
if isVendored.ValueOrDefault(false) {
return nil
}

isGenerated = attributeToBool(attrs, "linguist-generated")
if isGenerated.ValueOrDefault(false) {
return nil
}
if generated, has := attrs["linguist-generated"]; has {
if generated == "set" || generated == "true" {
return nil

isDocumentation = attributeToBool(attrs, "linguist-documentation")
if isDocumentation.ValueOrDefault(false) {
return nil
}

isDetectable = attributeToBool(attrs, "linguist-detectable")
if !isDetectable.ValueOrDefault(true) {
return nil
}

hasLanguage := attributeToString(attrs, "linguist-language")
if hasLanguage.Value() == "" {
hasLanguage = attributeToString(attrs, "gitlab-language")
if hasLanguage.Has() {
language := hasLanguage.Value()
if idx := strings.IndexByte(language, '?'); idx >= 0 {
hasLanguage = optional.Some(language[:idx])
}
}
notGenerated = generated == "false"
}
if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
if hasLanguage.Value() != "" {
language := hasLanguage.Value()

// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
Expand All @@ -85,28 +108,14 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
// this language will always be added to the size
sizes[language] += f.Size
return nil
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
// strip off a ? if present
if idx := strings.IndexByte(language, '?'); idx >= 0 {
language = language[:idx]
}
if len(language) != 0 {
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
language = group
}

// this language will always be added to the size
sizes[language] += f.Size
return nil
}
}
}
}

if (!notVendored && analyze.IsVendor(f.Name)) || enry.IsDotFile(f.Name) ||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
if (!isVendored.Has() && analyze.IsVendor(f.Name)) ||
enry.IsDotFile(f.Name) ||
(!isDocumentation.Has() && enry.IsDocumentation(f.Name)) ||
enry.IsConfiguration(f.Name) {
return nil
}

Expand All @@ -115,12 +124,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
if f.Size <= bigFileSize {
content, _ = readFile(f, fileSizeLimit)
}
if !notGenerated && enry.IsGenerated(f.Name, content) {
if !isGenerated.Has() && enry.IsGenerated(f.Name, content) {
return nil
}

// TODO: Use .gitattributes file for linguist overrides

language := analyze.GetCodeLanguage(f.Name, content)
if language == enry.OtherLanguage || language == "" {
return nil
Expand All @@ -138,7 +145,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
included = langtype == enry.Programming || langtype == enry.Markup
includedLanguage[language] = included
}
if included {
if included || isDetectable.ValueOrDefault(false) {
sizes[language] += f.Size
} else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) {
firstExcludedLanguage = language
Expand Down
75 changes: 41 additions & 34 deletions modules/git/repo_language_stats_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (

"code.gitea.io/gitea/modules/analyze"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/optional"

"github.com/go-enry/go-enry/v2"
)
Expand Down Expand Up @@ -88,25 +89,47 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
continue
}

notVendored := false
notGenerated := false
isVendored := optional.None[bool]()
isGenerated := optional.None[bool]()
isDocumentation := optional.None[bool]()
isDetectable := optional.None[bool]()

if checker != nil {
attrs, err := checker.CheckPath(f.Name())
if err == nil {
if vendored, has := attrs["linguist-vendored"]; has {
if vendored == "set" || vendored == "true" {
continue
}
notVendored = vendored == "false"
isVendored = attributeToBool(attrs, "linguist-vendored")
if isVendored.ValueOrDefault(false) {
continue
}

isGenerated = attributeToBool(attrs, "linguist-generated")
if isGenerated.ValueOrDefault(false) {
continue
}
if generated, has := attrs["linguist-generated"]; has {
if generated == "set" || generated == "true" {
continue

isDocumentation = attributeToBool(attrs, "linguist-documentation")
if isDocumentation.ValueOrDefault(false) {
continue
}

isDetectable = attributeToBool(attrs, "linguist-detectable")
if !isDetectable.ValueOrDefault(true) {
continue
}

hasLanguage := attributeToString(attrs, "linguist-language")
if hasLanguage.Value() == "" {
hasLanguage = attributeToString(attrs, "gitlab-language")
if hasLanguage.Has() {
language := hasLanguage.Value()
if idx := strings.IndexByte(language, '?'); idx >= 0 {
hasLanguage = optional.Some(language[:idx])
}
}
notGenerated = generated == "false"
}
if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
if hasLanguage.Value() != "" {
language := hasLanguage.Value()

// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
Expand All @@ -116,29 +139,14 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
// this language will always be added to the size
sizes[language] += f.Size()
continue
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
// strip off a ? if present
if idx := strings.IndexByte(language, '?'); idx >= 0 {
language = language[:idx]
}
if len(language) != 0 {
// group languages, such as Pug -> HTML; SCSS -> CSS
group := enry.GetLanguageGroup(language)
if len(group) != 0 {
language = group
}

// this language will always be added to the size
sizes[language] += f.Size()
continue
}
}

}
}

if (!notVendored && analyze.IsVendor(f.Name())) || enry.IsDotFile(f.Name()) ||
enry.IsDocumentation(f.Name()) || enry.IsConfiguration(f.Name()) {
if (!isVendored.Has() && analyze.IsVendor(f.Name())) ||
enry.IsDotFile(f.Name()) ||
(!isDocumentation.Has() && enry.IsDocumentation(f.Name())) ||
enry.IsConfiguration(f.Name()) {
continue
}

Expand Down Expand Up @@ -170,7 +178,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
return nil, err
}
}
if !notGenerated && enry.IsGenerated(f.Name(), content) {
if !isGenerated.Has() && enry.IsGenerated(f.Name(), content) {
continue
}

Expand All @@ -193,13 +201,12 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
included = langType == enry.Programming || langType == enry.Markup
includedLanguage[language] = included
}
if included {
if included || isDetectable.ValueOrDefault(false) {
sizes[language] += f.Size()
} else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) {
firstExcludedLanguage = language
firstExcludedLanguageSize += f.Size()
}
continue
}

// If there are no included languages add the first excluded language
Expand Down
Loading

0 comments on commit 2a278b9

Please sign in to comment.