From 9ee84955fd2c54c77c59571659abd11dd27cfa91 Mon Sep 17 00:00:00 2001 From: andy Date: Sun, 8 Nov 2020 22:57:01 +0100 Subject: [PATCH] Integrate chroma customized matching into language detection --- ...ats_request_extra_heartbeats_template.json | 6 +- .../testdata/api_heartbeats_response.json | 2 +- go.mod | 2 + go.sum | 15 + pkg/language/chroma.go | 270 ++++++++++++++++++ pkg/language/language.go | 4 + pkg/language/language_test.go | 50 +++- pkg/language/priority.go | 13 + .../objective-c.h => fsharp.fs} | 0 .../empty.m} | 0 .../codefiles/matlab_with_headers/matlab.m | 10 + .../codefiles/matlab_with_headers/random.h | 0 pkg/language/testdata/codefiles/objective-c.m | 18 ++ pkg/language/testdata/codefiles/perl.pl | 1 + .../testdata/codefiles/py_with_c_files/foo.c | 0 .../testdata/codefiles/py_with_c_files/see.h | 0 .../testdata/codefiles/py_with_c_files/see.py | 0 .../testdata/codefiles/with_mat_file/empty.m | 0 .../codefiles/with_mat_file/empty.mat | 0 .../codefiles/with_mat_file/objective-c.h | 0 .../codefiles/with_mat_file/objective-c.m | 0 .../codefiles/with_mat_file/objective-cpp.h | 0 .../codefiles/with_mat_file/objective-cpp.mm | 0 pkg/language/testdata/objective-c.m | 18 ++ 24 files changed, 401 insertions(+), 8 deletions(-) create mode 100644 pkg/language/chroma.go create mode 100644 pkg/language/priority.go rename pkg/language/testdata/codefiles/{h_with_m_file/objective-c.h => fsharp.fs} (100%) rename pkg/language/testdata/codefiles/{h_with_m_file/objective-c.m => matlab_with_headers/empty.m} (100%) create mode 100755 pkg/language/testdata/codefiles/matlab_with_headers/matlab.m create mode 100644 pkg/language/testdata/codefiles/matlab_with_headers/random.h create mode 100644 pkg/language/testdata/codefiles/objective-c.m create mode 100644 pkg/language/testdata/codefiles/perl.pl create mode 100644 pkg/language/testdata/codefiles/py_with_c_files/foo.c create mode 100644 pkg/language/testdata/codefiles/py_with_c_files/see.h create mode 100644 pkg/language/testdata/codefiles/py_with_c_files/see.py create mode 100644 pkg/language/testdata/codefiles/with_mat_file/empty.m create mode 100644 pkg/language/testdata/codefiles/with_mat_file/empty.mat create mode 100644 pkg/language/testdata/codefiles/with_mat_file/objective-c.h create mode 100644 pkg/language/testdata/codefiles/with_mat_file/objective-c.m create mode 100644 pkg/language/testdata/codefiles/with_mat_file/objective-cpp.h create mode 100644 pkg/language/testdata/codefiles/with_mat_file/objective-cpp.mm create mode 100644 pkg/language/testdata/objective-c.m diff --git a/cmd/legacy/heartbeat/testdata/api_heartbeats_request_extra_heartbeats_template.json b/cmd/legacy/heartbeat/testdata/api_heartbeats_request_extra_heartbeats_template.json index eeec3656..ed40e473 100644 --- a/cmd/legacy/heartbeat/testdata/api_heartbeats_request_extra_heartbeats_template.json +++ b/cmd/legacy/heartbeat/testdata/api_heartbeats_request_extra_heartbeats_template.json @@ -6,7 +6,7 @@ "dependencies": null, "entity": "%s", "is_write": true, - "language": null, + "language": "Go", "lineno": 13, "lines": 2, "project": "wakatime-cli", @@ -21,7 +21,7 @@ "dependencies": null, "entity": "%s", "is_write": true, - "language": null, + "language": "Go", "lineno": 42, "lines": 2, "project": "wakatime-cli", @@ -36,7 +36,7 @@ "dependencies": null, "entity": "%s", "is_write": null, - "language": null, + "language": "Go", "lineno": null, "lines": 2, "project": "wakatime-cli", diff --git a/cmd/legacy/heartbeat/testdata/api_heartbeats_response.json b/cmd/legacy/heartbeat/testdata/api_heartbeats_response.json index 8b4f3abb..b214dc5f 100644 --- a/cmd/legacy/heartbeat/testdata/api_heartbeats_response.json +++ b/cmd/legacy/heartbeat/testdata/api_heartbeats_response.json @@ -11,7 +11,7 @@ "entity": "/tmp/main.go", "id": "845a922e-9e65-4775-bd68-bb3196d2e06a", "is_write": true, - "language": "golang", + "language": "Go", "lineno": 42, "lines": 100, "machine_name_id": null, diff --git a/go.mod b/go.mod index 5a8a3f21..d1d32d4b 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,9 @@ go 1.15 require ( github.com/PuerkitoBio/goquery v1.6.0 // indirect + github.com/alecthomas/chroma v0.8.1 github.com/armon/consul-api v0.0.0-20180202201655-eb2c6b5be1b6 // indirect + github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 github.com/dlclark/regexp2 v1.4.0 github.com/matishsiao/goInfo v0.0.0-20200404012835-b5f882ee2288 github.com/mattn/go-sqlite3 v1.14.4 diff --git a/go.sum b/go.sum index f2e6e582..5568aa29 100644 --- a/go.sum +++ b/go.sum @@ -18,6 +18,12 @@ github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAE github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= github.com/PuerkitoBio/goquery v1.6.0 h1:j7taAbelrdcsOlGeMenZxc2AWXD5fieT1/znArdnx94= github.com/PuerkitoBio/goquery v1.6.0/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= +github.com/alecthomas/assert v0.0.0-20170929043011-405dbfeb8e38/go.mod h1:r7bzyVFMNntcxPZXK3/+KdruV1H5KSlyVY0gc+NgInI= +github.com/alecthomas/chroma v0.8.1 h1:ym20sbvyC6RXz45u4qDglcgr8E313oPROshcuCHqiEE= +github.com/alecthomas/chroma v0.8.1/go.mod h1:sko8vR34/90zvl5QdcUdvzL3J8NKjAUx9va9jPuFNoM= +github.com/alecthomas/colour v0.0.0-20160524082231-60882d9e2721/go.mod h1:QO9JBoKquHd+jz9nshCh40fOfO+JzsoXy8qTHF68zU0= +github.com/alecthomas/kong v0.2.4/go.mod h1:kQOmtJgV+Lb4aj+I2LEn40cbtawdWJ9Y8QLq+lElKxE= +github.com/alecthomas/repr v0.0.0-20180818092828-117648cd9897/go.mod h1:xTS7Pm1pD1mvyM075QCDSRqH6qRLXylzS24ZTpRiSzQ= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= @@ -40,11 +46,14 @@ github.com/coreos/go-semver v0.3.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3Ee github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4= github.com/coreos/pkg v0.0.0-20180928190104-399ea9e2e55f/go.mod h1:E3G3o1h8I7cfcXa63jLwjI0eiQQMgzzUDFVpN/nH/eA= github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= +github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964 h1:y5HC9v93H5EPKqaS1UYVg1uYah5Xf51mBfIoWehClUQ= +github.com/danwakefield/fnmatch v0.0.0-20160403171240-cbb64ac3d964/go.mod h1:Xd9hchkHSWYkEqJwUGisez3G1QY8Ryz0sdWrLPMGjLk= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no= +github.com/dlclark/regexp2 v1.2.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E= github.com/dlclark/regexp2 v1.4.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= @@ -127,7 +136,9 @@ github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czP github.com/matishsiao/goInfo v0.0.0-20200404012835-b5f882ee2288 h1:cdM7et8/VlNnSBpq3KbyQWsYLCY0WsB7tvV8Fr0DUNE= github.com/matishsiao/goInfo v0.0.0-20200404012835-b5f882ee2288/go.mod h1:yLZrFIhv+Z20hxHvcZpEyKVQp9HMsOJkXAxx7yDqtvg= github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaOChaDxuIBZU= +github.com/mattn/go-colorable v0.1.6/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-isatty v0.0.3/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-sqlite3 v1.14.0 h1:mLyGNKR8+Vv9CAU7PphKa2hkEqxxhn8i32J6FPj1/QA= github.com/mattn/go-sqlite3 v1.14.0/go.mod h1:JIl7NbARA7phWnGvh0LKTyg7S9BA+6gx71ShQilpsus= github.com/mattn/go-sqlite3 v1.14.4 h1:4rQjbDxdu9fSgI/r3KN72G3c2goxknAqHHgPWWs8UlI= @@ -153,6 +164,7 @@ github.com/pelletier/go-toml v1.2.0 h1:T5zMGML61Wp+FlcbWjRDT7yAxhJNAiPPLOFECq181 github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/posener/complete v1.1.1/go.mod h1:em0nMJCgc9GFtwrmVmEMR/ZL6WyhyjMBndrE9hABlRI= @@ -170,6 +182,7 @@ github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFR github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= +github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= github.com/slongfield/pyfmt v0.0.0-20180124071345-020a7cb18bca h1:fO9hIZRL+kteo13eh51GqkUdZf/NpMmZsi8ob6b1eOg= @@ -291,6 +304,8 @@ golang.org/x/sys v0.0.0-20190507160741-ecd444e8653b/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190606165138-5da285871e9c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0 h1:HyfiK1WMnHj5FXFXatD+Qs1A/xC2Run6RzeW1SyHxpc= golang.org/x/sys v0.0.0-20190624142023-c5567b49c5d0/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200413165638-669c56c373c4 h1:opSr2sbRXk5X5/givKrrKj9HXxFpW2sdCiP8MJSKLQY= diff --git a/pkg/language/chroma.go b/pkg/language/chroma.go new file mode 100644 index 00000000..ac5e2400 --- /dev/null +++ b/pkg/language/chroma.go @@ -0,0 +1,270 @@ +package language + +import ( + "fmt" + "io" + "os" + fp "path/filepath" + "sort" + "strings" + + "github.com/wakatime/wakatime-cli/pkg/heartbeat" + + "github.com/alecthomas/chroma" + "github.com/alecthomas/chroma/lexers" + _ "github.com/alecthomas/chroma/lexers/a" // not used directly + _ "github.com/alecthomas/chroma/lexers/b" // not used directly + _ "github.com/alecthomas/chroma/lexers/c" // not used directly + _ "github.com/alecthomas/chroma/lexers/circular" // not used directly + _ "github.com/alecthomas/chroma/lexers/d" // not used directly + _ "github.com/alecthomas/chroma/lexers/e" // not used directly + _ "github.com/alecthomas/chroma/lexers/f" // not used directly + _ "github.com/alecthomas/chroma/lexers/g" // not used directly + _ "github.com/alecthomas/chroma/lexers/h" // not used directly + _ "github.com/alecthomas/chroma/lexers/i" // not used directly + _ "github.com/alecthomas/chroma/lexers/j" // not used directly + _ "github.com/alecthomas/chroma/lexers/k" // not used directly + _ "github.com/alecthomas/chroma/lexers/l" // not used directly + _ "github.com/alecthomas/chroma/lexers/m" // not used directly + _ "github.com/alecthomas/chroma/lexers/n" // not used directly + _ "github.com/alecthomas/chroma/lexers/o" // not used directly + _ "github.com/alecthomas/chroma/lexers/p" // not used directly + _ "github.com/alecthomas/chroma/lexers/q" // not used directly + _ "github.com/alecthomas/chroma/lexers/r" // not used directly + _ "github.com/alecthomas/chroma/lexers/s" // not used directly + _ "github.com/alecthomas/chroma/lexers/t" // not used directly + _ "github.com/alecthomas/chroma/lexers/v" // not used directly + _ "github.com/alecthomas/chroma/lexers/w" // not used directly + _ "github.com/alecthomas/chroma/lexers/x" // not used directly + _ "github.com/alecthomas/chroma/lexers/y" // not used directly + _ "github.com/alecthomas/chroma/lexers/z" // not used directly + "github.com/danwakefield/fnmatch" + jww "github.com/spf13/jwalterweatherman" +) + +// chromaMatchCustomized returns the best by filename matching lexer. Best lexer is determined +// by customized priority. +// This is a modified implementation of chroma.lexers.internal.api:Match(). +func chromaMatchCustomized(filepath string) (heartbeat.Language, bool) { + _, file := fp.Split(filepath) + filename := fp.Base(file) + matched := chroma.PrioritisedLexers{} + + // First, try primary filename matches. + for _, lexer := range lexers.Registry.Lexers { + config := lexer.Config() + for _, glob := range config.Filenames { + if fnmatch.Match(glob, filename, 0) { + matched = append(matched, lexer) + } + } + } + + if len(matched) > 0 { + bestLexer := selectByCustomizedPriority(filepath, matched) + + language, ok := heartbeat.ParseLanguageFromChroma(bestLexer.Config().Name) + if !ok { + jww.WARN.Printf("failed to parse language from chroma lexer name %q", bestLexer.Config().Name) + return heartbeat.LanguageUnknown, false + } + + return language, true + } + + // Next, try filename aliases. + for _, lexer := range lexers.Registry.Lexers { + config := lexer.Config() + for _, glob := range config.AliasFilenames { + if fnmatch.Match(glob, filename, 0) { + matched = append(matched, lexer) + } + } + } + + if len(matched) > 0 { + bestLexer := selectByCustomizedPriority(filepath, matched) + + language, ok := heartbeat.ParseLanguageFromChroma(bestLexer.Config().Name) + if !ok { + jww.WARN.Printf("failed to parse language from chroma lexer name %q", bestLexer.Config().Name) + return heartbeat.LanguageUnknown, false + } + + return language, true + } + + return heartbeat.LanguageUnknown, false +} + +// weightedLexer is a lexer with priority and weight. +type weightedLexer struct { + chroma.Lexer + Weight float32 + Priority float32 +} + +// selectByCustomizedPriority selects the best matching lexer by customized priority evaluation. +func selectByCustomizedPriority(filepath string, lexers chroma.PrioritisedLexers) chroma.Lexer { + sort.Slice(lexers, func(i, j int) bool { + icfg, jcfg := lexers[i].Config(), lexers[j].Config() + + // 1. by priority + if icfg.Priority != jcfg.Priority { + return icfg.Priority > jcfg.Priority + } + + // 2. by name + return strings.ToLower(icfg.Name) > strings.ToLower(jcfg.Name) + }) + + dir, _ := fp.Split(filepath) + + extensions, err := loadFolderExtensions(dir) + if err != nil { + jww.WARN.Printf("failed to load folder extensions: %s", err) + return lexers[0] + } + + head, err := fileHead(filepath) + if err != nil { + jww.WARN.Printf("failed to load head from file %q: %s", filepath, err) + return lexers[0] + } + + var weighted []weightedLexer + + for _, lexer := range lexers { + var weight float32 + + if analyser, ok := lexer.(chroma.Analyser); ok { + weight = analyser.AnalyseText(string(head)) + } + + cfg := lexer.Config() + + if p, ok := priority(cfg.Name); ok { + weighted = append(weighted, weightedLexer{ + Lexer: lexer, + Priority: p, + Weight: weight, + }) + + continue + } + + if cfg.Name == "Matlab" { + weighted = append(weighted, weightedLexer{ + Lexer: lexer, + Priority: cfg.Priority, + Weight: matlabWeight(weight, extensions), + }) + + continue + } + + if cfg.Name == "Objective-C" { + weighted = append(weighted, weightedLexer{ + Lexer: lexer, + Priority: cfg.Priority, + Weight: objectiveCWeight(weight, extensions), + }) + + continue + } + + weighted = append(weighted, weightedLexer{ + Lexer: lexer, + Priority: cfg.Priority, + Weight: weight, + }) + } + + sort.Slice(weighted, func(i, j int) bool { + // 1. by weight + if weighted[i].Weight != weighted[j].Weight { + return weighted[i].Weight > weighted[j].Weight + } + + // 2. by priority + if weighted[i].Priority != weighted[j].Priority { + return weighted[i].Priority > weighted[j].Priority + } + + // 3. name + return weighted[i].Lexer.Config().Name > weighted[j].Lexer.Config().Name + }) + + return weighted[0].Lexer +} + +// fileHead returns the first 512000 bytes of the file's content. +func fileHead(filepath string) ([]byte, error) { + f, err := os.Open(filepath) + if err != nil { + return nil, fmt.Errorf("failed to open file: %s", err) + } + + defer f.Close() + + data := make([]byte, 512000) + + _, err = f.ReadAt(data, 0) + if err != nil && err != io.EOF { + return nil, fmt.Errorf("failed to read bytes from file: %s", err) + } + + return data, nil +} + +// objectiveCWeight determines the weight of objective-c by the provided same folder file extensions. +func objectiveCWeight(weight float32, extensions []string) float32 { + var matFileExists bool + + for _, e := range extensions { + if e == ".mat" { + matFileExists = true + break + } + } + + if matFileExists { + weight -= 0.01 + } else { + weight += 0.01 + } + + for _, e := range extensions { + if e == ".h" { + weight += 0.01 + break + } + } + + return weight +} + +// matlabWeight determines the weight of matlab by the provided same folder file extensions. +func matlabWeight(weight float32, extensions []string) float32 { + for _, e := range extensions { + if e == ".mat" { + weight += 0.01 + break + } + } + + var headerFileExists bool + + for _, e := range extensions { + if e == ".h" { + headerFileExists = true + break + } + } + + if !headerFileExists { + weight += 0.01 + } + + return weight +} diff --git a/pkg/language/language.go b/pkg/language/language.go index 1ef5910f..52ecba3f 100644 --- a/pkg/language/language.go +++ b/pkg/language/language.go @@ -72,6 +72,10 @@ func Detect(fp string) (heartbeat.Language, error) { return language, nil } + if language, ok := chromaMatchCustomized(fp); ok { + return language, nil + } + return heartbeat.LanguageUnknown, fmt.Errorf("could not detect the language of file %q", fp) } diff --git a/pkg/language/language_test.go b/pkg/language/language_test.go index 13df4669..b9348791 100644 --- a/pkg/language/language_test.go +++ b/pkg/language/language_test.go @@ -89,29 +89,71 @@ func TestDetect_HeaderFile_With_C_And_CXX_Files(t *testing.T) { } func TestDetect_ObjectiveC_Over_Matlab_MatchingHeader(t *testing.T) { - lang, err := language.Detect("testdata/codefiles/h_with_m_file/objective-c.m") + lang, err := language.Detect("testdata/codefiles/with_mat_file/objective-c.m") require.NoError(t, err) assert.Equal(t, heartbeat.LanguageObjectiveC, lang) } func TestDetect_ObjectiveC_M_FileInFolder(t *testing.T) { - lang, err := language.Detect("testdata/codefiles/h_with_m_file/objective-c.h") + lang, err := language.Detect("testdata/codefiles/with_mat_file/objective-c.h") require.NoError(t, err) assert.Equal(t, heartbeat.LanguageObjectiveC, lang) } func TestDetect_ObjectiveCPP_MatchingHeader(t *testing.T) { - lang, err := language.Detect("testdata/codefiles/h_with_mm_file/objective-cpp.mm") + lang, err := language.Detect("testdata/codefiles/with_mat_file/objective-cpp.mm") require.NoError(t, err) assert.Equal(t, heartbeat.LanguageObjectiveCPP, lang) } func TestDetect_ObjectiveCPP_MM_FileInFolder(t *testing.T) { - lang, err := language.Detect("testdata/codefiles/h_with_mm_file/objective-cpp.h") + lang, err := language.Detect("testdata/codefiles/with_mat_file/objective-cpp.h") require.NoError(t, err) assert.Equal(t, heartbeat.LanguageObjectiveCPP, lang) } + +func TestDetect_ObjectiveC(t *testing.T) { + lang, err := language.Detect("testdata/codefiles/objective-c.m") + require.NoError(t, err) + + assert.Equal(t, heartbeat.LanguageObjectiveC, lang) +} + +func TestDetect_Matlab_Over_ObjectiveC_Mat_FileInFolder(t *testing.T) { + lang, err := language.Detect("testdata/codefiles/with_mat_file/empty.m") + require.NoError(t, err) + + assert.Equal(t, heartbeat.LanguageMatlab, lang) +} + +func TestDetect_ObjectiveC_Over_Matlab_NonMatchingHeader(t *testing.T) { + lang, err := language.Detect("testdata/codefiles/matlab_with_headers/empty.m") + require.NoError(t, err) + + assert.Equal(t, heartbeat.LanguageObjectiveC, lang) +} + +func TestDetect_NonHeaderFile_C_FilesInFolder(t *testing.T) { + lang, err := language.Detect("testdata/codefiles/py_with_c_files/see.py") + require.NoError(t, err) + + assert.Equal(t, heartbeat.LanguagePython, lang) +} + +func TestDetect_Perl_Over_Prolog(t *testing.T) { + lang, err := language.Detect("testdata/codefiles/perl.pl") + require.NoError(t, err) + + assert.Equal(t, heartbeat.LanguagePerl, lang) +} + +func TestDetect_FSharp_Over_Forth(t *testing.T) { + lang, err := language.Detect("testdata/codefiles/fsharp.fs") + require.NoError(t, err) + + assert.Equal(t, heartbeat.LanguageFSharp, lang) +} diff --git a/pkg/language/priority.go b/pkg/language/priority.go new file mode 100644 index 00000000..8576f81b --- /dev/null +++ b/pkg/language/priority.go @@ -0,0 +1,13 @@ +package language + +func priority(lang string) (float32, bool) { + prios := map[string]float32{ + "FSharp": 0.01, + "Perl": 0.01, + "TypeScript": 0.01, + } + + p, ok := prios[lang] + + return p, ok +} diff --git a/pkg/language/testdata/codefiles/h_with_m_file/objective-c.h b/pkg/language/testdata/codefiles/fsharp.fs similarity index 100% rename from pkg/language/testdata/codefiles/h_with_m_file/objective-c.h rename to pkg/language/testdata/codefiles/fsharp.fs diff --git a/pkg/language/testdata/codefiles/h_with_m_file/objective-c.m b/pkg/language/testdata/codefiles/matlab_with_headers/empty.m similarity index 100% rename from pkg/language/testdata/codefiles/h_with_m_file/objective-c.m rename to pkg/language/testdata/codefiles/matlab_with_headers/empty.m diff --git a/pkg/language/testdata/codefiles/matlab_with_headers/matlab.m b/pkg/language/testdata/codefiles/matlab_with_headers/matlab.m new file mode 100755 index 00000000..11259e58 --- /dev/null +++ b/pkg/language/testdata/codefiles/matlab_with_headers/matlab.m @@ -0,0 +1,10 @@ +function foo = bar(a, b, c) + +% This is a variable +avariables = 0; + +% ============================================================= +% This is a Matlab comment +% ============================================================= + +end diff --git a/pkg/language/testdata/codefiles/matlab_with_headers/random.h b/pkg/language/testdata/codefiles/matlab_with_headers/random.h new file mode 100644 index 00000000..e69de29b diff --git a/pkg/language/testdata/codefiles/objective-c.m b/pkg/language/testdata/codefiles/objective-c.m new file mode 100644 index 00000000..f7ebdcd8 --- /dev/null +++ b/pkg/language/testdata/codefiles/objective-c.m @@ -0,0 +1,18 @@ +// +// Objective-C.m +// + +#import "SomeViewController.h" +#import +#import + +@interface ViewController : UIViewController + @property (nonnull, strong) URL *url; +@end + +@implementation ViewController { +- (void)viewDidLoad { + [super viewDidLoad]; + // noop +} +@end diff --git a/pkg/language/testdata/codefiles/perl.pl b/pkg/language/testdata/codefiles/perl.pl new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/pkg/language/testdata/codefiles/perl.pl @@ -0,0 +1 @@ + diff --git a/pkg/language/testdata/codefiles/py_with_c_files/foo.c b/pkg/language/testdata/codefiles/py_with_c_files/foo.c new file mode 100644 index 00000000..e69de29b diff --git a/pkg/language/testdata/codefiles/py_with_c_files/see.h b/pkg/language/testdata/codefiles/py_with_c_files/see.h new file mode 100644 index 00000000..e69de29b diff --git a/pkg/language/testdata/codefiles/py_with_c_files/see.py b/pkg/language/testdata/codefiles/py_with_c_files/see.py new file mode 100644 index 00000000..e69de29b diff --git a/pkg/language/testdata/codefiles/with_mat_file/empty.m b/pkg/language/testdata/codefiles/with_mat_file/empty.m new file mode 100644 index 00000000..e69de29b diff --git a/pkg/language/testdata/codefiles/with_mat_file/empty.mat b/pkg/language/testdata/codefiles/with_mat_file/empty.mat new file mode 100644 index 00000000..e69de29b diff --git a/pkg/language/testdata/codefiles/with_mat_file/objective-c.h b/pkg/language/testdata/codefiles/with_mat_file/objective-c.h new file mode 100644 index 00000000..e69de29b diff --git a/pkg/language/testdata/codefiles/with_mat_file/objective-c.m b/pkg/language/testdata/codefiles/with_mat_file/objective-c.m new file mode 100644 index 00000000..e69de29b diff --git a/pkg/language/testdata/codefiles/with_mat_file/objective-cpp.h b/pkg/language/testdata/codefiles/with_mat_file/objective-cpp.h new file mode 100644 index 00000000..e69de29b diff --git a/pkg/language/testdata/codefiles/with_mat_file/objective-cpp.mm b/pkg/language/testdata/codefiles/with_mat_file/objective-cpp.mm new file mode 100644 index 00000000..e69de29b diff --git a/pkg/language/testdata/objective-c.m b/pkg/language/testdata/objective-c.m new file mode 100644 index 00000000..f7ebdcd8 --- /dev/null +++ b/pkg/language/testdata/objective-c.m @@ -0,0 +1,18 @@ +// +// Objective-C.m +// + +#import "SomeViewController.h" +#import +#import + +@interface ViewController : UIViewController + @property (nonnull, strong) URL *url; +@end + +@implementation ViewController { +- (void)viewDidLoad { + [super viewDidLoad]; + // noop +} +@end