From 0bbc24dffd1bad5d464fcb07afbc62d7e48d83af Mon Sep 17 00:00:00 2001 From: Andreas Humenberger Date: Fri, 7 Jun 2024 10:00:48 +0200 Subject: [PATCH] Allow the repository to specify a list of evaluation tasks Part of #165 --- cmd/eval-dev-quality/cmd/evaluate.go | 2 +- evaluate/evaluate.go | 86 +++++------ evaluate/evaluate_test.go | 210 ++++++++++++++++----------- evaluate/report/collection.go | 54 +++++-- evaluate/report/collection_test.go | 169 +++++++++++++-------- evaluate/report/csv.go | 9 +- evaluate/report/csv_test.go | 57 ++++---- evaluate/repository.go | 61 +++++++- evaluate/repository_test.go | 95 +++++++++++- task/task.go | 9 +- 10 files changed, 513 insertions(+), 239 deletions(-) diff --git a/cmd/eval-dev-quality/cmd/evaluate.go b/cmd/eval-dev-quality/cmd/evaluate.go index acdabe707..ed95db5c5 100644 --- a/cmd/eval-dev-quality/cmd/evaluate.go +++ b/cmd/eval-dev-quality/cmd/evaluate.go @@ -329,7 +329,7 @@ func (command *Evaluate) Execute(args []string) (err error) { } // WriteCSVs writes the various CSV reports to disk. -func writeCSVs(resultPath string, assessments report.AssessmentPerModelPerLanguagePerRepository) (err error) { +func writeCSVs(resultPath string, assessments report.AssessmentPerModelPerLanguagePerRepositoryPerTask) (err error) { // Write the "evaluation.csv" containing all data. csv, err := report.GenerateCSV(assessments) if err != nil { diff --git a/evaluate/evaluate.go b/evaluate/evaluate.go index 132cd7c8c..6eb2624e0 100644 --- a/evaluate/evaluate.go +++ b/evaluate/evaluate.go @@ -63,12 +63,12 @@ func (ctx *Context) runsAtModelLevel() uint { const RepositoryPlainName = "plain" // Evaluate runs an evaluation on the given context and returns its results. 
-func Evaluate(ctx *Context) (assessments report.AssessmentPerModelPerLanguagePerRepository, totalScore uint64) { +func Evaluate(ctx *Context) (assessments report.AssessmentPerModelPerLanguagePerRepositoryPerTask, totalScore uint64) { // Check that models and languages can be evaluated by executing the "plain" repositories. modelSucceededBasicChecksOfLanguage := map[evalmodel.Model]map[evallanguage.Language]bool{} ctx.Log.Printf("Checking that models and languages can be used for evaluation") // Ensure we report metrics for every model even if they are excluded. - assessments = report.NewAssessmentPerModelPerLanguagePerRepository(ctx.Models, ctx.Languages, ctx.RepositoryPaths) + assessments = report.NewAssessmentPerModelPerLanguagePerRepositoryPerTask(ctx.Models, ctx.Languages, ctx.RepositoryPaths) problemsPerModel := map[string][]error{} { @@ -106,29 +106,31 @@ func Evaluate(ctx *Context) (assessments report.AssessmentPerModelPerLanguagePer r.SetQueryAttempts(ctx.QueryAttempts) } - withLoadedModel(ctx.Log, model, ctx.ProviderForModel[model], func() { - for rm := uint(0); rm < ctx.runsAtModelLevel(); rm++ { - if ctx.Runs > 1 && ctx.RunsSequential { - ctx.Log.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID) + for _, taskIdentifier := range temporaryRepository.Tasks { + withLoadedModel(ctx.Log, model, ctx.ProviderForModel[model], func() { + for rm := uint(0); rm < ctx.runsAtModelLevel(); rm++ { + if ctx.Runs > 1 && ctx.RunsSequential { + ctx.Log.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID) + } + + if err := temporaryRepository.Reset(ctx.Log); err != nil { + ctx.Log.Panicf("ERROR: unable to reset temporary repository path: %s", err) + } + + assessment, ps, err := temporaryRepository.Evaluate(ctx.Log, ctx.ResultPath, model, language, taskIdentifier) + assessments.Add(model, language, repositoryPath, taskIdentifier, assessment) + if err != nil { + ps = append(ps, err) + } + if len(ps) > 0 { + ctx.Log.Printf("Model %q was not able to solve the %q 
repository for language %q: %+v", modelID, repositoryPath, languageID, ps) + problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...) + } else { + modelSucceededBasicChecksOfLanguage[model][language] = true + } } - - if err := temporaryRepository.Reset(ctx.Log); err != nil { - ctx.Log.Panicf("ERROR: unable to reset temporary repository path: %s", err) - } - - assessment, ps, err := temporaryRepository.Evaluate(ctx.Log, ctx.ResultPath, model, language) - assessments[model][language][repositoryPath].Add(assessment) - if err != nil { - ps = append(ps, err) - } - if len(ps) > 0 { - ctx.Log.Printf("Model %q was not able to solve the %q repository for language %q: %+v", modelID, repositoryPath, languageID, ps) - problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...) - } else { - modelSucceededBasicChecksOfLanguage[model][language] = true - } - } - }) + }) + } } } } @@ -196,24 +198,26 @@ func Evaluate(ctx *Context) (assessments report.AssessmentPerModelPerLanguagePer continue } - withLoadedModel(ctx.Log, model, ctx.ProviderForModel[model], func() { - for rm := uint(0); rm < ctx.runsAtModelLevel(); rm++ { - if ctx.Runs > 1 && ctx.RunsSequential { - ctx.Log.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID) - } - - if err := temporaryRepository.Reset(ctx.Log); err != nil { - ctx.Log.Panicf("ERROR: unable to reset temporary repository path: %s", err) - } - - assessment, ps, err := temporaryRepository.Evaluate(ctx.Log, ctx.ResultPath, model, language) - assessments[model][language][repositoryPath].Add(assessment) - problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...) 
- if err != nil { - ctx.Log.Printf("ERROR: Model %q encountered a hard error for language %q, repository %q: %+v", modelID, languageID, repositoryPath, err) + for _, taskIdentifier := range temporaryRepository.Tasks { + withLoadedModel(ctx.Log, model, ctx.ProviderForModel[model], func() { + for rm := uint(0); rm < ctx.runsAtModelLevel(); rm++ { + if ctx.Runs > 1 && ctx.RunsSequential { + ctx.Log.Printf("Run %d/%d for model %q", rm+1, ctx.Runs, modelID) + } + + if err := temporaryRepository.Reset(ctx.Log); err != nil { + ctx.Log.Panicf("ERROR: unable to reset temporary repository path: %s", err) + } + + assessment, ps, err := temporaryRepository.Evaluate(ctx.Log, ctx.ResultPath, model, language, taskIdentifier) + assessments.Add(model, language, repositoryPath, taskIdentifier, assessment) + problemsPerModel[modelID] = append(problemsPerModel[modelID], ps...) + if err != nil { + ctx.Log.Printf("ERROR: Model %q encountered a hard error for language %q, repository %q: %+v", modelID, languageID, repositoryPath, err) + } } - } - }) + }) + } } } } diff --git a/evaluate/evaluate_test.go b/evaluate/evaluate_test.go index e9542e601..bf3d170a5 100644 --- a/evaluate/evaluate_test.go +++ b/evaluate/evaluate_test.go @@ -80,7 +80,7 @@ func TestEvaluate(t *testing.T) { Context *Context - ExpectedAssessments report.AssessmentPerModelPerLanguagePerRepository + ExpectedAssessments report.AssessmentPerModelPerLanguagePerRepositoryPerTask ExpectedTotalScore uint64 ExpectedOutputValidate func(t *testing.T, output string, resultPath string) ExpectedResultFiles map[string]func(t *testing.T, filePath string, data string) @@ -121,10 +121,12 @@ func TestEvaluate(t *testing.T) { // Normalize assessments. 
for _, ls := range actualAssessments { for _, rs := range ls { - for _, a := range rs { - if v, ok := a[metrics.AssessmentKeyProcessingTime]; ok { - if assert.Greater(t, v, uint64(0)) { - delete(a, metrics.AssessmentKeyProcessingTime) + for _, ts := range rs { + for _, a := range ts { + if v, ok := a[metrics.AssessmentKeyProcessingTime]; ok { + if assert.Greater(t, v, uint64(0)) { + delete(a, metrics.AssessmentKeyProcessingTime) + } } } } @@ -164,6 +166,7 @@ func TestEvaluate(t *testing.T) { { languageGolang := &golang.Language{} mockedModel := modeltesting.NewMockModelNamed(t, "empty-response-model") + repositoryPath := filepath.Join("golang", "plain") validate(t, &testCase{ Name: "Empty model responses are errors", @@ -183,9 +186,13 @@ func TestEvaluate(t *testing.T) { }, }, - ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]metrics.Assessments{ - mockedModel: map[language.Language]map[string]metrics.Assessments{ - languageGolang: map[string]metrics.Assessments{}, + ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + mockedModel: map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + languageGolang: map[string]map[task.Identifier]metrics.Assessments{ + repositoryPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: metrics.Assessments{}, + }, + }, }, }, ExpectedTotalScore: 1, @@ -201,6 +208,7 @@ func TestEvaluate(t *testing.T) { mockedModelID := "testing-provider/empty-response-model" mockedQuery := providertesting.NewMockQuery(t) mockedModel := llm.NewModel(mockedQuery, mockedModelID) + repositoryPath := filepath.Join("golang", "plain") validate(t, &testCase{ Name: "Single try fails", @@ -224,9 +232,13 @@ func TestEvaluate(t *testing.T) { QueryAttempts: 1, }, - ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]metrics.Assessments{ - mockedModel: map[language.Language]map[string]metrics.Assessments{ - 
languageGolang: map[string]metrics.Assessments{}, + ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + mockedModel: map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + languageGolang: map[string]map[task.Identifier]metrics.Assessments{ + repositoryPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: metrics.Assessments{}, + }, + }, }, }, ExpectedTotalScore: 1, @@ -271,13 +283,15 @@ func TestEvaluate(t *testing.T) { }, }, - ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]metrics.Assessments{ - mockedModel: map[language.Language]map[string]metrics.Assessments{ - languageGolang: map[string]metrics.Assessments{ - repositoryPath: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 14, - metrics.AssessmentKeyResponseCharacterCount: 14, - metrics.AssessmentKeyResponseNoError: 1, + ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + mockedModel: map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + languageGolang: map[string]map[task.Identifier]metrics.Assessments{ + repositoryPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: map[metrics.AssessmentKey]uint64{ + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 14, + metrics.AssessmentKeyResponseCharacterCount: 14, + metrics.AssessmentKeyResponseNoError: 1, + }, }, }, }, @@ -323,13 +337,15 @@ func TestEvaluate(t *testing.T) { }, }, - ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]metrics.Assessments{ - mockedModel: map[language.Language]map[string]metrics.Assessments{ - languageGolang: map[string]metrics.Assessments{ - repositoryPath: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 14, - metrics.AssessmentKeyResponseCharacterCount: 14, - 
metrics.AssessmentKeyResponseNoError: 1, + ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + mockedModel: map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + languageGolang: map[string]map[task.Identifier]metrics.Assessments{ + repositoryPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: map[metrics.AssessmentKey]uint64{ + metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 14, + metrics.AssessmentKeyResponseCharacterCount: 14, + metrics.AssessmentKeyResponseNoError: 1, + }, }, }, }, @@ -409,18 +425,22 @@ func TestEvaluate(t *testing.T) { Runs: 2, }, - ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]metrics.Assessments{ - mockedModel: map[language.Language]map[string]metrics.Assessments{ - languageGolang: map[string]metrics.Assessments{ - repositoryPlainPath: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, - metrics.AssessmentKeyFilesExecuted: 2, - metrics.AssessmentKeyResponseNoError: 2, + ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + mockedModel: map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + languageGolang: map[string]map[task.Identifier]metrics.Assessments{ + repositoryPlainPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: map[metrics.AssessmentKey]uint64{ + metrics.AssessmentKeyCoverage: 0, + metrics.AssessmentKeyFilesExecuted: 2, + metrics.AssessmentKeyResponseNoError: 2, + }, }, - repositoryNextPath: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyResponseNoError: 1, + repositoryNextPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: map[metrics.AssessmentKey]uint64{ + metrics.AssessmentKeyCoverage: 0, + metrics.AssessmentKeyFilesExecuted: 1, + 
metrics.AssessmentKeyResponseNoError: 1, + }, }, }, }, @@ -474,18 +494,22 @@ func TestEvaluate(t *testing.T) { Runs: 2, }, - ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]metrics.Assessments{ - mockedModel: map[language.Language]map[string]metrics.Assessments{ - languageGolang: map[string]metrics.Assessments{ - repositoryPlainPath: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyResponseNoError: 1, + ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + mockedModel: map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + languageGolang: map[string]map[task.Identifier]metrics.Assessments{ + repositoryPlainPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: map[metrics.AssessmentKey]uint64{ + metrics.AssessmentKeyCoverage: 0, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + }, }, - repositoryNextPath: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, - metrics.AssessmentKeyFilesExecuted: 2, - metrics.AssessmentKeyResponseNoError: 2, + repositoryNextPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: map[metrics.AssessmentKey]uint64{ + metrics.AssessmentKeyCoverage: 0, + metrics.AssessmentKeyFilesExecuted: 2, + metrics.AssessmentKeyResponseNoError: 2, + }, }, }, }, @@ -535,11 +559,13 @@ func TestEvaluate(t *testing.T) { Runs: 2, }, - ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]metrics.Assessments{ - mockedModel: map[language.Language]map[string]metrics.Assessments{ - languageGolang: map[string]metrics.Assessments{ - repositoryPlainPath: map[metrics.AssessmentKey]uint64{}, - repositoryNextPath: map[metrics.AssessmentKey]uint64{}, + ExpectedAssessments: 
map[evalmodel.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + mockedModel: map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + languageGolang: map[string]map[task.Identifier]metrics.Assessments{ + repositoryPlainPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: map[metrics.AssessmentKey]uint64{}, + }, + repositoryNextPath: map[task.Identifier]metrics.Assessments{}, }, }, }, @@ -583,13 +609,15 @@ func TestEvaluate(t *testing.T) { RunsSequential: false, }, - ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]metrics.Assessments{ - mockedModel: map[language.Language]map[string]metrics.Assessments{ - languageGolang: map[string]metrics.Assessments{ - repositoryPath: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, - metrics.AssessmentKeyFilesExecuted: 3, - metrics.AssessmentKeyResponseNoError: 3, + ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + mockedModel: map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + languageGolang: map[string]map[task.Identifier]metrics.Assessments{ + repositoryPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: map[metrics.AssessmentKey]uint64{ + metrics.AssessmentKeyCoverage: 0, + metrics.AssessmentKeyFilesExecuted: 3, + metrics.AssessmentKeyResponseNoError: 3, + }, }, }, }, @@ -635,13 +663,15 @@ func TestEvaluate(t *testing.T) { RunsSequential: true, }, - ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]metrics.Assessments{ - mockedModel: map[language.Language]map[string]metrics.Assessments{ - languageGolang: map[string]metrics.Assessments{ - repositoryPath: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, - metrics.AssessmentKeyFilesExecuted: 3, - metrics.AssessmentKeyResponseNoError: 3, + ExpectedAssessments: 
map[evalmodel.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + mockedModel: map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + languageGolang: map[string]map[task.Identifier]metrics.Assessments{ + repositoryPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: map[metrics.AssessmentKey]uint64{ + metrics.AssessmentKeyCoverage: 0, + metrics.AssessmentKeyFilesExecuted: 3, + metrics.AssessmentKeyResponseNoError: 3, + }, }, }, }, @@ -717,13 +747,15 @@ func TestEvaluate(t *testing.T) { RunsSequential: true, }, - ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]metrics.Assessments{ - mockedModel: map[language.Language]map[string]metrics.Assessments{ - languageGolang: map[string]metrics.Assessments{ - repositoryPath: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, - metrics.AssessmentKeyFilesExecuted: 3, - metrics.AssessmentKeyResponseNoError: 3, + ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + mockedModel: map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + languageGolang: map[string]map[task.Identifier]metrics.Assessments{ + repositoryPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: map[metrics.AssessmentKey]uint64{ + metrics.AssessmentKeyCoverage: 0, + metrics.AssessmentKeyFilesExecuted: 3, + metrics.AssessmentKeyResponseNoError: 3, + }, }, }, }, @@ -784,13 +816,15 @@ func TestEvaluate(t *testing.T) { Runs: 3, }, - ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]metrics.Assessments{ - mockedModel: map[language.Language]map[string]metrics.Assessments{ - languageGolang: map[string]metrics.Assessments{ - repositoryPath: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, - metrics.AssessmentKeyFilesExecuted: 3, - metrics.AssessmentKeyResponseNoError: 3, + ExpectedAssessments: 
map[evalmodel.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + mockedModel: map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + languageGolang: map[string]map[task.Identifier]metrics.Assessments{ + repositoryPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: map[metrics.AssessmentKey]uint64{ + metrics.AssessmentKeyCoverage: 0, + metrics.AssessmentKeyFilesExecuted: 3, + metrics.AssessmentKeyResponseNoError: 3, + }, }, }, }, @@ -832,13 +866,15 @@ func TestEvaluate(t *testing.T) { Runs: 1, }, - ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]metrics.Assessments{ - mockedModel: map[language.Language]map[string]metrics.Assessments{ - languageGolang: map[string]metrics.Assessments{ - repositoryPath: map[metrics.AssessmentKey]uint64{ - metrics.AssessmentKeyCoverage: 0, - metrics.AssessmentKeyFilesExecuted: 1, - metrics.AssessmentKeyResponseNoError: 1, + ExpectedAssessments: map[evalmodel.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + mockedModel: map[language.Language]map[string]map[task.Identifier]metrics.Assessments{ + languageGolang: map[string]map[task.Identifier]metrics.Assessments{ + repositoryPath: map[task.Identifier]metrics.Assessments{ + task.IdentifierWriteTests: map[metrics.AssessmentKey]uint64{ + metrics.AssessmentKeyCoverage: 0, + metrics.AssessmentKeyFilesExecuted: 1, + metrics.AssessmentKeyResponseNoError: 1, + }, }, }, }, diff --git a/evaluate/report/collection.go b/evaluate/report/collection.go index 648b0969f..b8ce9a967 100644 --- a/evaluate/report/collection.go +++ b/evaluate/report/collection.go @@ -12,6 +12,7 @@ import ( "github.com/symflower/eval-dev-quality/evaluate/metrics" "github.com/symflower/eval-dev-quality/language" "github.com/symflower/eval-dev-quality/model" + "github.com/symflower/eval-dev-quality/task" ) // AssessmentPerLanguagePerModel holds a collection of assessments per language and model. 
@@ -44,19 +45,19 @@ func (a AssessmentPerModel) WalkByScore(function func(model model.Model, assessm return nil } -// AssessmentPerModelPerLanguagePerRepository holds a collection of assessments per model per language and per repository. -type AssessmentPerModelPerLanguagePerRepository map[model.Model]map[language.Language]map[string]metrics.Assessments +// AssessmentPerModelPerLanguagePerRepositoryPerTask holds a collection of assessments per model, per language, per repository and per task. +type AssessmentPerModelPerLanguagePerRepositoryPerTask map[model.Model]map[language.Language]map[string]map[task.Identifier]metrics.Assessments -// NewAssessmentPerModelPerLanguagePerRepository returns a new AssessmentPerModelPerLanguagePerRepository initialized with an empty assessment for each combination. -func NewAssessmentPerModelPerLanguagePerRepository(models []model.Model, languages []language.Language, repositories []string) (assessments AssessmentPerModelPerLanguagePerRepository) { - a := AssessmentPerModelPerLanguagePerRepository{} +// NewAssessmentPerModelPerLanguagePerRepositoryPerTask returns a new AssessmentPerModelPerLanguagePerRepositoryPerTask initialized with an empty set of task assessments for each combination. +func NewAssessmentPerModelPerLanguagePerRepositoryPerTask(models []model.Model, languages []language.Language, repositories []string) (assessments AssessmentPerModelPerLanguagePerRepositoryPerTask) { + a := AssessmentPerModelPerLanguagePerRepositoryPerTask{} for _, m := range models { if _, ok := a[m]; !ok { - a[m] = map[language.Language]map[string]metrics.Assessments{} + a[m] = map[language.Language]map[string]map[task.Identifier]metrics.Assessments{} } for _, l := range languages { if _, ok := a[m][l]; !ok { - a[m][l] = map[string]metrics.Assessments{} + a[m][l] = map[string]map[task.Identifier]metrics.Assessments{} } for _, r := range repositories { // Ensure the repository path matches the language. 
@@ -64,7 +65,10 @@ func NewAssessmentPerModelPerLanguagePerRepository(models []model.Model, languag continue } - a[m][l][r] = metrics.NewAssessments() + if _, ok := a[m][l][r]; !ok { + a[m][l][r] = map[task.Identifier]metrics.Assessments{} + } + } } } @@ -72,8 +76,25 @@ func NewAssessmentPerModelPerLanguagePerRepository(models []model.Model, languag return a } +// Add adds a new assessment. +func (a AssessmentPerModelPerLanguagePerRepositoryPerTask) Add(model model.Model, language language.Language, repository string, taskIdentifier task.Identifier, assessment metrics.Assessments) { + perRepository, ok := a[model][language][repository] + if !ok { + perRepository = map[task.Identifier]metrics.Assessments{} + a[model][language][repository] = perRepository + } + + assessments, ok := perRepository[taskIdentifier] + if !ok { + assessments = metrics.NewAssessments() + a[model][language][repository][taskIdentifier] = assessments + } + + assessments.Add(assessment) +} + // Walk walks over all entries. 
-func (a AssessmentPerModelPerLanguagePerRepository) Walk(function func(m model.Model, l language.Language, r string, a metrics.Assessments) error) (err error) { +func (a AssessmentPerModelPerLanguagePerRepositoryPerTask) Walk(function func(m model.Model, l language.Language, r string, t task.Identifier, a metrics.Assessments) error) (err error) { models := maps.Keys(a) slices.SortStableFunc(models, func(a, b model.Model) int { return cmp.Compare(a.ID(), b.ID()) @@ -87,8 +108,11 @@ func (a AssessmentPerModelPerLanguagePerRepository) Walk(function func(m model.M repositories := maps.Keys(a[m][l]) sort.Strings(repositories) for _, r := range repositories { - if err := function(m, l, r, a[m][l][r]); err != nil { - return err + taskIdentifiers := maps.Keys(a[m][l][r]) + for _, t := range taskIdentifiers { + if err := function(m, l, r, t, a[m][l][r][t]); err != nil { + return err + } } } } @@ -98,12 +122,12 @@ func (a AssessmentPerModelPerLanguagePerRepository) Walk(function func(m model.M } // CollapseByModel returns all assessments aggregated per model ID. -func (a AssessmentPerModelPerLanguagePerRepository) CollapseByModel() AssessmentPerModel { +func (a AssessmentPerModelPerLanguagePerRepositoryPerTask) CollapseByModel() AssessmentPerModel { perModel := make(AssessmentPerModel, len(a)) for _, m := range maps.Keys(a) { perModel[m] = metrics.NewAssessments() } - _ = a.Walk(func(m model.Model, l language.Language, r string, a metrics.Assessments) (err error) { + _ = a.Walk(func(m model.Model, l language.Language, r string, t task.Identifier, a metrics.Assessments) (err error) { perModel[m].Add(a) return nil @@ -113,9 +137,9 @@ func (a AssessmentPerModelPerLanguagePerRepository) CollapseByModel() Assessment } // CollapseByLanguage returns all assessments aggregated per language and model. 
-func (a AssessmentPerModelPerLanguagePerRepository) CollapseByLanguage() AssessmentPerLanguagePerModel { +func (a AssessmentPerModelPerLanguagePerRepositoryPerTask) CollapseByLanguage() AssessmentPerLanguagePerModel { assessments := AssessmentPerLanguagePerModel{} - _ = a.Walk(func(m model.Model, l language.Language, r string, a metrics.Assessments) (err error) { + _ = a.Walk(func(m model.Model, l language.Language, r string, t task.Identifier, a metrics.Assessments) (err error) { if _, ok := assessments[l]; !ok { assessments[l] = map[model.Model]metrics.Assessments{} } diff --git a/evaluate/report/collection_test.go b/evaluate/report/collection_test.go index 50d39facf..5ba199f67 100644 --- a/evaluate/report/collection_test.go +++ b/evaluate/report/collection_test.go @@ -12,13 +12,14 @@ import ( languagetesting "github.com/symflower/eval-dev-quality/language/testing" "github.com/symflower/eval-dev-quality/model" modeltesting "github.com/symflower/eval-dev-quality/model/testing" + "github.com/symflower/eval-dev-quality/task" ) func TestAssessmentPerModelPerLanguagePerRepositoryWalk(t *testing.T) { type testCase struct { Name string - Assessments AssessmentPerModelPerLanguagePerRepository + Assessments AssessmentPerModelPerLanguagePerRepositoryPerTask ExpectedOrder []metrics.Assessments } @@ -26,9 +27,9 @@ func TestAssessmentPerModelPerLanguagePerRepositoryWalk(t *testing.T) { validate := func(t *testing.T, tc *testCase) { t.Run(tc.Name, func(t *testing.T) { actualOrder := []metrics.Assessments{} - assert.NoError(t, tc.Assessments.Walk(func(m model.Model, l language.Language, r string, a metrics.Assessments) (err error) { + assert.NoError(t, tc.Assessments.Walk(func(m model.Model, l language.Language, r string, ti task.Identifier, a metrics.Assessments) (err error) { actualOrder = append(actualOrder, a) - metricstesting.AssertAssessmentsEqual(t, tc.Assessments[m][l][r], a) + metricstesting.AssertAssessmentsEqual(t, tc.Assessments[m][l][r][ti], a) return nil })) @@ 
-44,11 +45,13 @@ func TestAssessmentPerModelPerLanguagePerRepositoryWalk(t *testing.T) { validate(t, &testCase{ Name: "Single Group", - Assessments: AssessmentPerModelPerLanguagePerRepository{ + Assessments: AssessmentPerModelPerLanguagePerRepositoryPerTask{ modeltesting.NewMockModelNamed(t, "some-model"): { languagetesting.NewMockLanguageNamed(t, "some-language"): { - "some-repository": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 1, + "some-repository": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 1, + }, }, }, }, @@ -64,40 +67,56 @@ func TestAssessmentPerModelPerLanguagePerRepositoryWalk(t *testing.T) { validate(t, &testCase{ Name: "Multiple Groups", - Assessments: AssessmentPerModelPerLanguagePerRepository{ + Assessments: AssessmentPerModelPerLanguagePerRepositoryPerTask{ modeltesting.NewMockModelNamed(t, "some-model-a"): { languagetesting.NewMockLanguageNamed(t, "some-language-a"): { - "some-repository-a": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 1, + "some-repository-a": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 1, + }, }, - "some-repository-b": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 2, + "some-repository-b": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 2, + }, }, }, languagetesting.NewMockLanguageNamed(t, "some-language-b"): { - "some-repository-a": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 3, + "some-repository-a": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 3, + }, }, - "some-repository-b": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 4, + "some-repository-b": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 4, + }, }, }, }, modeltesting.NewMockModelNamed(t, "some-model-b"): { languagetesting.NewMockLanguageNamed(t, 
"some-language-a"): { - "some-repository-a": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 5, + "some-repository-a": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 5, + }, }, - "some-repository-b": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 6, + "some-repository-b": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 6, + }, }, }, languagetesting.NewMockLanguageNamed(t, "some-language-b"): { - "some-repository-a": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 7, + "some-repository-a": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 7, + }, }, - "some-repository-b": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 8, + "some-repository-b": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 8, + }, }, }, }, @@ -227,7 +246,7 @@ func TestAssessmentCollapseByModel(t *testing.T) { type testCase struct { Name string - Assessments AssessmentPerModelPerLanguagePerRepository + Assessments AssessmentPerModelPerLanguagePerRepositoryPerTask ExpectedAssessmentPerModel AssessmentPerModel } @@ -246,40 +265,56 @@ func TestAssessmentCollapseByModel(t *testing.T) { validate(t, &testCase{ Name: "Collapse", - Assessments: AssessmentPerModelPerLanguagePerRepository{ + Assessments: AssessmentPerModelPerLanguagePerRepositoryPerTask{ modelA: { languagetesting.NewMockLanguageNamed(t, "some-language-a"): { - "some-repository-a": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 1, + "some-repository-a": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 1, + }, }, - "some-repository-b": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 2, + "some-repository-b": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 2, + }, }, }, 
languagetesting.NewMockLanguageNamed(t, "some-language-b"): { - "some-repository-a": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 3, + "some-repository-a": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 3, + }, }, - "some-repository-b": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 4, + "some-repository-b": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 4, + }, }, }, }, modelB: { languagetesting.NewMockLanguageNamed(t, "some-language-a"): { - "some-repository-a": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 5, + "some-repository-a": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 5, + }, }, - "some-repository-b": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 6, + "some-repository-b": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 6, + }, }, }, languagetesting.NewMockLanguageNamed(t, "some-language-b"): { - "some-repository-a": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 7, + "some-repository-a": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 7, + }, }, - "some-repository-b": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 8, + "some-repository-b": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 8, + }, }, }, }, @@ -300,7 +335,7 @@ func TestAssessmentCollapseByLanguage(t *testing.T) { type testCase struct { Name string - Assessments AssessmentPerModelPerLanguagePerRepository + Assessments AssessmentPerModelPerLanguagePerRepositoryPerTask ExpectedAssessmentPerLanguagePerModel AssessmentPerLanguagePerModel } @@ -322,40 +357,56 @@ func TestAssessmentCollapseByLanguage(t *testing.T) { validate(t, &testCase{ Name: "Collapse", - Assessments: AssessmentPerModelPerLanguagePerRepository{ + Assessments: 
AssessmentPerModelPerLanguagePerRepositoryPerTask{ modelA: { languageA: { - "some-repository-a": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 1, + "some-repository-a": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 1, + }, }, - "some-repository-b": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 2, + "some-repository-b": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 2, + }, }, }, languageB: { - "some-repository-a": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 3, + "some-repository-a": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 3, + }, }, - "some-repository-b": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 4, + "some-repository-b": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 4, + }, }, }, }, modelB: { languageA: { - "some-repository-a": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 5, + "some-repository-a": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 5, + }, }, - "some-repository-b": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 6, + "some-repository-b": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 6, + }, }, }, languageB: { - "some-repository-a": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 7, + "some-repository-a": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 7, + }, }, - "some-repository-b": metrics.Assessments{ - metrics.AssessmentKeyResponseNoExcess: 8, + "some-repository-b": { + task.IdentifierWriteTests: metrics.Assessments{ + metrics.AssessmentKeyResponseNoExcess: 8, + }, }, }, }, diff --git a/evaluate/report/csv.go b/evaluate/report/csv.go index 2d01ee4a9..3ad3ae91a 100644 --- a/evaluate/report/csv.go +++ 
b/evaluate/report/csv.go
@@ -13,6 +13,7 @@ import (
 	"github.com/symflower/eval-dev-quality/evaluate/metrics"
 	"github.com/symflower/eval-dev-quality/language"
 	"github.com/symflower/eval-dev-quality/model"
+	"github.com/symflower/eval-dev-quality/task"
 )
 
 // CSVFormatter defines a formatter for CSV data.
@@ -44,17 +45,17 @@ func GenerateCSV(formatter CSVFormatter) (csvData string, err error) {
 }
 
-// Header returns the header description as a CSV row.
-func (a AssessmentPerModelPerLanguagePerRepository) Header() (header []string) {
-	return append([]string{"model", "language", "repository", "score"}, metrics.AllAssessmentKeysStrings...)
+// Header returns the header description as a CSV row, with exactly one column name per field emitted by "Rows".
+func (a AssessmentPerModelPerLanguagePerRepositoryPerTask) Header() (header []string) {
+	return append([]string{"model", "language", "repository", "task", "score"}, metrics.AllAssessmentKeysStrings...)
 }
 
 // Rows returns all data as CSV rows.
-func (a AssessmentPerModelPerLanguagePerRepository) Rows() (rows [][]string) {
-	_ = a.Walk(func(m model.Model, l language.Language, r string, a metrics.Assessments) (err error) {
+func (a AssessmentPerModelPerLanguagePerRepositoryPerTask) Rows() (rows [][]string) {
+	_ = a.Walk(func(m model.Model, l language.Language, r string, t task.Identifier, a metrics.Assessments) (err error) {
 		metrics := a.StringCSV()
 		score := a.Score()
 
-		row := append([]string{m.ID(), l.ID(), r, strconv.FormatUint(uint64(score), 10)}, metrics...)
+		row := append([]string{m.ID(), l.ID(), r, string(t), strconv.FormatUint(uint64(score), 10)}, metrics...)
 		rows = append(rows, row)
 
 		return nil
diff --git a/evaluate/report/csv_test.go b/evaluate/report/csv_test.go
index f8175f4aa..906b0618b 100644
--- a/evaluate/report/csv_test.go
+++ b/evaluate/report/csv_test.go
@@ -9,13 +9,14 @@ import (
 	"github.com/symflower/eval-dev-quality/evaluate/metrics"
 	languagetesting "github.com/symflower/eval-dev-quality/language/testing"
 	modeltesting "github.com/symflower/eval-dev-quality/model/testing"
+	"github.com/symflower/eval-dev-quality/task"
 )
 
 func TestGenerateCSVForAssessmentPerModelPerLanguagePerRepository(t *testing.T) {
 	type testCase struct {
 		Name string
 
-		Assessments AssessmentPerModelPerLanguagePerRepository
+		Assessments AssessmentPerModelPerLanguagePerRepositoryPerTask
 
 		ExpectedString string
 	}
@@ -32,48 +33,54 @@ func TestGenerateCSVForAssessmentPerModelPerLanguagePerRepository(t *testing.T)
 	validate(t, &testCase{
 		Name: "Single Empty Model",
 
-		Assessments: AssessmentPerModelPerLanguagePerRepository{
+		Assessments: AssessmentPerModelPerLanguagePerRepositoryPerTask{
 			modeltesting.NewMockModelNamed(t, "some-model"): {
 				languagetesting.NewMockLanguageNamed(t, "some-language"): {
-					"some-repository": metrics.NewAssessments(),
+					"some-repository": {
+						task.IdentifierWriteTests: metrics.NewAssessments(),
+					},
 				},
 			},
 		},
 
 		ExpectedString: `
-			model,language,repository,score,coverage,files-executed,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
-			some-model,some-language,some-repository,0,0,0,0,0,0,0,0,0
+			model,language,repository,task,score,coverage,files-executed,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
+			some-model,some-language,some-repository,write-tests,0,0,0,0,0,0,0,0,0
 		`,
 	})
 	validate(t, &testCase{
 		Name: "Multiple Models",
 
-		Assessments: AssessmentPerModelPerLanguagePerRepository{
+		Assessments: AssessmentPerModelPerLanguagePerRepositoryPerTask{
 			modeltesting.NewMockModelNamed(t, "some-model-a"): {
 				languagetesting.NewMockLanguageNamed(t, "some-language"): {
-					"some-repository": metrics.Assessments{
-						metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 50,
-						metrics.AssessmentKeyResponseCharacterCount:             100,
-						metrics.AssessmentKeyCoverage:                           1,
-						metrics.AssessmentKeyFilesExecuted:                      2,
-						metrics.AssessmentKeyResponseNoError:                    3,
-						metrics.AssessmentKeyResponseNoExcess:                   4,
-						metrics.AssessmentKeyResponseWithCode:                   5,
-						metrics.AssessmentKeyProcessingTime:                     200,
+					"some-repository": {
+						task.IdentifierWriteTests: metrics.Assessments{
+							metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 50,
+							metrics.AssessmentKeyResponseCharacterCount:             100,
+							metrics.AssessmentKeyCoverage:                           1,
+							metrics.AssessmentKeyFilesExecuted:                      2,
+							metrics.AssessmentKeyResponseNoError:                    3,
+							metrics.AssessmentKeyResponseNoExcess:                   4,
+							metrics.AssessmentKeyResponseWithCode:                   5,
+							metrics.AssessmentKeyProcessingTime:                     200,
+						},
 					},
 				},
 			},
 			modeltesting.NewMockModelNamed(t, "some-model-b"): {
 				languagetesting.NewMockLanguageNamed(t, "some-language"): {
-					"some-repository": metrics.Assessments{
-						metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 100,
-						metrics.AssessmentKeyResponseCharacterCount:             200,
-						metrics.AssessmentKeyCoverage:                           1,
-						metrics.AssessmentKeyFilesExecuted:                      2,
-						metrics.AssessmentKeyResponseNoError:                    3,
-						metrics.AssessmentKeyResponseNoExcess:                   4,
-						metrics.AssessmentKeyResponseWithCode:                   5,
-						metrics.AssessmentKeyProcessingTime:                     300,
+					"some-repository": {
+						task.IdentifierWriteTests: metrics.Assessments{
+							metrics.AssessmentKeyGenerateTestsForFileCharacterCount: 100,
+							metrics.AssessmentKeyResponseCharacterCount:             200,
+							metrics.AssessmentKeyCoverage:                           1,
+							metrics.AssessmentKeyFilesExecuted:                      2,
+							metrics.AssessmentKeyResponseNoError:                    3,
+							metrics.AssessmentKeyResponseNoExcess:                   4,
+							metrics.AssessmentKeyResponseWithCode:                   5,
+							metrics.AssessmentKeyProcessingTime:                     300,
+						},
 					},
 				},
 			},
@@ -81,8 +88,8 @@ func TestGenerateCSVForAssessmentPerModelPerLanguagePerRepository(t *testing.T)
 
 		ExpectedString: `
-			model,language,repository,score,coverage,files-executed,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
-			some-model-a,some-language,some-repository,15,1,2,50,200,100,3,4,5
-			some-model-b,some-language,some-repository,15,1,2,100,300,200,3,4,5
+			model,language,repository,task,score,coverage,files-executed,generate-tests-for-file-character-count,processing-time,response-character-count,response-no-error,response-no-excess,response-with-code
+			some-model-a,some-language,some-repository,write-tests,15,1,2,50,200,100,3,4,5
+			some-model-b,some-language,some-repository,write-tests,15,1,2,100,300,200,3,4,5
 		`,
 	})
 }
diff --git a/evaluate/repository.go b/evaluate/repository.go
index 8d6ea651f..2332820a1 100644
--- a/evaluate/repository.go
+++ b/evaluate/repository.go
@@ -2,6 +2,7 @@ package evaluate
 
 import (
 	"context"
+	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
@@ -19,16 +20,63 @@ import (
 	"github.com/symflower/eval-dev-quality/util"
 )
 
+// repositoryConfiguration holds the configuration of a repository.
+type repositoryConfiguration struct {
+	Tasks []task.Identifier
+}
+
+// validate validates the configuration.
+func (rc *repositoryConfiguration) validate() (err error) {
+	if len(rc.Tasks) == 0 {
+		return pkgerrors.Errorf("empty list of tasks in configuration")
+	}
+
+	for _, taskIdentifier := range rc.Tasks {
+		if !task.LookupIdentifier[taskIdentifier] {
+			return pkgerrors.Errorf("task identifier %q unknown", taskIdentifier)
+		}
+	}
+
+	return nil
+}
+
+// defaultConfiguration holds the default configuration object if there exists no configuration file.
+var defaultConfiguration = repositoryConfiguration{
+	Tasks: task.AllIdentifiers,
+}
+
 // Repository holds data about a repository.
 type Repository struct {
+	repositoryConfiguration
+
 	// Name holds the name of the repository.
 	Name string
 	// DataPath holds the path to the repository.
 	DataPath string
 }
 
+// loadConfiguration loads the configuration from the dedicated configuration file.
+func (r *Repository) loadConfiguration() (err error) {
+	configurationFilePath := filepath.Join(r.DataPath, "repository.json")
+
+	data, err := os.ReadFile(configurationFilePath)
+	if errors.Is(err, os.ErrNotExist) {
+		r.repositoryConfiguration = repositoryConfiguration{Tasks: append([]task.Identifier(nil), defaultConfiguration.Tasks...)} // Copy the defaults so a repository never aliases the global task registry.
+
+		return nil
+	} else if err != nil {
+		return pkgerrors.Wrap(err, configurationFilePath)
+	}
+
+	if err := json.Unmarshal(data, &r.repositoryConfiguration); err != nil {
+		return pkgerrors.Wrap(err, configurationFilePath)
+	}
+
+	return r.repositoryConfiguration.validate()
+}
+
 // Evaluate evaluates a repository with the given model and language.
-func (r *Repository) Evaluate(logger *log.Logger, resultPath string, model evalmodel.Model, language language.Language) (repositoryAssessment metrics.Assessments, problems []error, err error) {
+func (r *Repository) Evaluate(logger *log.Logger, resultPath string, model evalmodel.Model, language language.Language, taskIdentifier task.Identifier) (repositoryAssessment metrics.Assessments, problems []error, err error) {
 	log, logClose, err := log.WithFile(logger, filepath.Join(resultPath, evalmodel.CleanModelNameForFileSystem(model.ID()), language.ID(), r.Name+".log"))
 	if err != nil {
 		return nil, nil, err
@@ -59,7 +107,7 @@ func (r *Repository) Evaluate(logger *log.Logger, resultPath string, model evalm
 			Logger: log,
 		}
 
-		assessments, err := model.RunTask(ctx, task.IdentifierWriteTests)
+		assessments, err := model.RunTask(ctx, taskIdentifier)
 		if err != nil {
 			problems = append(problems, pkgerrors.WithMessage(err, filePath))
 
@@ -195,8 +243,13 @@ func TemporaryRepository(logger *log.Logger, testDataPath string, repositoryPath
 		return nil, cleanup, pkgerrors.WithStack(pkgerrors.Wrap(err, fmt.Sprintf("%s - %s", "unable to commit", out)))
 	}
 
-	return &Repository{
+	repository = &Repository{
 		Name:     repositoryPathRelative,
 		DataPath: temporaryRepositoryPath,
-	}, cleanup, nil
+	}
+	if err := repository.loadConfiguration(); err != nil {
+		return nil, cleanup, err
+	}
+
+	return repository, cleanup, nil
 }
diff --git a/evaluate/repository_test.go b/evaluate/repository_test.go
index d22ff142f..f8817d3e2 100644
--- a/evaluate/repository_test.go
+++ b/evaluate/repository_test.go
@@ -10,6 +10,7 @@ import (
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"github.com/zimmski/osutil"
+	"github.com/zimmski/osutil/bytesutil"
 
 	"github.com/symflower/eval-dev-quality/evaluate/metrics"
 	metricstesting "github.com/symflower/eval-dev-quality/evaluate/metrics/testing"
@@ -19,6 +20,7 @@ import (
 	"github.com/symflower/eval-dev-quality/model"
 	"github.com/symflower/eval-dev-quality/model/symflower"
 	modeltesting "github.com/symflower/eval-dev-quality/model/testing"
+	"github.com/symflower/eval-dev-quality/task"
 	"github.com/symflower/eval-dev-quality/util"
 )
 
@@ -46,7 +48,7 @@ func TestRepository(t *testing.T) {
 			assert.NoError(t, err)
 			defer cleanup()
 
-			actualRepositoryAssessment, actualProblems, actualErr := temporaryRepository.Evaluate(logger, temporaryPath, tc.Model, tc.Language)
+			actualRepositoryAssessment, actualProblems, actualErr := temporaryRepository.Evaluate(logger, temporaryPath, tc.Model, tc.Language, task.IdentifierWriteTests)
 
 			metricstesting.AssertAssessmentsEqual(t, tc.ExpectedRepositoryAssessment, actualRepositoryAssessment)
 			if assert.Equal(t, len(tc.ExpectedProblemContains), len(actualProblems), "problems count") {
@@ -254,3 +256,94 @@ func TestResetTemporaryRepository(t *testing.T) {
 		},
 	})
 }
+
+func TestRepositoryLoadConfiguration(t *testing.T) {
+	type testCase struct {
+		Name string
+
+		TestDataPath   string
+		RepositoryPath string
+
+		ExpectedErrorText string
+		MutationBefore    func(t *testing.T, path string)
+		ValidateAfter     func(t *testing.T, repository *Repository)
+	}
+
+	validate := func(t *testing.T, tc *testCase) {
+		t.Run(tc.Name, func(t *testing.T) {
+			temporaryPath := t.TempDir()
+			temporaryRepositoryPath := filepath.Join(temporaryPath, tc.RepositoryPath)
+			require.NoError(t, osutil.CopyTree(filepath.Join(tc.TestDataPath, tc.RepositoryPath), temporaryRepositoryPath))
+
+			if tc.MutationBefore != nil {
+				tc.MutationBefore(t, temporaryRepositoryPath)
+			}
+
+			_, logger := log.Buffer()
+			actualRepository, cleanup, actualErr := TemporaryRepository(logger, temporaryPath, tc.RepositoryPath)
+			defer cleanup()
+			if tc.ExpectedErrorText != "" {
+				assert.ErrorContains(t, actualErr, tc.ExpectedErrorText)
+			} else {
+				assert.NoError(t, actualErr)
+			}
+
+			if tc.ValidateAfter != nil {
+				tc.ValidateAfter(t, actualRepository)
+			}
+		})
+	}
+
+	validate(t, &testCase{
+		Name: "No configuration file",
+
+		TestDataPath:   filepath.Join("..", "testdata"),
+		RepositoryPath: filepath.Join("golang", "plain"),
+
+		ValidateAfter: func(t *testing.T, repository *Repository) {
+			assert.Equal(t, task.AllIdentifiers, repository.Tasks)
+		},
+	})
+	validate(t, &testCase{
+		Name: "Specify known task",
+
+		TestDataPath:   filepath.Join("..", "testdata"),
+		RepositoryPath: filepath.Join("golang", "plain"),
+
+		MutationBefore: func(t *testing.T, repositoryPath string) {
+			configuration := bytesutil.StringTrimIndentations(`
+				{
+					"tasks": [
+						"write-tests"
+					]
+				}
+			`)
+			assert.NoError(t, os.WriteFile(filepath.Join(repositoryPath, "repository.json"), []byte(configuration), 0600))
+		},
+		ValidateAfter: func(t *testing.T, repository *Repository) {
+			expectedTaskIdentifiers := []task.Identifier{
+				task.IdentifierWriteTests,
+			}
+			assert.Equal(t, expectedTaskIdentifiers, repository.Tasks)
+		},
+	})
+	validate(t, &testCase{
+		Name: "Specify unknown task",
+
+		TestDataPath:   filepath.Join("..", "testdata"),
+		RepositoryPath: filepath.Join("golang", "plain"),
+
+		ExpectedErrorText: "task identifier \"unknown-task\" unknown",
+		MutationBefore: func(t *testing.T, repositoryPath string) {
+			configuration := bytesutil.StringTrimIndentations(`
+				{
+					"tasks": [
+						"write-tests",
+						"unknown-task"
+					]
+				}
+			`)
+			assert.NoError(t, os.WriteFile(filepath.Join(repositoryPath, "repository.json"), []byte(configuration), 0600))
+		},
+	})
+}
diff --git a/task/task.go b/task/task.go
index f42685e58..5b6f09033 100644
--- a/task/task.go
+++ b/task/task.go
@@ -15,13 +15,18 @@ var (
 // Identifier holds the identifier of a task.
 type Identifier string
 
-// AllIdentifiers holds all available task identifiers.
-var AllIdentifiers []Identifier
+var (
+	// AllIdentifiers holds all available task identifiers.
+	AllIdentifiers []Identifier
+	// LookupIdentifier holds a map of all available task identifiers.
+	LookupIdentifier = map[Identifier]bool{}
+)
 
 // registerIdentifier registers the given identifier and makes it available.
 func registerIdentifier(name string) (identifier Identifier) {
 	identifier = Identifier(name)
 	AllIdentifiers = append(AllIdentifiers, identifier)
+	LookupIdentifier[identifier] = true
 
 	return identifier
 }