Skip to content

Commit

Permalink
GH-355: Handle duplicates in --people-dict and extend docs
Browse files Browse the repository at this point in the history
Signed-off-by: Oliver Bristow <oliver.bristow@project-tracr.com>
  • Loading branch information
Code0x58 committed Apr 5, 2020
1 parent 13a2081 commit 3aa4096
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 8 deletions.
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,14 @@ and add the unknown email to the list of that developer's emails.

If `--people-dict` is specified, it should point to a text file with the custom identities. The
format is: every line is a single developer, it contains all the matching emails and names separated
by `|`. The case is ignored.
by `|`. The case is ignored. Example file contents:
```
Linus Torvalds|torvalds@linux-foundation.org
Vadim Markovtsev|vadim@sourced.tech|another@one.com
```

If `--people-dict` is not specified, a [`.mailmap`](https://git-scm.com/docs/git-check-mailmap) file
will be used if it exists in the latest commit.

#### Overwrites matrix

Expand Down
13 changes: 10 additions & 3 deletions internal/plumbing/identity/identity.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,11 +186,18 @@ func (detector *Detector) LoadPeopleDict(path string) error {
size := 0
for scanner.Scan() {
ids := strings.Split(scanner.Text(), "|")
canon := ids[0]
var exists bool
var canonIndex int
// lookup or create a new canonical value
if canonIndex, exists = dict[strings.ToLower(canon)]; !exists {
// The new canonical name is appended at position `size` of reverseDict, so
// capture that index before advancing the counter. Incrementing first would
// make every identity point one slot past its own name.
reverseDict = append(reverseDict, canon)
canonIndex = size
size++
}
for _, id := range ids {
dict[strings.ToLower(id)] = size
dict[strings.ToLower(id)] = canonIndex
}
reverseDict = append(reverseDict, ids[0])
size++
}
reverseDict = append(reverseDict, AuthorMissingName)
detector.PeopleDict = dict
Expand Down
19 changes: 16 additions & 3 deletions internal/plumbing/identity/identity_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,19 +177,32 @@ func TestIdentityDetectorLoadPeopleDict(t *testing.T) {
id := fixtureIdentityDetector()
err := id.LoadPeopleDict(path.Join("..", "..", "test_data", "identities"))
assert.Nil(t, err)
assert.Equal(t, len(id.PeopleDict), 7)
assert.Equal(t, len(id.PeopleDict), 10)
assert.Contains(t, id.PeopleDict, "linus torvalds")
assert.Contains(t, id.PeopleDict, "torvalds@linux-foundation.org")
assert.Contains(t, id.PeopleDict, "vadim markovtsev")
assert.Contains(t, id.PeopleDict, "vadim@sourced.tech")
assert.Contains(t, id.PeopleDict, "another@one.com")
assert.Contains(t, id.PeopleDict, "máximo cuadros")
assert.Contains(t, id.PeopleDict, "maximo@sourced.tech")
assert.Equal(t, len(id.ReversedPeopleDict), 4)
assert.Contains(t, id.PeopleDict, "duplicate")
assert.Contains(t, id.PeopleDict, "first@example.com")
assert.Contains(t, id.PeopleDict, "second@example.com")

assert.Equal(t, len(id.ReversedPeopleDict), 5)
assert.Equal(t, id.ReversedPeopleDict[0], "Linus Torvalds")
assert.Equal(t, id.ReversedPeopleDict[1], "Vadim Markovtsev")
assert.Equal(t, id.ReversedPeopleDict[2], "Máximo Cuadros")
assert.Equal(t, id.ReversedPeopleDict[3], AuthorMissingName)
assert.Equal(t, id.ReversedPeopleDict[3], "Duplicate")
assert.Equal(t, id.ReversedPeopleDict[4], AuthorMissingName)

assert.Equal(t, id.PeopleDict["duplicate"], id.PeopleDict["first@example.com"])
assert.Equal(t, id.PeopleDict["duplicate"], id.PeopleDict["second@example.com"])

assert.Equal(t, id.PeopleDict["vadim markovtsev"], id.PeopleDict["vadim@sourced.tech"])
assert.Equal(t, id.PeopleDict["vadim markovtsev"], id.PeopleDict["another@one.com"])

assert.NotEqual(t, id.PeopleDict["duplicate"], id.PeopleDict["vadim markovtsev"])
}

func TestIdentityDetectorLoadPeopleDictWrongPath(t *testing.T) {
Expand Down
4 changes: 3 additions & 1 deletion internal/test_data/identities
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
Linus Torvalds|torvalds@linux-foundation.org
Vadim Markovtsev|vadim@sourced.tech|another@one.com
Máximo Cuadros|maximo@sourced.tech
Máximo Cuadros|maximo@sourced.tech
Duplicate|first@example.com
Duplicate|second@example.com

0 comments on commit 3aa4096

Please sign in to comment.