Skip to content

Commit

Permalink
Remove duplicate aliases (#120)
Browse files Browse the repository at this point in the history
* remove duplicated aliases

* fix readme with new test

* add test for duplicated aliases

* update comments

* bump version
  • Loading branch information
evertoncolling authored Nov 18, 2024
1 parent 0aa672c commit caa2861
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 22 deletions.
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,9 @@ To ensure the integrity of the catalog, the following tests are conducted:
3. **Reference Validation**: There should be no references to non-existent unit `externalIds` in `unitSystems.json`.
4. **Default Quantities**: All quantities must be present in the `unitSystems.json` for the default quantity.
5. **Consistent References**: All quantity references in `unitSystems.json` must exist in `units.json`.
6. **Unique aliases**: All pairs of (`alias` and `quantity`) must be unique, for all **aliases** in `aliasNames`.
7. **ExternalId Format**: All unit `externalIds` must follow the pattern `{quantity}:{unit}`, where both `quantity` and `unit` are in **snake_case**.
6. **Unique Quantity-Alias Pairs**: All pairs of (`alias` and `quantity`) must be unique across all units, for all aliases in `aliasNames`.
7. **Unique Unit Aliases**: Each unit's `aliasNames` array must contain only unique values, with no duplicate entries allowed.
8. **ExternalId Format**: All unit `externalIds` must follow the pattern `{quantity}:{unit}`, where both `quantity` and `unit` are in **snake_case**.

## Attribution
Some of the units are sourced from QUDT.org, which is licensed under the [Creative Commons Attribution 4.0 International License](https://creativecommons.org/licenses/by/4.0/).
Expand Down
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

<groupId>com.cognite.units</groupId>
<artifactId>units-catalog</artifactId>
<version>0.1.14</version>
<version>0.1.15</version>

<name>${project.groupId}:${project.artifactId}</name>
<description>A comprehensive unit catalog for Cognite Data Fusion (CDF) with a focus on standardization, comprehensiveness, and consistency.</description>
Expand Down
5 changes: 3 additions & 2 deletions src/main/kotlin/com/cognite/units/UnitService.kt
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ class UnitService(units: String, systems: String) {
assert(unitsByExternalId[it.externalId] == null) { "Duplicate externalId ${it.externalId}" }
unitsByExternalId[it.externalId] = it

// 7. ExternalId Format: All unit `externalIds` must follow the pattern `{quantity}:{unit}`, where both
// 8. ExternalId Format: All unit `externalIds` must follow the pattern `{quantity}:{unit}`, where both
// `quantity` and `unit` are in snake_case.
assert(it.externalId == generateExpectedExternalId(it)) {
"Invalid externalId ${it.externalId} for unit ${it.name} (${it.quantity})"
Expand All @@ -120,7 +120,8 @@ class UnitService(units: String, systems: String) {
// convert to set first, to remove duplicate aliases due to encoding (e.g. "\u00b0C" vs "°C")
it.aliasNames.toSet().forEach { alias ->
unitsByAlias.computeIfAbsent(alias) { ArrayList() }.add(it)
// 6. Unique aliases: All pairs of (alias and quantity) must be unique, for all aliases in `aliasNames`
// 6. Unique Quantity-Alias Pairs: All pairs of (alias and quantity) must be unique, for all aliases in
// `aliasNames`
assert(unitsByQuantityAndAlias[it.quantity]!![alias] == null) {
"Duplicate alias $alias for quantity ${it.quantity}"
}
Expand Down
28 changes: 28 additions & 0 deletions src/test/kotlin/UnitTest.kt
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@
*/

import com.cognite.units.Conversion
import com.cognite.units.TypedUnit
import com.cognite.units.UnitService
import org.junit.jupiter.api.Assertions.assertEquals
import org.junit.jupiter.api.Assertions.fail
import org.junit.jupiter.api.Test
import org.junit.jupiter.api.assertThrows
import java.net.URL
import kotlin.test.DefaultAsserter

class UnitTest {

Expand Down Expand Up @@ -106,6 +108,18 @@ class UnitTest {
}
}

/**
 * 7. Unique Unit Aliases: Each unit's `aliasNames` array must contain only unique values, with no duplicate
 * entries allowed.
 *
 * Runs the per-unit alias check against every unit returned by the shared [UnitService] instance.
 */
@Test
fun checkDuplicateAliases() {
    for (unit in UnitService.service.getUnits()) {
        validateUniqueAliases(unit)
    }
}

@Test
fun jsonWithInvalidExternalId() {
try {
Expand Down Expand Up @@ -181,4 +195,18 @@ class UnitTest {
false,
)
}

/**
 * Fails the current test if [unit] lists the same alias more than once in its `aliasNames`.
 *
 * Aliases are visited in iteration order and compared by exact string equality; the first repeated
 * entry triggers the failure, so later duplicates in the same unit are not reported.
 *
 * @param unit the unit whose `aliasNames` are checked.
 */
private fun validateUniqueAliases(unit: TypedUnit) {
    // Aliases already encountered for this unit.
    val seen = mutableSetOf<String>()

    for (alias in unit.aliasNames) {
        // `add` returns true for a first occurrence; only a repeated alias falls through to the failure.
        if (seen.add(alias)) continue

        DefaultAsserter.fail(
            "Duplicate alias '$alias' found in aliasNames for unit ${unit.externalId} (${unit.quantity})",
        )
    }
}
}
28 changes: 11 additions & 17 deletions versions/v1/units.json
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,6 @@
"micromole / L",
"Micromole per Liter",
"μmol / liter",
"µmol / L",
"Micromole per litre",
"micromole per Litre",
"μmol per L",
Expand Down Expand Up @@ -3841,8 +3840,6 @@
"kilogram per metre",
"kilogm per metre",
"kg / metre",
"kg / metre",
"kg per metre",
"kg per metre",
"kilogram / m",
"kilogm / m",
Expand Down Expand Up @@ -9414,10 +9411,7 @@
"Sm³/hour",
"m3/h",
"m3/hr",
"m3/hour",
"Sm3/h",
"Sm3/hr",
"Sm3/hour"
"m3/hour"
],
"symbol": "m³/h",
"conversion": {
Expand Down Expand Up @@ -10493,11 +10487,11 @@
],
"symbol": "dB",
"conversion": {
"multiplier": 1.0,
"offset": 0.0
"multiplier": 1.0,
"offset": 0.0
},
"source": "qudt.org",
"sourceReference": "https://qudt.org/vocab/unit/DeciB"
"sourceReference": "https://qudt.org/vocab/unit/DeciB"
},
{
"externalId": "attenuation:b",
Expand All @@ -10510,11 +10504,11 @@
],
"symbol": "B",
"conversion": {
"multiplier": 10.0,
"offset": 0.0
"multiplier": 10.0,
"offset": 0.0
},
"source": "qudt.org",
"sourceReference": "https://qudt.org/vocab/unit/B"
"sourceReference": "https://qudt.org/vocab/unit/B"
},
{
"externalId": "frequency:hectohz",
Expand All @@ -10525,9 +10519,9 @@
"hectoHz",
"hectoHZ",
"hectohertz",
"hecto-Hz",
"hecto-hertz",
"HectoHertz",
"hecto-Hz",
"hecto-hertz",
"HectoHertz",
"Hecto-Hertz",
"hHz"
],
Expand Down Expand Up @@ -10591,4 +10585,4 @@
"source": "Custom based on qudt.org - https://qudt.org/vocab/unit/DEGREE_API",
"sourceReference": null
}
]
]

0 comments on commit caa2861

Please sign in to comment.