Skip to content

Commit

Permalink
[fix](catalog) gen partition id by name (apache#39325)
Browse files Browse the repository at this point in the history
Followup apache#38525
Previously, we used a sequence number to generate the partition id of a table in
the hive metastore.
for example, there are 2 partitions: `dt=2024-10-02` and
`dt=2024-10-03`, the partition id will be 0 and 1.
But if a new partition is added: `dt=2024-10-01`, the partition ids
will be 0, 1, and 2.
You can see, before, the id `0` is for `dt=2024-10-02`, but now `0` is
for `dt=2024-10-01`.

This PR uses the catalog/db/table/partition name to generate an id for the
partition,
so that each partition will have a unique id.
  • Loading branch information
morningman authored Aug 19, 2024
1 parent 9ecc5ae commit 21b3228
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
import org.apache.doris.common.security.authentication.AuthenticationConfig;
import org.apache.doris.common.util.CacheBulkLoader;
import org.apache.doris.common.util.LocationPath;
import org.apache.doris.common.util.Util;
import org.apache.doris.datasource.CacheException;
import org.apache.doris.datasource.hive.AcidInfo.DeleteDeltaInfo;
import org.apache.doris.datasource.property.PropertyConverter;
Expand Down Expand Up @@ -250,9 +251,8 @@ private HivePartitionValues loadPartitionValues(PartitionValueCacheKey key) {
Map<Long, PartitionItem> idToPartitionItem = Maps.newHashMapWithExpectedSize(partitionNames.size());
BiMap<String, Long> partitionNameToIdMap = HashBiMap.create(partitionNames.size());
Map<Long, List<UniqueId>> idToUniqueIdsMap = Maps.newHashMapWithExpectedSize(partitionNames.size());
long idx = 0;
for (String partitionName : partitionNames) {
long partitionId = idx++;
long partitionId = Util.genIdByName(catalog.getName(), key.dbName, key.tblName, partitionName);
ListPartitionItem listPartitionItem = toListPartitionItem(partitionName, key.types);
idToPartitionItem.put(partitionId, listPartitionItem);
partitionNameToIdMap.put(partitionName, partitionId);
Expand All @@ -273,7 +273,7 @@ private HivePartitionValues loadPartitionValues(PartitionValueCacheKey key) {
singleUidToColumnRangeMap = ListPartitionPrunerV2.genSingleUidToColumnRange(singleColumnRangeMap);
}
Map<Long, List<String>> partitionValuesMap = ListPartitionPrunerV2.getPartitionValuesMap(idToPartitionItem);
return new HivePartitionValues(idToPartitionItem, uidToPartitionRange, rangeToId, singleColumnRangeMap, idx,
return new HivePartitionValues(idToPartitionItem, uidToPartitionRange, rangeToId, singleColumnRangeMap,
partitionNameToIdMap, idToUniqueIdsMap, singleUidToColumnRangeMap, partitionValuesMap);
}

Expand Down Expand Up @@ -638,13 +638,12 @@ public void addPartitionsCache(String dbName, String tblName, List<String> parti
Map<String, Long> partitionNameToIdMapBefore = copy.getPartitionNameToIdMap();
Map<Long, List<UniqueId>> idToUniqueIdsMap = copy.getIdToUniqueIdsMap();
Map<Long, PartitionItem> idToPartitionItem = new HashMap<>();
long idx = copy.getNextPartitionId();
for (String partitionName : partitionNames) {
if (partitionNameToIdMapBefore.containsKey(partitionName)) {
LOG.info("addPartitionsCache partitionName:[{}] has exist in table:[{}]", partitionName, tblName);
continue;
}
long partitionId = idx++;
long partitionId = Util.genIdByName(catalog.getName(), dbName, tblName, partitionName);
ListPartitionItem listPartitionItem = toListPartitionItem(partitionName, key.types);
idToPartitionItemBefore.put(partitionId, listPartitionItem);
idToPartitionItem.put(partitionId, listPartitionItem);
Expand All @@ -653,7 +652,6 @@ public void addPartitionsCache(String dbName, String tblName, List<String> parti
Map<Long, List<String>> partitionValuesMapBefore = copy.getPartitionValuesMap();
Map<Long, List<String>> partitionValuesMap = ListPartitionPrunerV2.getPartitionValuesMap(idToPartitionItem);
partitionValuesMapBefore.putAll(partitionValuesMap);
copy.setNextPartitionId(idx);
if (key.types.size() > 1) {
Map<UniqueId, Range<PartitionKey>> uidToPartitionRangeBefore = copy.getUidToPartitionRange();
// uidToPartitionRange and rangeToId are only used for multi-column partition
Expand Down Expand Up @@ -1075,7 +1073,6 @@ public static class HiveFileStatus {

@Data
public static class HivePartitionValues {
private long nextPartitionId;
private BiMap<String, Long> partitionNameToIdMap;
private Map<Long, List<UniqueId>> idToUniqueIdsMap;
private Map<Long, PartitionItem> idToPartitionItem;
Expand All @@ -1094,7 +1091,6 @@ public HivePartitionValues(Map<Long, PartitionItem> idToPartitionItem,
Map<UniqueId, Range<PartitionKey>> uidToPartitionRange,
Map<Range<PartitionKey>, UniqueId> rangeToId,
RangeMap<ColumnBound, UniqueId> singleColumnRangeMap,
long nextPartitionId,
BiMap<String, Long> partitionNameToIdMap,
Map<Long, List<UniqueId>> idToUniqueIdsMap,
Map<UniqueId, Range<ColumnBound>> singleUidToColumnRangeMap,
Expand All @@ -1103,7 +1099,6 @@ public HivePartitionValues(Map<Long, PartitionItem> idToPartitionItem,
this.uidToPartitionRange = uidToPartitionRange;
this.rangeToId = rangeToId;
this.singleColumnRangeMap = singleColumnRangeMap;
this.nextPartitionId = nextPartitionId;
this.partitionNameToIdMap = partitionNameToIdMap;
this.idToUniqueIdsMap = idToUniqueIdsMap;
this.singleUidToColumnRangeMap = singleUidToColumnRangeMap;
Expand All @@ -1112,7 +1107,6 @@ public HivePartitionValues(Map<Long, PartitionItem> idToPartitionItem,

public HivePartitionValues copy() {
HivePartitionValues copy = new HivePartitionValues();
copy.setNextPartitionId(nextPartitionId);
copy.setPartitionNameToIdMap(partitionNameToIdMap == null ? null : HashBiMap.create(partitionNameToIdMap));
copy.setIdToUniqueIdsMap(idToUniqueIdsMap == null ? null : Maps.newHashMap(idToUniqueIdsMap));
copy.setIdToPartitionItem(idToPartitionItem == null ? null : Maps.newHashMap(idToPartitionItem));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -548,7 +548,7 @@ private HivePartitionValues loadPartitionValues(PartitionValueCacheKey key, List
singleUidToColumnRangeMap = ListPartitionPrunerV2.genSingleUidToColumnRange(singleColumnRangeMap);
}
Map<Long, List<String>> partitionValuesMap = ListPartitionPrunerV2.getPartitionValuesMap(idToPartitionItem);
return new HivePartitionValues(idToPartitionItem, uidToPartitionRange, rangeToId, singleColumnRangeMap, idx,
return new HivePartitionValues(idToPartitionItem, uidToPartitionRange, rangeToId, singleColumnRangeMap,
partitionNameToIdMap, idToUniqueIdsMap, singleUidToColumnRangeMap, partitionValuesMap);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ public List<String> listPartitionNames(String dbName, String tblName) {
String tblName = "tb";
HiveMetaStoreCache.HivePartitionValues partitionValues = cache.getPartitionValues(dbName, tblName, types);
Assert.assertEquals(1, partitionValues.getIdToPartitionItem().size());
Assert.assertTrue(partitionValues.getIdToPartitionItem().containsKey(0L));
List<PartitionKey> items = partitionValues.getIdToPartitionItem().get(0L).getItems();
Assert.assertTrue(partitionValues.getIdToPartitionItem().containsKey(8882801933302843777L));
List<PartitionKey> items = partitionValues.getIdToPartitionItem().get(8882801933302843777L).getItems();
Assert.assertEquals(1, items.size());
PartitionKey partitionKey = items.get(0);
Assert.assertEquals("1.234", partitionKey.getKeys().get(0).toString());
Expand All @@ -116,8 +116,8 @@ public List<String> listPartitionNames(String dbName, String tblName) {
cache.addPartitionsCache(dbName, tblName, values, types);
HiveMetaStoreCache.HivePartitionValues partitionValues2 = cache.getPartitionValues(dbName, tblName, types);
Assert.assertEquals(2, partitionValues2.getIdToPartitionItem().size());
Assert.assertTrue(partitionValues2.getIdToPartitionItem().containsKey(1L));
List<PartitionKey> items2 = partitionValues2.getIdToPartitionItem().get(1L).getItems();
Assert.assertTrue(partitionValues2.getIdToPartitionItem().containsKey(7070400225537799947L));
List<PartitionKey> items2 = partitionValues2.getIdToPartitionItem().get(7070400225537799947L).getItems();
Assert.assertEquals(1, items2.size());
PartitionKey partitionKey2 = items2.get(0);
Assert.assertEquals("5.678", partitionKey2.getKeys().get(0).toString());
Expand All @@ -128,8 +128,8 @@ public List<String> listPartitionNames(String dbName, String tblName) {
cache.invalidateTableCache(dbName, tblName);
HiveMetaStoreCache.HivePartitionValues partitionValues3 = cache.getPartitionValues(dbName, tblName, types);
Assert.assertEquals(1, partitionValues3.getIdToPartitionItem().size());
Assert.assertTrue(partitionValues3.getIdToPartitionItem().containsKey(0L));
List<PartitionKey> items3 = partitionValues3.getIdToPartitionItem().get(0L).getItems();
Assert.assertTrue(partitionValues3.getIdToPartitionItem().containsKey(8882801933302843777L));
List<PartitionKey> items3 = partitionValues3.getIdToPartitionItem().get(8882801933302843777L).getItems();
Assert.assertEquals(1, items3.size());
PartitionKey partitionKey3 = items3.get(0);
Assert.assertEquals("1.234", partitionKey3.getKeys().get(0).toString());
Expand Down

0 comments on commit 21b3228

Please sign in to comment.