-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-11425] [SPARK-11486] Improve hybrid aggregation #9383
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
4e09081
53dbdf2
2e341f5
df44fc6
6f3bb15
fc5e052
d89e034
1c0c6c3
d8422e1
cbeaedf
fbce6fe
10d7169
8a20e56
f6a5f06
b1f8a99
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -79,9 +79,13 @@ public static UnsafeExternalSorter createWithExistingInMemorySorter( | |
| PrefixComparator prefixComparator, | ||
| int initialSize, | ||
| long pageSizeBytes, | ||
| UnsafeInMemorySorter inMemorySorter) { | ||
| return new UnsafeExternalSorter(taskMemoryManager, blockManager, | ||
| UnsafeInMemorySorter inMemorySorter) throws IOException { | ||
| UnsafeExternalSorter sorter = new UnsafeExternalSorter(taskMemoryManager, blockManager, | ||
| taskContext, recordComparator, prefixComparator, initialSize, pageSizeBytes, inMemorySorter); | ||
| sorter.spill(Long.MAX_VALUE, sorter); | ||
| // The external sorter will be used to insert records, in-memory sorter is not needed. | ||
| sorter.inMemSorter = null; | ||
| return sorter; | ||
| } | ||
|
|
||
| public static UnsafeExternalSorter create( | ||
|
|
@@ -124,7 +128,6 @@ private UnsafeExternalSorter( | |
| acquireMemory(inMemSorter.getMemoryUsage()); | ||
| } else { | ||
| this.inMemSorter = existingInMemorySorter; | ||
| // will acquire after free the map | ||
| } | ||
| this.peakMemoryUsedBytes = getMemoryUsage(); | ||
|
|
||
|
|
@@ -157,12 +160,9 @@ public void closeCurrentPage() { | |
| */ | ||
| @Override | ||
| public long spill(long size, MemoryConsumer trigger) throws IOException { | ||
| assert(inMemSorter != null); | ||
| if (trigger != this) { | ||
| if (readingIterator != null) { | ||
| return readingIterator.spill(); | ||
| } else { | ||
|
|
||
| } | ||
| return 0L; // this should throw exception | ||
| } | ||
|
|
@@ -388,25 +388,38 @@ public void insertKVRecord(Object keyBase, long keyOffset, int keyLen, | |
| inMemSorter.insertRecord(recordAddress, prefix); | ||
| } | ||
|
|
||
| /** | ||
| * Merges another UnsafeExternalSorters into this one, the other one will be emptied. | ||
| * | ||
| * @throws IOException | ||
| */ | ||
| public void merge(UnsafeExternalSorter other) throws IOException { | ||
| other.spill(); | ||
| spillWriters.addAll(other.spillWriters); | ||
| // remove them from `spillWriters`, or the files will be deleted in `cleanupResources`. | ||
| other.spillWriters.clear(); | ||
| other.cleanupResources(); | ||
| } | ||
|
|
||
| /** | ||
| * Returns a sorted iterator. It is the caller's responsibility to call `cleanupResources()` | ||
| * after consuming this iterator. | ||
| */ | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @JoshRosen Do you remember why we need to clear this? Once cleared, how to delete the spilled files?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ping @JoshRosen
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Chatted with @JoshRosen offline, we should not clear
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. just a note, we had a quick discussion. Seems we should not call |
||
| public UnsafeSorterIterator getSortedIterator() throws IOException { | ||
| assert(inMemSorter != null); | ||
| readingIterator = new SpillableIterator(inMemSorter.getSortedIterator()); | ||
| int numIteratorsToMerge = spillWriters.size() + (readingIterator.hasNext() ? 1 : 0); | ||
| if (spillWriters.isEmpty()) { | ||
| assert(inMemSorter != null); | ||
| readingIterator = new SpillableIterator(inMemSorter.getSortedIterator()); | ||
| return readingIterator; | ||
| } else { | ||
| final UnsafeSorterSpillMerger spillMerger = | ||
| new UnsafeSorterSpillMerger(recordComparator, prefixComparator, numIteratorsToMerge); | ||
| new UnsafeSorterSpillMerger(recordComparator, prefixComparator, spillWriters.size()); | ||
| for (UnsafeSorterSpillWriter spillWriter : spillWriters) { | ||
| spillMerger.addSpillIfNotEmpty(spillWriter.getReader(blockManager)); | ||
| } | ||
| spillWriters.clear(); | ||
| spillMerger.addSpillIfNotEmpty(readingIterator); | ||
|
|
||
| if (inMemSorter != null) { | ||
| readingIterator = new SpillableIterator(inMemSorter.getSortedIterator()); | ||
| spillMerger.addSpillIfNotEmpty(readingIterator); | ||
| } | ||
| return spillMerger.getSortedIterator(); | ||
| } | ||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -236,16 +236,13 @@ public void printPerfMetrics() { | |
|
|
||
| /** | ||
| * Sorts the map's records in place, spill them to disk, and returns an [[UnsafeKVExternalSorter]] | ||
| * that can be used to insert more records to do external sorting. | ||
| * | ||
| * The only memory that is allocated is the address/prefix array, 16 bytes per record. | ||
| * | ||
| * Note that this destroys the map, and as a result, the map cannot be used anymore after this. | ||
| * Note that the map will be reset for inserting new records, and the returned sorter can NOT be used | ||
| * to insert records. | ||
| */ | ||
| public UnsafeKVExternalSorter destructAndCreateExternalSorter() throws IOException { | ||
| UnsafeKVExternalSorter sorter = new UnsafeKVExternalSorter( | ||
| return new UnsafeKVExternalSorter( | ||
| groupingKeySchema, aggregationBufferSchema, | ||
| SparkEnv.get().blockManager(), map.getPageSizeBytes(), map); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this UnsafeFixedWidthAggregationMap usable after this call?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, I think we need to make it clear that
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok. I see, we cannot directly insert records to the external sorter returned by |
||
| return sorter; | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -83,11 +83,10 @@ public UnsafeKVExternalSorter( | |
| /* initialSize */ 4096, | ||
| pageSizeBytes); | ||
| } else { | ||
| // The memory needed for UnsafeInMemorySorter should be less than longArray in map. | ||
| map.freeArray(); | ||
| // The memory used by UnsafeInMemorySorter will be counted later (end of this block) | ||
| // During spilling, the array in map will not be used, so we can borrow that and use it | ||
| // as the underline array for in-memory sorter (it's always large enough). | ||
| final UnsafeInMemorySorter inMemSorter = new UnsafeInMemorySorter( | ||
| taskMemoryManager, recordComparator, prefixComparator, Math.max(1, map.numElements())); | ||
| taskMemoryManager, recordComparator, prefixComparator, map.getArray()); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there any chance that multiple objects may change the same array?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, the array from map is not thread safe, only accessible from current thread.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok. Then, this
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes |
||
|
|
||
| // We cannot use the destructive iterator here because we are reusing the existing memory | ||
| // pages in BytesToBytesMap to hold records during sorting. | ||
|
|
@@ -123,10 +122,9 @@ public UnsafeKVExternalSorter( | |
| pageSizeBytes, | ||
| inMemSorter); | ||
|
|
||
| sorter.spill(); | ||
| map.free(); | ||
| // counting the memory used UnsafeInMemorySorter | ||
| taskMemoryManager.acquireExecutionMemory(inMemSorter.getMemoryUsage(), sorter); | ||
| // reset the map, so we can re-use it to insert new records. the inMemSorter will not used | ||
| // anymore, so the underline array could be used by map again. | ||
| map.reset(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we need comments at here. Basically, we need to explain why we reset. Also, once
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes. |
||
| } | ||
| } | ||
|
|
||
|
|
@@ -142,6 +140,15 @@ public void insertKV(UnsafeRow key, UnsafeRow value) throws IOException { | |
| value.getBaseObject(), value.getBaseOffset(), value.getSizeInBytes(), prefix); | ||
| } | ||
|
|
||
| /** | ||
| * Merges another UnsafeKVExternalSorter into `this`, the other one will be emptied. | ||
| * | ||
| * @throws IOException | ||
| */ | ||
| public void merge(UnsafeKVExternalSorter other) throws IOException { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add comment? |
||
| sorter.merge(other.sorter); | ||
| } | ||
|
|
||
| /** | ||
| * Returns a sorted iterator. It is the caller's responsibility to call `cleanupResources()` | ||
| * after consuming this iterator. | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ordinarily, this would end up deleting the spill files, but it doesn't because of the
spillWriters.clear()call above. If you end up updating this patch, mind adding a one-line comment to explain this (since it's a subtle point)?