-
Notifications
You must be signed in to change notification settings - Fork 162
fix(csharp/src/Drivers/Databricks): Correct DatabricksCompositeReader and StatusPoller to Stop/Dispose Appropriately #3217
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
ad5e451
6c145fa
ff97d58
6e72402
a7a8891
2087de7
4844036
13f3cbf
c6aa6db
0e0df51
54837e9
d6ed3ad
f5001c9
9b45677
a402cbb
1f0240a
be06c48
2b5c4f3
fca8ce0
e5c962c
a39f702
65f9d0d
4130c83
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -16,17 +16,12 @@ | |
| */ | ||
|
|
||
| using System; | ||
| using System.Collections.Generic; | ||
| using System.Net.Http; | ||
| using System.Threading; | ||
| using System.Threading.Tasks; | ||
| using Apache.Arrow; | ||
| using Apache.Arrow.Adbc; | ||
| using Apache.Arrow.Adbc.Drivers.Apache; | ||
| using Apache.Arrow.Adbc.Drivers.Apache.Hive2; | ||
| using Apache.Arrow.Adbc.Drivers.Databricks.CloudFetch; | ||
| using Apache.Arrow.Adbc.Tracing; | ||
| using Apache.Arrow.Ipc; | ||
| using Apache.Hive.Service.Rpc.Thrift; | ||
|
|
||
| namespace Apache.Arrow.Adbc.Drivers.Databricks | ||
|
|
@@ -50,6 +45,8 @@ internal sealed class DatabricksCompositeReader : TracingReader | |
| private readonly TlsProperties _tlsOptions; | ||
| private readonly HiveServer2ProxyConfigurator _proxyConfigurator; | ||
|
|
||
| private DatabricksOperationStatusPoller? operationStatusPoller; | ||
|
|
||
| /// <summary> | ||
| /// Initializes a new instance of the <see cref="DatabricksCompositeReader"/> class. | ||
| /// </summary> | ||
|
|
@@ -66,10 +63,15 @@ internal DatabricksCompositeReader(DatabricksStatement statement, Schema schema, | |
| _proxyConfigurator = proxyConfigurator; | ||
|
|
||
| // use direct results if available | ||
| if (_statement.HasDirectResults && _statement.DirectResults != null && _statement.DirectResults.__isset.resultSet) | ||
| if (_statement.HasDirectResults && _statement.DirectResults != null && _statement.DirectResults.__isset.resultSet && statement.DirectResults?.ResultSet != null) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be simplified to
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is a bit helpful for linter |
||
| { | ||
| _activeReader = DetermineReader(_statement.DirectResults.ResultSet); | ||
| } | ||
| if (_statement.DirectResults?.ResultSet.HasMoreRows ?? true) | ||
| { | ||
| operationStatusPoller = new DatabricksOperationStatusPoller(statement); | ||
| operationStatusPoller.Start(); | ||
| } | ||
| } | ||
|
|
||
| private BaseDatabricksReader DetermineReader(TFetchResultsResp initialResults) | ||
|
|
@@ -93,7 +95,7 @@ private BaseDatabricksReader DetermineReader(TFetchResultsResp initialResults) | |
| /// </summary> | ||
| /// <param name="cancellationToken">The cancellation token.</param> | ||
| /// <returns>The next record batch, or null if there are no more batches.</returns> | ||
| public override async ValueTask<RecordBatch?> ReadNextRecordBatchAsync(CancellationToken cancellationToken = default) | ||
| private async ValueTask<RecordBatch?> ReadNextRecordBatchInternalAsync(CancellationToken cancellationToken = default) | ||
| { | ||
| // Initialize the active reader if not already done | ||
| if (_activeReader == null) | ||
|
|
@@ -108,5 +110,34 @@ private BaseDatabricksReader DetermineReader(TFetchResultsResp initialResults) | |
|
|
||
| return await _activeReader.ReadNextRecordBatchAsync(cancellationToken); | ||
| } | ||
|
|
||
| public override async ValueTask<RecordBatch?> ReadNextRecordBatchAsync(CancellationToken cancellationToken = default) | ||
| { | ||
| var result = await ReadNextRecordBatchInternalAsync(cancellationToken); | ||
| // Stop the poller when we've reached the end of results | ||
| if (result == null) | ||
| { | ||
| StopOperationStatusPoller(); | ||
| } | ||
| return result; | ||
| } | ||
|
|
||
| protected override void Dispose(bool disposing) | ||
| { | ||
| if (disposing) | ||
| { | ||
| _activeReader?.Dispose(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. set to null after dispose? |
||
| StopOperationStatusPoller(); | ||
| } | ||
| _activeReader = null; | ||
| base.Dispose(disposing); | ||
| } | ||
|
|
||
| private void StopOperationStatusPoller() | ||
| { | ||
| operationStatusPoller?.Stop(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Consider setting to |
||
| operationStatusPoller?.Dispose(); | ||
| operationStatusPoller = null; | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -18,6 +18,7 @@ | |
| using System; | ||
| using System.Threading; | ||
| using System.Threading.Tasks; | ||
| using Apache.Arrow.Adbc.Drivers.Apache; | ||
| using Apache.Arrow.Adbc.Drivers.Databricks.CloudFetch; | ||
| using Apache.Hive.Service.Rpc.Thrift; | ||
|
|
||
|
|
@@ -31,14 +32,19 @@ internal class DatabricksOperationStatusPoller : IDisposable | |
| { | ||
| private readonly IHiveServer2Statement _statement; | ||
| private readonly int _heartbeatIntervalSeconds; | ||
| private readonly int _requestTimeoutSeconds; | ||
| // internal cancellation token source - won't affect the external token | ||
| private CancellationTokenSource? _internalCts; | ||
| private Task? _operationStatusPollingTask; | ||
|
|
||
| public DatabricksOperationStatusPoller(IHiveServer2Statement statement, int heartbeatIntervalSeconds = DatabricksConstants.DefaultOperationStatusPollingIntervalSeconds) | ||
| public DatabricksOperationStatusPoller( | ||
| IHiveServer2Statement statement, | ||
| int heartbeatIntervalSeconds = DatabricksConstants.DefaultOperationStatusPollingIntervalSeconds, | ||
| int requestTimeoutSeconds = DatabricksConstants.DefaultOperationStatusRequestTimeoutSeconds) | ||
| { | ||
| _statement = statement ?? throw new ArgumentNullException(nameof(statement)); | ||
| _heartbeatIntervalSeconds = heartbeatIntervalSeconds; | ||
| _requestTimeoutSeconds = requestTimeoutSeconds; | ||
| } | ||
|
|
||
| public bool IsStarted => _operationStatusPollingTask != null; | ||
|
|
@@ -62,29 +68,27 @@ public void Start(CancellationToken externalToken = default) | |
|
|
||
| private async Task PollOperationStatus(CancellationToken cancellationToken) | ||
| { | ||
| try | ||
| while (!cancellationToken.IsCancellationRequested) | ||
| { | ||
| while (!cancellationToken.IsCancellationRequested) | ||
| { | ||
| var operationHandle = _statement.OperationHandle; | ||
| if (operationHandle == null) break; | ||
| var operationHandle = _statement.OperationHandle; | ||
| if (operationHandle == null) break; | ||
|
|
||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to use a timeout token here, instead of cancelling when canceltoken is triggered; if an interrupt is triggered prematurely, the TCLI client may still have unsent/unconsumed results in the buffers, affecting subsequent calls with that client (which is any future call in the same Session)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. are you able to repro this? should we do this to all the thrift rpc calls in the driver?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it is because in THTTPTransport (used by SparkHttpConnection -> DatabricksHttpconnection), a new Stream is created when the request is flushed. If cancellation happens before this, that stream doesn't get discarded: Yes, during testing, got some errors. In the proxy logs, I remember seeing requests sent out with both GetOperationStatus and CloseOperationStatus (in the same request) while testing another PR I think we are safe in HiveServer2Statement, but we might need to adjust CancellationToken in DatabricksReader, CloudFetchResultFetcher, and DatabricksCompositeReader
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, I think this depends a bit on how CancellationToken could be used by PBI, too
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. At least for now, I think we can operate this way:
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This is currently unimplemented but we'll need to implement it before GA for parity with the ODBC implementation. What is probably most important for cancellation is query execution, and unless we manage to push forward the proposed ADBC 1.1 API, currently the only way to cancel a running query is to call
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From a Power BI perspective, the most important use of cancellation is for Direct Query because users can generate a lot of queries simply by clicking around in a visual and in-progress queries will need to be cancelled if their output is no longer needed. DQ output tends to be relatively small, so being able to cancel in the middle of reading the output is arguably less important than being able to cancel before the results start coming back. |
||
| CancellationToken GetOperationStatusTimeoutToken = ApacheUtility.GetCancellationToken(_requestTimeoutSeconds, ApacheUtility.TimeUnit.Seconds); | ||
|
|
||
| var request = new TGetOperationStatusReq(operationHandle); | ||
| var response = await _statement.Client.GetOperationStatus(request, cancellationToken); | ||
| await Task.Delay(TimeSpan.FromSeconds(_heartbeatIntervalSeconds), cancellationToken); | ||
| var request = new TGetOperationStatusReq(operationHandle); | ||
| var response = await _statement.Client.GetOperationStatus(request, GetOperationStatusTimeoutToken); | ||
| await Task.Delay(TimeSpan.FromSeconds(_heartbeatIntervalSeconds), cancellationToken); | ||
|
|
||
| // end the heartbeat if the command has terminated | ||
| if (response.OperationState == TOperationState.CANCELED_STATE || | ||
| response.OperationState == TOperationState.ERROR_STATE) | ||
| { | ||
| break; | ||
| } | ||
| // end the heartbeat if the command has terminated | ||
| if (response.OperationState == TOperationState.CANCELED_STATE || | ||
| response.OperationState == TOperationState.ERROR_STATE || | ||
| response.OperationState == TOperationState.CLOSED_STATE || | ||
| response.OperationState == TOperationState.TIMEDOUT_STATE || | ||
| response.OperationState == TOperationState.UKNOWN_STATE) | ||
| { | ||
| break; | ||
| } | ||
| } | ||
| catch (TaskCanceledException) | ||
| { | ||
| // ignore | ||
| } | ||
| } | ||
|
|
||
| public void Stop() | ||
|
|
@@ -94,12 +98,19 @@ public void Stop() | |
|
|
||
| public void Dispose() | ||
| { | ||
| if (_internalCts != null) | ||
| _internalCts?.Cancel(); | ||
| try | ||
| { | ||
| _internalCts.Cancel(); | ||
| _operationStatusPollingTask?.Wait(); | ||
| _internalCts.Dispose(); | ||
| _operationStatusPollingTask?.GetAwaiter().GetResult(); | ||
| } | ||
| catch (OperationCanceledException) | ||
| { | ||
| // Expected, no-op | ||
| } | ||
|
|
||
| _internalCts?.Dispose(); | ||
| _internalCts = null; | ||
| _operationStatusPollingTask = null; | ||
| } | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why don't use cancellation token here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't want Dispose to stall, what do you think?
If caller disposes statement, and download_queue is full, we would have to wait on however long cancellation token is configured to expire