2424using  System . Threading . Tasks ; 
2525using  K4os . Compression . LZ4 . Streams ; 
2626
27- namespace  Apache . Arrow . Adbc . Drivers . Apache . Databricks . CloudFetch 
27+ namespace  Apache . Arrow . Adbc . Drivers . Databricks . CloudFetch 
2828{ 
2929    /// <summary> 
3030    /// Downloads files from URLs. 
@@ -35,10 +35,13 @@ internal sealed class CloudFetchDownloader : ICloudFetchDownloader
3535        private  readonly  BlockingCollection < IDownloadResult >  _resultQueue ; // Queue that completed downloads are added to.
3636        private  readonly  ICloudFetchMemoryBufferManager  _memoryManager ; // Memory buffer manager supplied by the caller.
3737        private  readonly  HttpClient  _httpClient ; // HTTP client used for the downloads.
38+         private  readonly  ICloudFetchResultFetcher  _resultFetcher ; // Result fetcher that manages URLs; asked via GetUrlAsync for a refreshed link.
3839        private  readonly  int  _maxParallelDownloads ; // Maximum number of parallel downloads; also sizes _downloadSemaphore.
3940        private  readonly  bool  _isLz4Compressed ; // Whether the results are LZ4 compressed.
4041        private  readonly  int  _maxRetries ; // Maximum number of retry attempts.
4142        private  readonly  int  _retryDelayMs ; // Delay between retry attempts, in milliseconds.
43+         private  readonly  int  _maxUrlRefreshAttempts ; // Limit compared against a download result's RefreshAttempts after a 401/403 response.
44+         private  readonly  int  _urlExpirationBufferSeconds ; // Passed to IsExpiredOrExpiringSoon to decide whether to refresh a URL before downloading.
4245        private  readonly  SemaphoreSlim  _downloadSemaphore ; // Created with initial and maximum count equal to _maxParallelDownloads; awaited to acquire a download slot.
4346        private  Task ?  _downloadTask ; // NOTE(review): assigned outside the visible code - presumably the background download loop; confirm.
4447        private  CancellationTokenSource ?  _cancellationTokenSource ; // NOTE(review): assigned outside the visible code - presumably cancels the download loop; confirm.
@@ -53,29 +56,37 @@ internal sealed class CloudFetchDownloader : ICloudFetchDownloader
5356        /// <param name="resultQueue">The queue to add completed downloads to.</param> 
5457        /// <param name="memoryManager">The memory buffer manager.</param> 
5558        /// <param name="httpClient">The HTTP client to use for downloads.</param> 
59+         /// <param name="resultFetcher">The result fetcher that manages URLs.</param> 
5660        /// <param name="maxParallelDownloads">The maximum number of parallel downloads.</param> 
5761        /// <param name="isLz4Compressed">Whether the results are LZ4 compressed.</param> 
58-         /// <param name="logger">The logger instance.</param> 
5962        /// <param name="maxRetries">The maximum number of retry attempts.</param> 
6063        /// <param name="retryDelayMs">The delay between retry attempts in milliseconds.</param> 
64+         /// <param name="maxUrlRefreshAttempts">The maximum number of URL refresh attempts.</param> 
65+         /// <param name="urlExpirationBufferSeconds">Buffer time in seconds before URL expiration to trigger refresh.</param> 
public CloudFetchDownloader(
    BlockingCollection<IDownloadResult> downloadQueue,
    BlockingCollection<IDownloadResult> resultQueue,
    ICloudFetchMemoryBufferManager memoryManager,
    HttpClient httpClient,
    ICloudFetchResultFetcher resultFetcher,
    int maxParallelDownloads,
    bool isLz4Compressed,
    int maxRetries = 3,
    int retryDelayMs = 500,
    int maxUrlRefreshAttempts = 3,
    int urlExpirationBufferSeconds = 60)
{
    // Validate everything up front, in parameter order, so the first
    // offending argument is the one reported to the caller.

    // Required collaborators: none of them may be null.
    if (downloadQueue is null)
    {
        throw new ArgumentNullException(nameof(downloadQueue));
    }

    if (resultQueue is null)
    {
        throw new ArgumentNullException(nameof(resultQueue));
    }

    if (memoryManager is null)
    {
        throw new ArgumentNullException(nameof(memoryManager));
    }

    if (httpClient is null)
    {
        throw new ArgumentNullException(nameof(httpClient));
    }

    if (resultFetcher is null)
    {
        throw new ArgumentNullException(nameof(resultFetcher));
    }

    // Numeric settings: each one must be strictly positive.
    if (maxParallelDownloads <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(maxParallelDownloads));
    }

    if (maxRetries <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(maxRetries));
    }

    if (retryDelayMs <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(retryDelayMs));
    }

    if (maxUrlRefreshAttempts <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(maxUrlRefreshAttempts));
    }

    if (urlExpirationBufferSeconds <= 0)
    {
        throw new ArgumentOutOfRangeException(nameof(urlExpirationBufferSeconds));
    }

    // All arguments are valid: capture them.
    _downloadQueue = downloadQueue;
    _resultQueue = resultQueue;
    _memoryManager = memoryManager;
    _httpClient = httpClient;
    _resultFetcher = resultFetcher;
    _maxParallelDownloads = maxParallelDownloads;
    _isLz4Compressed = isLz4Compressed;
    _maxRetries = maxRetries;
    _retryDelayMs = retryDelayMs;
    _maxUrlRefreshAttempts = maxUrlRefreshAttempts;
    _urlExpirationBufferSeconds = urlExpirationBufferSeconds;

    // One semaphore slot per permitted concurrent download; all slots start free.
    _downloadSemaphore = new SemaphoreSlim(_maxParallelDownloads, _maxParallelDownloads);
    _isCompleted = false;
}
@@ -237,6 +248,19 @@ private async Task DownloadFilesAsync(CancellationToken cancellationToken)
237248                        break ; 
238249                    } 
239250
251+                     // Check if the URL is expired or about to expire 
252+                     if  ( downloadResult . IsExpiredOrExpiringSoon ( _urlExpirationBufferSeconds ) ) 
253+                     { 
254+                         // Get a refreshed URL before starting the download 
255+                         var  refreshedLink  =  await  _resultFetcher . GetUrlAsync ( downloadResult . Link . StartRowOffset ,  cancellationToken ) ; 
256+                         if  ( refreshedLink  !=  null ) 
257+                         { 
258+                             // Update the download result with the refreshed link 
259+                             downloadResult . UpdateWithRefreshedLink ( refreshedLink ) ; 
260+                             Trace . TraceInformation ( $ "Updated URL for file at offset { refreshedLink . StartRowOffset }  before download") ; 
261+                         } 
262+                     } 
263+ 
240264                    // Acquire a download slot 
241265                    await  _downloadSemaphore . WaitAsync ( cancellationToken ) . ConfigureAwait ( false ) ; 
242266
@@ -341,6 +365,37 @@ private async Task DownloadFileAsync(IDownloadResult downloadResult, Cancellatio
341365                        HttpCompletionOption . ResponseHeadersRead , 
342366                        cancellationToken ) . ConfigureAwait ( false ) ; 
343367
368+                     // Check if the response indicates an expired URL (typically 403 or 401) 
369+                     if  ( response . StatusCode  ==  System . Net . HttpStatusCode . Forbidden  || 
370+                         response . StatusCode  ==  System . Net . HttpStatusCode . Unauthorized ) 
371+                     { 
372+                         // If we've already tried refreshing too many times, fail 
373+                         if  ( downloadResult . RefreshAttempts  >=  _maxUrlRefreshAttempts ) 
374+                         { 
375+                             throw  new  InvalidOperationException ( $ "Failed to download file after { downloadResult . RefreshAttempts }  URL refresh attempts.") ; 
376+                         } 
377+ 
378+                         // Try to refresh the URL 
379+                         var  refreshedLink  =  await  _resultFetcher . GetUrlAsync ( downloadResult . Link . StartRowOffset ,  cancellationToken ) ; 
380+                         if  ( refreshedLink  !=  null ) 
381+                         { 
382+                             // Update the download result with the refreshed link 
383+                             downloadResult . UpdateWithRefreshedLink ( refreshedLink ) ; 
384+                             url  =  refreshedLink . FileLink ; 
385+                             sanitizedUrl  =  SanitizeUrl ( url ) ; 
386+ 
387+                             Trace . TraceInformation ( $ "URL for file at offset { refreshedLink . StartRowOffset }  was refreshed after expired URL response") ; 
388+ 
389+                             // Continue to the next retry attempt with the refreshed URL 
390+                             continue ; 
391+                         } 
392+                         else 
393+                         { 
394+                             // If refresh failed, throw an exception 
395+                             throw  new  InvalidOperationException ( "Failed to refresh expired URL." ) ; 
396+                         } 
397+                     } 
398+ 
344399                    response . EnsureSuccessStatusCode ( ) ; 
345400
346401                    // Log the download size if available from response headers 
0 commit comments