-
Notifications
You must be signed in to change notification settings - Fork 980
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DRILL-7223: Create an option to control timeout for REFRESH METADATA #1776
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -354,6 +354,10 @@ private ExecConstants() { | |
"enables statistics usage for varchar and decimal data types. Default is unset, i.e. empty string. " + | ||
"Allowed values: 'true', 'false', '' (empty string)."), "true", "false", ""); | ||
|
||
public static final String PARQUET_REFRESH_TIMEOUT = "store.parquet.refresh_timeout_per_runnable_in_msec"; | ||
public static final LongValidator PARQUET_REFRESH_TIMEOUT_VALIDATOR = new LongValidator(PARQUET_REFRESH_TIMEOUT, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Make this |
||
new OptionDescription("Sets a timeout (in msec) for REFRESH TABLE METADATA processing of a single subdirectory")); | ||
|
||
public static final String PARQUET_PAGEREADER_ASYNC = "store.parquet.reader.pagereader.async"; | ||
public static final OptionValidator PARQUET_PAGEREADER_ASYNC_VALIDATOR = new BooleanValidator(PARQUET_PAGEREADER_ASYNC, | ||
new OptionDescription("Enable the asynchronous page reader. This pipelines the reading of data from disk for high performance.")); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -166,14 +166,13 @@ public final V call() throws Exception { | |
throw e; | ||
} finally { | ||
long time = System.nanoTime() - start; | ||
if (logger.isWarnEnabled()) { | ||
long timeMillis = TimeUnit.MILLISECONDS.convert(time, TimeUnit.NANOSECONDS); | ||
if (timeMillis > TIMEOUT_PER_RUNNABLE_IN_MSECS) { | ||
logger.warn("Task '{}' execution time {} ms exceeds timeout {} ms.", this, timeMillis, TIMEOUT_PER_RUNNABLE_IN_MSECS); | ||
} else { | ||
logger.debug("Task '{}' execution time is {} ms", this, timeMillis); | ||
} | ||
long timeMillis = TimeUnit.MILLISECONDS.convert(time, TimeUnit.NANOSECONDS); | ||
if (timeMillis > TIMEOUT_PER_RUNNABLE_IN_MSECS) { | ||
logger.warn("Task '{}' execution time {} ms exceeds timeout {} ms.", this, timeMillis, TIMEOUT_PER_RUNNABLE_IN_MSECS); | ||
} else { | ||
logger.debug("Task '{}' execution time is {} ms", this, timeMillis); | ||
} | ||
|
||
executionTime = time; | ||
} | ||
} | ||
|
@@ -188,6 +187,23 @@ private long getExecutionTime(TimeUnit unit) { | |
return unit.convert(executionTime, TimeUnit.NANOSECONDS); | ||
} | ||
|
||
/** | ||
* Execute the list of runnables with the given parallelization. At end, return values and report completion time | ||
* stats to the provided logger. Each runnable is allowed a certain timeout. If the timeout is exceeded, existing/pending | ||
* tasks will be cancelled and a {@link UserException} is thrown. | ||
* @param activity Name of activity for reporting in logger. | ||
* @param logger The logger to use to report results. | ||
* @param tasks List of callable that should be executed and timed. If this list has one item, task will be | ||
* completed in-thread. Each callable must handle {@link InterruptedException}s. | ||
* @param parallelism The number of threads that should be run to complete this task. | ||
* @param timeout if bigger than zero, set the timeout per runnable (in msec) | ||
* @return The list of outcome objects. | ||
* @throws IOException All exceptions are coerced to IOException since this was built for storage system tasks initially. | ||
*/ | ||
public static <V> List<V> run(final String activity, final Logger logger, final List<TimedCallable<V>> tasks, int parallelism, long timeout) throws IOException { | ||
TIMEOUT_PER_RUNNABLE_IN_MSECS = timeout; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. By convention we treat these as static final constants (although I see that the |
||
return run(activity, logger, tasks, parallelism); | ||
} | ||
|
||
/** | ||
* Execute the list of runnables with the given parallelization. At end, return values and report completion time | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,6 +50,7 @@ public class ParquetReaderConfig { | |
private boolean enableTimeReadCounter = false; | ||
private boolean autoCorrectCorruptedDates = true; | ||
private boolean enableStringsSignedMinMax = false; | ||
private long timeoutPerRunnableInMsec = 15_000; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since the 'owner' of the default should be ExecConstants, we should avoid this default value. |
||
|
||
public static ParquetReaderConfig.Builder builder() { | ||
return new ParquetReaderConfig.Builder(); | ||
|
@@ -64,12 +65,15 @@ public ParquetReaderConfig(@JsonProperty("enableBytesReadCounter") Boolean enabl | |
@JsonProperty("enableBytesTotalCounter") Boolean enableBytesTotalCounter, | ||
@JsonProperty("enableTimeReadCounter") Boolean enableTimeReadCounter, | ||
@JsonProperty("autoCorrectCorruptedDates") Boolean autoCorrectCorruptedDates, | ||
@JsonProperty("enableStringsSignedMinMax") Boolean enableStringsSignedMinMax) { | ||
@JsonProperty("enableStringsSignedMinMax") Boolean enableStringsSignedMinMax, | ||
@JsonProperty("timeoutPerRunnableInMsec") Long timeoutPerRunnableInMsec) { | ||
this.enableBytesReadCounter = enableBytesReadCounter == null ? this.enableBytesReadCounter : enableBytesReadCounter; | ||
this.enableBytesTotalCounter = enableBytesTotalCounter == null ? this.enableBytesTotalCounter : enableBytesTotalCounter; | ||
this.enableTimeReadCounter = enableTimeReadCounter == null ? this.enableTimeReadCounter : enableTimeReadCounter; | ||
this.autoCorrectCorruptedDates = autoCorrectCorruptedDates == null ? this.autoCorrectCorruptedDates : autoCorrectCorruptedDates; | ||
this.enableStringsSignedMinMax = enableStringsSignedMinMax == null ? this.enableStringsSignedMinMax : enableStringsSignedMinMax; | ||
this.timeoutPerRunnableInMsec = timeoutPerRunnableInMsec == null || Long.valueOf(timeoutPerRunnableInMsec) <= 0 ? // zero means: use default | ||
this.timeoutPerRunnableInMsec : timeoutPerRunnableInMsec; | ||
} | ||
|
||
private ParquetReaderConfig() { } | ||
|
@@ -99,6 +103,9 @@ public boolean enableStringsSignedMinMax() { | |
return enableStringsSignedMinMax; | ||
} | ||
|
||
@JsonProperty("timeoutPerRunnableInMsec") | ||
public long timeoutPerRunnableInMsec() { return timeoutPerRunnableInMsec; } | ||
|
||
public ParquetReadOptions toReadOptions() { | ||
return ParquetReadOptions.builder() | ||
.useSignedStringMinMax(enableStringsSignedMinMax) | ||
|
@@ -119,7 +126,8 @@ public int hashCode() { | |
enableBytesTotalCounter, | ||
enableTimeReadCounter, | ||
autoCorrectCorruptedDates, | ||
enableStringsSignedMinMax); | ||
enableStringsSignedMinMax, | ||
timeoutPerRunnableInMsec); | ||
} | ||
|
||
@Override | ||
|
@@ -135,7 +143,8 @@ public boolean equals(Object o) { | |
&& enableBytesTotalCounter == that.enableBytesTotalCounter | ||
&& enableTimeReadCounter == that.enableTimeReadCounter | ||
&& autoCorrectCorruptedDates == that.autoCorrectCorruptedDates | ||
&& enableStringsSignedMinMax == that.enableStringsSignedMinMax; | ||
&& enableStringsSignedMinMax == that.enableStringsSignedMinMax | ||
&& timeoutPerRunnableInMsec == that.timeoutPerRunnableInMsec; | ||
} | ||
|
||
@Override | ||
|
@@ -146,6 +155,7 @@ public String toString() { | |
+ ", enableTimeReadCounter=" + enableTimeReadCounter | ||
+ ", autoCorrectCorruptedDates=" + autoCorrectCorruptedDates | ||
+ ", enableStringsSignedMinMax=" + enableStringsSignedMinMax | ||
+ ", timeoutPerRunnableInMsec=" + timeoutPerRunnableInMsec | ||
+ '}'; | ||
} | ||
|
||
|
@@ -188,10 +198,12 @@ public ParquetReaderConfig build() { | |
|
||
// last assign values from session options, session options have higher priority than other configurations | ||
if (options != null) { | ||
String option = options.getOption(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX_VALIDATOR); | ||
if (!option.isEmpty()) { | ||
readerConfig.enableStringsSignedMinMax = Boolean.valueOf(option); | ||
String optionSignedMinMax = options.getOption(ExecConstants.PARQUET_READER_STRINGS_SIGNED_MIN_MAX_VALIDATOR); | ||
if (!optionSignedMinMax.isEmpty()) { | ||
readerConfig.enableStringsSignedMinMax = Boolean.valueOf(optionSignedMinMax); | ||
} | ||
Long optionTimeout = options.getOption(ExecConstants.PARQUET_REFRESH_TIMEOUT_VALIDATOR); | ||
readerConfig.timeoutPerRunnableInMsec = Long.valueOf(optionTimeout); | ||
} | ||
|
||
return readerConfig; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -630,6 +630,7 @@ drill.exec.options: { | |
store.parquet.reader.columnreader.async: false, | ||
store.parquet.reader.int96_as_timestamp: false, | ||
store.parquet.reader.strings_signed_min_max: "", | ||
store.parquet.refresh_timeout_per_runnable_in_msec: 15000, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. See prior comment about omitting 'refresh'. Also, I think that we should not put this in the 'parquet' namespace even though currently it is mainly used for Parquet metadata read. The reason is the runnable timeout could in theory be used for other formats also. How about |
||
store.parquet.reader.pagereader.async: true, | ||
store.parquet.reader.pagereader.bufferedread: true, | ||
store.parquet.reader.pagereader.buffersize: 1048576, | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should avoid the word 'refresh' here and in other places for the timeout, since this parameter is intended for any timed runnable task, not just the ones initiated by the REFRESH command. For instance, in normal query planning without using the metadata cache, the FooterGatherer also creates multiple `TimedCallable` threads to read parquet footers directly.