Skip to content

Commit

Permalink
Attend to review comments
Browse files Browse the repository at this point in the history
* period -> uploading window;
* get_upload_task_job -> get_upload_task_internal;
* Add comment to all bindings on how we avoind infinite loops;
* Correct position of incoming task comment in Kt;
* Add CHANGELOG.md entry;
  • Loading branch information
brizental committed Aug 4, 2020
1 parent 3edf2c6 commit 2bfcc25
Show file tree
Hide file tree
Showing 7 changed files with 28 additions and 17 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

[Full changelog](https://github.com/mozilla/glean/compare/v32.0.0...main)

* Python
* BUGFIX: Limit the number of retries for 5xx server errors on ping uploads. ([#1120](https://github.com/mozilla/glean/pull/1120))
* This kinds of failures yield a "recoverable error", which means the ping gets re-enqueued. That can cause infinite loops on the ping upload worker. For python we were incorrectly only limiting the number of retries for I/O errors, another type of "recoverable error".

# v32.0.0 (2020-08-03)

[Full changelog](https://github.com/mozilla/glean/compare/v31.6.0...v32.0.0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,10 @@ internal sealed class PingUploadTask {
*
* There are two possibilities for this scenario:
* * Pending pings queue is empty, no more pings to request;
* * Requester has reported more than MAX_RECOVERABLE_FAILURES_PER_PERIOD
* recoverable upload failures on the same period[1]
* * Requester has reported more max recoverable upload failures on the same uploading_window[1]
* and should stop requesting at this moment.
*
* [1]: A "period" starts when a requester gets a new `PingUploadTask::Upload(PingRequest)`
* [1]: An "uploading window" starts when a requester gets a new `PingUploadTask::Upload(PingRequest)`
* response and finishes when they finally get a `PingUploadTask::Done` or `PingUploadTask::Wait` response.
*/
object Done : PingUploadTask()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ class PingUploadWorker(context: Context, params: WorkerParameters) : Worker(cont
*/
@Suppress("ReturnCount")
override fun doWork(): Result {
// Create a slot of memory for the task: glean-core will write data into
// the allocated memory.
do {
// Create a slot of memory for the task: glean-core will write data into
// the allocated memory.
val incomingTask = FfiPingUploadTask.ByReference()
LibGleanFFI.INSTANCE.glean_get_upload_task(incomingTask)
when (val action = incomingTask.toPingUploadTask()) {
Expand All @@ -120,5 +120,7 @@ class PingUploadWorker(context: Context, params: WorkerParameters) : Worker(cont
PingUploadTask.Done -> return Result.success()
}
} while (true)
// Limits are enforced by glean-core to avoid an inifinite loop here.
// Whenever a limit is reached, this binding will receive `PingUploadTask.Done` and step out.
}
}
2 changes: 2 additions & 0 deletions glean-core/csharp/Glean/Net/BaseUploader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,8 @@ internal void TriggerUpload(Configuration config)

int waitAttempts = 0;

// Limits are enforced by glean-core to avoid an inifinite loop here.
// Whenever a limit is reached, this binding will receive `UploadTaskTag.Done` and step out.
while (true)
{
FfiUploadTask incomingTask = new FfiUploadTask();
Expand Down
2 changes: 2 additions & 0 deletions glean-core/ios/Glean/Net/HttpPingUploader.swift
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ public class HttpPingUploader {
/// It will report back the task status to Glean, which will take care of deleting pending ping files.
/// It will continue upload as long as it can fetch new tasks.
func process() {
// Limits are enforced by glean-core to avoid an inifinite loop here.
// Whenever a limit is reached, this binding will receive `.done` and step out.
while true {
var incomingTask = FfiPingUploadTask()
glean_get_upload_task(&incomingTask)
Expand Down
2 changes: 2 additions & 0 deletions glean-core/python/glean/net/ping_upload_worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ def _process(data_dir: Path, application_id: str, configuration) -> bool:

wait_attempts = 0

# Limits are enforced by glean-core to avoid an inifinite loop here.
# Whenever a limit is reached, this binding will receive `UploadTaskTag.DONE` and step out.
while True:
incoming_task = ffi_support.new("FfiPingUploadTask *")
_ffi.lib.glean_get_upload_task(incoming_task)
Expand Down
24 changes: 12 additions & 12 deletions glean-core/src/upload/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ mod directory;
mod request;
mod result;

/// The maximum recoverable failures allowed per period.
/// The maximum recoverable failures allowed per uploading window.
///
/// Limiting this is necessary to avoid infinite loops on requesting upload tasks.
const MAX_RECOVERABLE_FAILURES_PER_PERIOD: u32 = 3;
const MAX_RECOVERABLE_FAILURES_PER_UPLOADING_WINDOW: u32 = 3;

// The maximum size in bytes a ping body may have to be eligible for upload.
const PING_BODY_MAX_SIZE: usize = 1024 * 1024; // 1 MB
Expand Down Expand Up @@ -121,11 +121,11 @@ pub enum PingUploadTask {
///
/// There are two possibilities for this scenario:
/// * Pending pings queue is empty, no more pings to request;
/// * Requester has reported more than MAX_RECOVERABLE_FAILURES_PER_PERIOD
/// recoverable upload failures on the same period[1]
/// * Requester has reported more than MAX_RECOVERABLE_FAILURES_PER_UPLOADING_WINDOW
/// recoverable upload failures on the same uploading window[1]
/// and should stop requesting at this moment.
///
/// [1]: A "period" starts when a requester gets a new `PingUploadTask::Upload(PingRequest)`
/// [1]: An "uploading window" starts when a requester gets a new `PingUploadTask::Upload(PingRequest)`
/// response and finishes when they finally get a `PingUploadTask::Done` or `PingUploadTask::Wait` response.
Done,
}
Expand All @@ -141,7 +141,7 @@ pub struct PingUploadManager {
processed_pending_pings: Arc<AtomicBool>,
/// A vector to store the pending pings processed off-thread.
pending_pings: Arc<RwLock<Vec<PingPayload>>>,
/// The number of upload failures for the current period.
/// The number of upload failures for the current uploading window.
recoverable_failure_count: AtomicU32,
/// A ping counter to help rate limit the ping uploads.
///
Expand Down Expand Up @@ -347,7 +347,7 @@ impl PingUploadManager {
queue
}

fn get_upload_task_job(&self, glean: &Glean, log_ping: bool) -> PingUploadTask {
fn get_upload_task_internal(&self, glean: &Glean, log_ping: bool) -> PingUploadTask {
if !self.has_processed_pings_dir() {
log::info!(
"Tried getting an upload task, but processing is ongoing. Will come back later."
Expand All @@ -363,9 +363,9 @@ impl PingUploadManager {
self.enqueue_ping(glean, &document_id, &path, &body, headers);
}

if self.recoverable_failure_count() >= MAX_RECOVERABLE_FAILURES_PER_PERIOD {
if self.recoverable_failure_count() >= MAX_RECOVERABLE_FAILURES_PER_UPLOADING_WINDOW {
log::warn!(
"Reached maximum recoverable failures for the current period. You are done."
"Reached maximum recoverable failures for the current uploading window. You are done."
);

return PingUploadTask::Done;
Expand Down Expand Up @@ -423,7 +423,7 @@ impl PingUploadManager {
///
/// `PingUploadTask` - see [`PingUploadTask`](enum.PingUploadTask.html) for more information.
pub fn get_upload_task(&self, glean: &Glean, log_ping: bool) -> PingUploadTask {
let task = self.get_upload_task_job(glean, log_ping);
let task = self.get_upload_task_internal(glean, log_ping);
if task == PingUploadTask::Done || task == PingUploadTask::Wait {
self.reset_recoverable_failure_count()
}
Expand Down Expand Up @@ -1096,7 +1096,7 @@ mod test {
}

#[test]
fn maximum_of_recoverable_errors_is_enforced_for_period() {
fn maximum_of_recoverable_errors_is_enforced_for_uploading_window() {
let (mut glean, _) = new_glean(None);

// Wait for processing of pending pings directory to finish.
Expand All @@ -1115,7 +1115,7 @@ mod test {
}

// Return the max recoverable error failures in a row
for _ in 0..MAX_RECOVERABLE_FAILURES_PER_PERIOD {
for _ in 0..MAX_RECOVERABLE_FAILURES_PER_UPLOADING_WINDOW {
match glean.get_upload_task() {
PingUploadTask::Upload(req) => {
glean.process_ping_upload_response(&req.document_id, RecoverableFailure)
Expand Down

0 comments on commit 2bfcc25

Please sign in to comment.