From 08d80b6608deab33896c445ea9680054e74cbf45 Mon Sep 17 00:00:00 2001 From: Kirill Ivanov Date: Mon, 9 Sep 2024 14:10:13 +0300 Subject: [PATCH] change duration to points --- stats/README.md | 2 +- stats/stats-server/src/read_service.rs | 32 +++---- stats/stats-server/src/settings.rs | 6 +- stats/stats/src/charts/chart.rs | 12 ++- stats/stats/src/charts/db_interaction/read.rs | 96 +++++++++++++++++-- stats/stats/src/lib.rs | 4 +- stats/stats/src/missing_date.rs | 30 +++--- 7 files changed, 135 insertions(+), 47 deletions(-) diff --git a/stats/README.md b/stats/README.md index 59300b9eb..6dfd28aab 100644 --- a/stats/README.md +++ b/stats/README.md @@ -82,7 +82,7 @@ by enabling word wrapping | `STATS__FORCE_​UPDATE_ON_START` | | Fully recalculate all charts on start | `false` | | `STATS__CONCURRENT_​START_UPDATES` | | Amount of concurrent charts update on start | `3` | | `STATS__​DEFAULT_​SCHEDULE` | | Schedule used for update groups with no config | `"0 0 1 * * * *"` | -| `STATS__LIMITS__REQUEST_​INTERVAL_LIMIT_DAYS` | | Maximum allowed number of requested points | `182500` | <-- TODO: change +| `STATS__LIMITS__REQUESTED_​POINTS_LIMIT` | | Maximum allowed number of requested points | `182500` | [anchor]: <> (anchors.envs.end.service) diff --git a/stats/stats-server/src/read_service.rs b/stats/stats-server/src/read_service.rs index 5600a8e3e..ef93acea0 100644 --- a/stats/stats-server/src/read_service.rs +++ b/stats/stats-server/src/read_service.rs @@ -8,7 +8,7 @@ use crate::{ }; use async_trait::async_trait; -use chrono::{Duration, NaiveDate, Utc}; +use chrono::{NaiveDate, Utc}; use proto_v1::stats_service_server::StatsService; use sea_orm::{DatabaseConnection, DbErr}; use stats::{ @@ -17,7 +17,7 @@ use stats::{ timespans::{Month, Week, Year}, Timespan, }, - MissingDatePolicy, ReadError, ResolutionKind, + ApproxUnsignedDiff, MissingDatePolicy, ReadError, RequestedPointsLimit, ResolutionKind, }; use stats_proto::blockscout::stats::v1::{self as proto_v1, Point}; use 
tonic::{Request, Response, Status}; @@ -41,14 +41,14 @@ impl ReadService { #[derive(Debug, Clone, PartialEq, Eq)] pub struct ReadLimits { - /// See [`LimitsSettings::request_interval_limit_days`] - pub request_interval_limit: Duration, + /// See [`LimitsSettings::requested_points_limit`] + pub requested_points_limit: RequestedPointsLimit, } impl From for ReadLimits { fn from(value: LimitsSettings) -> Self { Self { - request_interval_limit: Duration::days(value.request_interval_limit_days.into()), + requested_points_limit: RequestedPointsLimit::from_points(value.requested_points_limit), } } } @@ -56,7 +56,7 @@ impl From for ReadLimits { fn map_read_error(err: ReadError) -> Status { match &err { ReadError::ChartNotFound(_) => Status::not_found(err.to_string()), - ReadError::IntervalLimitExceeded(_) => Status::invalid_argument(err.to_string()), + ReadError::IntervalTooLarge(_) => Status::invalid_argument(err.to_string()), _ => { tracing::error!(err = ?err, "internal read error"); Status::internal(err.to_string()) @@ -91,12 +91,12 @@ async fn get_serialized_line_chart_data( chart_name: String, from: Option, to: Option, - interval_limit: Option, + points_limit: Option, policy: MissingDatePolicy, mark_approx: u64, ) -> Result, ReadError> where - Resolution: Timespan + Clone + Ord + Debug, + Resolution: Timespan + ApproxUnsignedDiff + Clone + Ord + Debug, { let from = from.map(|f| Resolution::from_date(f)); let to = to.map(|t| Resolution::from_date(t)); @@ -105,7 +105,7 @@ where &chart_name, from, to, - interval_limit, + points_limit, policy, true, mark_approx, @@ -122,7 +122,7 @@ async fn get_serialized_line_chart_data_resolution_dispatch( resolution: ResolutionKind, from: Option, to: Option, - interval_limit: Option, + points_limit: Option, policy: MissingDatePolicy, mark_approx: u64, ) -> Result, ReadError> { @@ -133,7 +133,7 @@ async fn get_serialized_line_chart_data_resolution_dispatch( chart_name, from, to, - interval_limit, + points_limit, policy, mark_approx, ) @@ 
-145,7 +145,7 @@ async fn get_serialized_line_chart_data_resolution_dispatch( chart_name, from, to, - interval_limit, + points_limit, policy, mark_approx, ) @@ -157,7 +157,7 @@ async fn get_serialized_line_chart_data_resolution_dispatch( chart_name, from, to, - interval_limit, + points_limit, policy, mark_approx, ) @@ -169,7 +169,7 @@ async fn get_serialized_line_chart_data_resolution_dispatch( chart_name, from, to, - interval_limit, + points_limit, policy, mark_approx, ) @@ -257,14 +257,14 @@ impl StatsService for ReadService { let to = request.to.and_then(|date| NaiveDate::from_str(&date).ok()); let policy = resolution_info.missing_date_policy; let mark_approx = resolution_info.approximate_trailing_points; - let interval_limit = Some(self.limits.request_interval_limit); + let points_limit = Some(self.limits.requested_points_limit); let serialized_chart = get_serialized_line_chart_data_resolution_dispatch( &self.db, chart_name.clone(), resolution, from, to, - interval_limit, + points_limit, policy, mark_approx, ) diff --git a/stats/stats-server/src/settings.rs b/stats/stats-server/src/settings.rs index 3608e5ea4..afa03cbaa 100644 --- a/stats/stats-server/src/settings.rs +++ b/stats/stats-server/src/settings.rs @@ -77,14 +77,14 @@ pub struct LimitsSettings { /// /// If start or end of the range is left empty, min/max values /// from DB are considered. 
- pub request_interval_limit_days: u32, + pub requested_points_limit: u32, } impl Default for LimitsSettings { fn default() -> Self { Self { - // ~500 years seems reasonable - request_interval_limit_days: 182500, + // ~500 years for days seems reasonable + requested_points_limit: 182500, } } } diff --git a/stats/stats/src/charts/chart.rs b/stats/stats/src/charts/chart.rs index c80c7e349..bc1f56d1b 100644 --- a/stats/stats/src/charts/chart.rs +++ b/stats/stats/src/charts/chart.rs @@ -7,11 +7,13 @@ use std::fmt::Display; use crate::{types::Timespan, ReadError}; -use chrono::{DateTime, Duration, Utc}; +use chrono::{DateTime, Utc}; use entity::sea_orm_active_enums::{ChartResolution, ChartType}; use sea_orm::prelude::*; use thiserror::Error; +use super::db_interaction::read::ApproxUnsignedDiff; + #[derive(Error, Debug)] pub enum UpdateError { #[error("blockscout database error: {0}")] @@ -20,8 +22,8 @@ pub enum UpdateError { StatsDB(DbErr), #[error("chart {0} not found")] ChartNotFound(ChartKey), - #[error("date interval limit ({limit}) is exceeded; choose smaller time interval.")] - IntervalLimitExceeded { limit: Duration }, + #[error("exceeded limit on requested data points (~{limit}); choose smaller time interval.")] + IntervalTooLarge { limit: u32 }, #[error("internal error: {0}")] Internal(String), } @@ -31,7 +33,7 @@ impl From for UpdateError { match read { ReadError::DB(db) => UpdateError::StatsDB(db), ReadError::ChartNotFound(err) => UpdateError::ChartNotFound(err), - ReadError::IntervalLimitExceeded(limit) => UpdateError::IntervalLimitExceeded { limit }, + ReadError::IntervalTooLarge(limit) => UpdateError::IntervalTooLarge { limit }, } } } @@ -142,7 +144,7 @@ impl Display for ChartKey { ))] pub trait ChartProperties: Sync + Named { /// Combination name + resolution must be unique for each chart - type Resolution: Timespan; + type Resolution: Timespan + ApproxUnsignedDiff; fn chart_type() -> ChartType; fn resolution() -> ResolutionKind { diff --git 
a/stats/stats/src/charts/db_interaction/read.rs b/stats/stats/src/charts/db_interaction/read.rs index 5210e7edb..494e0766c 100644 --- a/stats/stats/src/charts/db_interaction/read.rs +++ b/stats/stats/src/charts/db_interaction/read.rs @@ -3,14 +3,15 @@ use crate::{ data_source::kinds::local_db::parameter_traits::QueryBehaviour, missing_date::{fill_and_filter_chart, fit_into_range}, types::{ - timespans::DateValue, ExtendedTimespanValue, Timespan, TimespanDuration, TimespanValue, + timespans::{DateValue, Month, Week, Year}, + ExtendedTimespanValue, Timespan, TimespanDuration, TimespanValue, }, utils::exclusive_datetime_range_to_inclusive, ChartProperties, MissingDatePolicy, UpdateError, }; use blockscout_db::entity::blocks; -use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, Utc}; +use chrono::{DateTime, NaiveDate, NaiveDateTime, Utc}; use entity::{chart_data, charts, sea_orm_active_enums::ChartResolution}; use itertools::Itertools; use sea_orm::{ @@ -28,8 +29,8 @@ pub enum ReadError { DB(#[from] DbErr), #[error("chart {0} not found")] ChartNotFound(ChartKey), - #[error("date interval limit ({0}) is exceeded; choose smaller time interval.")] - IntervalLimitExceeded(Duration), + #[error("exceeded limit on requested data points (~{0}); choose smaller time interval.")] + IntervalTooLarge(u32), } #[derive(Debug, FromQueryResult)] @@ -208,7 +209,7 @@ fn relevant_data_until( /// /// `approximate_trailing_points` - number of trailing points to mark as approximate. /// -/// `interval_limit` - max interval [from, to]. If `from` or `to` are none, +/// `point_limit` - approximate max number of points in interval [from, to]. If `from` or `to` are none, /// min or max date in DB are taken.
/// /// Note: if some dates within interval `[from, to]` fall on the future, data points @@ -296,13 +297,13 @@ pub async fn get_line_chart_data( chart_name: &String, from: Option, to: Option, - interval_limit: Option, + point_limit: Option, policy: MissingDatePolicy, fill_missing_dates: bool, approximate_trailing_points: u64, ) -> Result>, ReadError> where - Resolution: Timespan + Debug + Ord + Clone, + Resolution: Timespan + ApproxUnsignedDiff + Debug + Ord + Clone, { let chart = charts::Entity::find() .column(charts::Column::Id) @@ -343,7 +344,7 @@ where let data_in_range = fit_into_range(db_data, from.clone(), to.clone(), policy); let data_unmarked = if fill_missing_dates { - fill_and_filter_chart(data_in_range, from, to, policy, interval_limit)? + fill_and_filter_chart(data_in_range, from, to, policy, point_limit)? } else { data_in_range }; @@ -563,6 +564,85 @@ where Ok(row) } +/// May not be exact, but the limit is close to +/// this number +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum RequestedPointsLimit { + Points(u32), + NoLimit, +} + +impl RequestedPointsLimit { + pub fn from_points(approx_limit: u32) -> Self { + Self::Points(approx_limit) + } + + pub fn approx_limit(&self) -> Option { + match self { + RequestedPointsLimit::Points(p) => Some(*p), + RequestedPointsLimit::NoLimit => None, + } + } + + pub fn fits_in_limit(&self, from: &T, to: &T) -> bool { + let limit = match self { + RequestedPointsLimit::Points(p) => *p, + RequestedPointsLimit::NoLimit => return true, + }; + to.approx_unsigned_difference(from) + .map(|diff| diff <= limit.into()) + .unwrap_or(true) + } +} + +pub trait ApproxUnsignedDiff { + /// Approx number of repeats of this timespan to get from `other` to `self`. + /// + /// `None` if < 0. 
+ fn approx_unsigned_difference(&self, other: &Self) -> Option; +} + +impl ApproxUnsignedDiff for NaiveDate { + fn approx_unsigned_difference(&self, other: &Self) -> Option { + self.signed_duration_since(*other) + .num_days() + .try_into() + .ok() + } +} + +impl ApproxUnsignedDiff for Week { + fn approx_unsigned_difference(&self, other: &Self) -> Option { + self.saturating_first_day() + .signed_duration_since(other.saturating_first_day()) + .num_days() + .try_into() + .ok() + .map(|n: u64| n / 7) + } +} + +impl ApproxUnsignedDiff for Month { + fn approx_unsigned_difference(&self, other: &Self) -> Option { + self.saturating_first_day() + .signed_duration_since(other.saturating_first_day()) + .num_days() + .try_into() + .ok() + // 30.436875 = mean # of days in month (according to wiki) + .map(|n: u64| (n as f64 / 30.436875) as u64) + } +} + +impl ApproxUnsignedDiff for Year { + fn approx_unsigned_difference(&self, other: &Self) -> Option { + self.number_within_naive_date() + .saturating_sub(other.number_within_naive_date()) + .try_into() + .ok() + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/stats/stats/src/lib.rs b/stats/stats/src/lib.rs index e6ccfab68..6fd73cc4a 100644 --- a/stats/stats/src/lib.rs +++ b/stats/stats/src/lib.rs @@ -15,7 +15,9 @@ pub use migration; pub use charts::{ counters, - db_interaction::read::{get_line_chart_data, get_raw_counters, ReadError}, + db_interaction::read::{ + get_line_chart_data, get_raw_counters, ApproxUnsignedDiff, ReadError, RequestedPointsLimit, + }, lines, types, ChartKey, ChartProperties, ChartPropertiesObject, MissingDatePolicy, Named, ResolutionKind, UpdateError, }; diff --git a/stats/stats/src/missing_date.rs b/stats/stats/src/missing_date.rs index e78c59003..51763b612 100644 --- a/stats/stats/src/missing_date.rs +++ b/stats/stats/src/missing_date.rs @@ -2,10 +2,11 @@ use std::{fmt::Debug, ops::RangeInclusive}; use crate::{ + charts::db_interaction::read::{ApproxUnsignedDiff, RequestedPointsLimit}, 
types::{Timespan, TimespanValue, ZeroTimespanValue}, MissingDatePolicy, ReadError, }; -use chrono::{Duration, NaiveDate}; +use chrono::NaiveDate; /// Fits the `data` within the range (`from`, `to`), preserving /// information nearby the boundaries according to `policy`. @@ -79,13 +80,13 @@ pub fn fill_and_filter_chart( from: Option, to: Option, policy: MissingDatePolicy, - interval_limit: Option, + point_limit: Option, ) -> Result>, ReadError> where - Resolution: Timespan + Debug + Ord + Clone, + Resolution: Timespan + ApproxUnsignedDiff + Debug + Ord + Clone, { let retrieved_count = data.len(); - let data_filled = fill_missing_points(data, policy, from.clone(), to.clone(), interval_limit)?; + let data_filled = fill_missing_points(data, policy, from.clone(), to.clone(), point_limit)?; if let Some(filled_count) = data_filled.len().checked_sub(retrieved_count) { if filled_count > 0 { tracing::debug!(policy = ?policy, "{} missing points were filled", filled_count); @@ -110,10 +111,10 @@ pub fn fill_missing_points( policy: MissingDatePolicy, from: Option, to: Option, - interval_limit: Option, + points_limit: Option, ) -> Result>, ReadError> where - T: Timespan + Ord + Clone, + T: Timespan + ApproxUnsignedDiff + Ord + Clone, { let from = vec![from.as_ref(), data.first().map(|v| &v.timespan)] .into_iter() @@ -129,9 +130,11 @@ where _ => return Ok(data), }; - if let Some(interval_limit) = interval_limit { - if to.clone().into_date() - from.clone().into_date() > interval_limit { - return Err(ReadError::IntervalLimitExceeded(interval_limit)); + if let Some(points_limit) = points_limit { + if let Some(limit_to_report) = points_limit.approx_limit() { + if !points_limit.fits_in_limit(&from, &to) { + return Err(ReadError::IntervalTooLarge(limit_to_report)); + } } } @@ -566,7 +569,8 @@ mod tests { #[test] fn limits_are_respected() { - let limit = Duration::days(4); + let n = 4; + let limit = RequestedPointsLimit::Points(n); assert_eq!( fill_missing_points( vec![ @@ -579,7 +583,7 
@@ mod tests { Some(d("2023-07-12")), Some(limit) ), - Err(ReadError::IntervalLimitExceeded(limit)) + Err(ReadError::IntervalTooLarge(n)) ); assert_eq!( fill_missing_points( @@ -613,7 +617,7 @@ mod tests { Some(d("2023-07-15")), Some(limit) ), - Err(ReadError::IntervalLimitExceeded(limit)) + Err(ReadError::IntervalTooLarge(n)) ); assert_eq!( fill_missing_points( @@ -627,7 +631,7 @@ mod tests { Some(d("2023-07-14")), Some(limit) ), - Err(ReadError::IntervalLimitExceeded(limit)) + Err(ReadError::IntervalTooLarge(n)) ); }