@@ -226,6 +226,8 @@ enum {
226226 MARGIN_TARGET_PCT = 50 ,
227227 MARGIN_MAX_PCT = 100 ,
228228
229+ INUSE_ADJ_STEP_PCT = 25 ,
230+
229231 /* Have some play in timer operations */
230232 TIMER_SLACK_PCT = 1 ,
231233
@@ -443,12 +445,17 @@ struct ioc_gq {
443445 *
444446 * `last_inuse` remembers `inuse` while an iocg is idle to persist
445447 * surplus adjustments.
448+ *
449+ * `inuse` may be adjusted dynamically during period. `saved_*` are used
450+ * to determine and track adjustments.
446451 */
447452 u32 cfg_weight ;
448453 u32 weight ;
449454 u32 active ;
450455 u32 inuse ;
456+
451457 u32 last_inuse ;
458+ s64 saved_margin ;
452459
453460 sector_t cursor ; /* to detect randio */
454461
@@ -934,9 +941,11 @@ static void ioc_start_period(struct ioc *ioc, struct ioc_now *now)
934941
935942/*
936943 * Update @iocg's `active` and `inuse` to @active and @inuse, update level
937- * weight sums and propagate upwards accordingly.
944+ * weight sums and propagate upwards accordingly. If @save, the current margin
945+ * is saved to be used as reference for later inuse in-period adjustments.
938946 */
939- static void __propagate_weights (struct ioc_gq * iocg , u32 active , u32 inuse )
947+ static void __propagate_weights (struct ioc_gq * iocg , u32 active , u32 inuse ,
948+ bool save , struct ioc_now * now )
940949{
941950 struct ioc * ioc = iocg -> ioc ;
942951 int lvl ;
@@ -945,6 +954,10 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse)
945954
946955 inuse = clamp_t (u32 , inuse , 1 , active );
947956
957+ iocg -> last_inuse = iocg -> inuse ;
958+ if (save )
959+ iocg -> saved_margin = now -> vnow - atomic64_read (& iocg -> vtime );
960+
948961 if (active == iocg -> active && inuse == iocg -> inuse )
949962 return ;
950963
@@ -996,9 +1009,10 @@ static void commit_weights(struct ioc *ioc)
9961009 }
9971010}
9981011
/*
 * propagate_weights - update @iocg's weights and commit the change.
 *
 * Forwards every argument unchanged to __propagate_weights() and then calls
 * commit_weights() on @iocg->ioc.  In this hunk the '-' lines are the old
 * three-argument form; the '+' lines add @save and @now, which are only
 * passed through to __propagate_weights().
 */
999- static void propagate_weights (struct ioc_gq * iocg , u32 active , u32 inuse )
1012+ static void propagate_weights (struct ioc_gq * iocg , u32 active , u32 inuse ,
1013+ bool save , struct ioc_now * now )
10001014{
1001- __propagate_weights (iocg , active , inuse );
1015+ __propagate_weights (iocg , active , inuse , save , now );
10021016 commit_weights (iocg -> ioc );
10031017}
10041018
@@ -1082,7 +1096,7 @@ static u32 current_hweight_max(struct ioc_gq *iocg)
10821096 return max_t (u32 , hwm , 1 );
10831097}
10841098
1085- static void weight_updated (struct ioc_gq * iocg )
1099+ static void weight_updated (struct ioc_gq * iocg , struct ioc_now * now )
10861100{
10871101 struct ioc * ioc = iocg -> ioc ;
10881102 struct blkcg_gq * blkg = iocg_to_blkg (iocg );
@@ -1093,9 +1107,7 @@ static void weight_updated(struct ioc_gq *iocg)
10931107
10941108 weight = iocg -> cfg_weight ?: iocc -> dfl_weight ;
10951109 if (weight != iocg -> weight && iocg -> active )
1096- propagate_weights (iocg , weight ,
1097- DIV64_U64_ROUND_UP ((u64 )iocg -> inuse * weight ,
1098- iocg -> weight ));
1110+ propagate_weights (iocg , weight , iocg -> inuse , true, now );
10991111 iocg -> weight = weight ;
11001112}
11011113
@@ -1165,8 +1177,9 @@ static bool iocg_activate(struct ioc_gq *iocg, struct ioc_now *now)
11651177 */
11661178 iocg -> hweight_gen = atomic_read (& ioc -> hweight_gen ) - 1 ;
11671179 list_add (& iocg -> active_list , & ioc -> active_iocgs );
1180+
11681181 propagate_weights (iocg , iocg -> weight ,
1169- iocg -> last_inuse ?: iocg -> weight );
1182+ iocg -> last_inuse ?: iocg -> weight , true, now );
11701183
11711184 TRACE_IOCG_PATH (iocg_activate , iocg , now ,
11721185 last_period , cur_period , vtime );
@@ -1789,7 +1802,7 @@ static void transfer_surpluses(struct list_head *surpluses, struct ioc_now *now)
17891802 inuse = DIV64_U64_ROUND_UP (
17901803 parent -> child_adjusted_sum * iocg -> hweight_after_donation ,
17911804 parent -> hweight_inuse );
1792- __propagate_weights (iocg , iocg -> active , inuse );
1805+ __propagate_weights (iocg , iocg -> active , inuse , true, now );
17931806 }
17941807
17951808 /* walk list should be dissolved after use */
@@ -1844,8 +1857,7 @@ static void ioc_timer_fn(struct timer_list *timer)
18441857 iocg_kick_waitq (iocg , true, & now );
18451858 } else if (iocg_is_idle (iocg )) {
18461859 /* no waiter and idle, deactivate */
1847- iocg -> last_inuse = iocg -> inuse ;
1848- __propagate_weights (iocg , 0 , 0 );
1860+ __propagate_weights (iocg , 0 , 0 , false, & now );
18491861 list_del_init (& iocg -> active_list );
18501862 }
18511863
@@ -1925,7 +1937,7 @@ static void ioc_timer_fn(struct timer_list *timer)
19251937 list_add (& iocg -> surplus_list , & surpluses );
19261938 } else {
19271939 __propagate_weights (iocg , iocg -> active ,
1928- iocg -> active );
1940+ iocg -> active , true, & now );
19291941 nr_shortages ++ ;
19301942 }
19311943 } else {
@@ -2055,6 +2067,50 @@ static void ioc_timer_fn(struct timer_list *timer)
20552067 spin_unlock_irq (& ioc -> lock );
20562068}
20572069
2070+ static u64 adjust_inuse_and_calc_cost (struct ioc_gq * iocg , u64 vtime ,
2071+ u64 abs_cost , struct ioc_now * now )
2072+ {
2073+ struct ioc * ioc = iocg -> ioc ;
2074+ struct ioc_margins * margins = & ioc -> margins ;
2075+ u32 adj_step = DIV_ROUND_UP (iocg -> active * INUSE_ADJ_STEP_PCT , 100 );
2076+ u32 hwi ;
2077+ s64 margin ;
2078+ u64 cost , new_inuse ;
2079+
2080+ current_hweight (iocg , NULL , & hwi );
2081+ cost = abs_cost_to_cost (abs_cost , hwi );
2082+ margin = now -> vnow - vtime - cost ;
2083+
2084+ /*
2085+ * We only increase inuse during period and do so iff the margin has
2086+ * deteriorated since the previous adjustment.
2087+ */
2088+ if (margin >= iocg -> saved_margin || margin >= margins -> low ||
2089+ iocg -> inuse == iocg -> active )
2090+ return cost ;
2091+
2092+ spin_lock_irq (& ioc -> lock );
2093+
2094+ /* we own inuse only when @iocg is in the normal active state */
2095+ if (list_empty (& iocg -> active_list )) {
2096+ spin_unlock_irq (& ioc -> lock );
2097+ return cost ;
2098+ }
2099+
2100+ /* bump up inuse till @abs_cost fits in the existing budget */
2101+ new_inuse = iocg -> inuse ;
2102+ do {
2103+ new_inuse = new_inuse + adj_step ;
2104+ propagate_weights (iocg , iocg -> active , new_inuse , true, now );
2105+ current_hweight (iocg , NULL , & hwi );
2106+ cost = abs_cost_to_cost (abs_cost , hwi );
2107+ } while (time_after64 (vtime + cost , now -> vnow ) &&
2108+ iocg -> inuse != iocg -> active );
2109+
2110+ spin_unlock_irq (& ioc -> lock );
2111+ return cost ;
2112+ }
2113+
20582114static void calc_vtime_cost_builtin (struct bio * bio , struct ioc_gq * iocg ,
20592115 bool is_merge , u64 * costp )
20602116{
@@ -2136,7 +2192,6 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
21362192 struct ioc_gq * iocg = blkg_to_iocg (blkg );
21372193 struct ioc_now now ;
21382194 struct iocg_wait wait ;
2139- u32 hw_active , hw_inuse ;
21402195 u64 abs_cost , cost , vtime ;
21412196 bool use_debt , ioc_locked ;
21422197 unsigned long flags ;
@@ -2154,21 +2209,8 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
21542209 return ;
21552210
21562211 iocg -> cursor = bio_end_sector (bio );
2157-
21582212 vtime = atomic64_read (& iocg -> vtime );
2159- current_hweight (iocg , & hw_active , & hw_inuse );
2160-
2161- if (hw_inuse < hw_active &&
2162- time_after_eq64 (vtime + ioc -> margins .min , now .vnow )) {
2163- TRACE_IOCG_PATH (inuse_reset , iocg , & now ,
2164- iocg -> inuse , iocg -> weight , hw_inuse , hw_active );
2165- spin_lock_irq (& ioc -> lock );
2166- propagate_weights (iocg , iocg -> weight , iocg -> weight );
2167- spin_unlock_irq (& ioc -> lock );
2168- current_hweight (iocg , & hw_active , & hw_inuse );
2169- }
2170-
2171- cost = abs_cost_to_cost (abs_cost , hw_inuse );
2213+ cost = adjust_inuse_and_calc_cost (iocg , vtime , abs_cost , & now );
21722214
21732215 /*
21742216 * If no one's waiting and within budget, issue right away. The
@@ -2190,7 +2232,7 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
21902232 */
21912233 use_debt = bio_issue_as_root_blkg (bio ) || fatal_signal_pending (current );
21922234 ioc_locked = use_debt || READ_ONCE (iocg -> abs_vdebt );
2193-
2235+ retry_lock :
21942236 iocg_lock (iocg , ioc_locked , & flags );
21952237
21962238 /*
@@ -2232,6 +2274,17 @@ static void ioc_rqos_throttle(struct rq_qos *rqos, struct bio *bio)
22322274 return ;
22332275 }
22342276
2277+ /* guarantee that iocgs w/ waiters have maximum inuse */
2278+ if (iocg -> inuse != iocg -> active ) {
2279+ if (!ioc_locked ) {
2280+ iocg_unlock (iocg , false, & flags );
2281+ ioc_locked = true;
2282+ goto retry_lock ;
2283+ }
2284+ propagate_weights (iocg , iocg -> active , iocg -> active , true,
2285+ & now );
2286+ }
2287+
22352288 /*
22362289 * Append self to the waitq and schedule the wakeup timer if we're
22372290 * the first waiter. The timer duration is calculated based on the
@@ -2274,8 +2327,7 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
22742327 struct ioc * ioc = iocg -> ioc ;
22752328 sector_t bio_end = bio_end_sector (bio );
22762329 struct ioc_now now ;
2277- u32 hw_inuse ;
2278- u64 abs_cost , cost ;
2330+ u64 vtime , abs_cost , cost ;
22792331 unsigned long flags ;
22802332
22812333 /* bypass if disabled or for root cgroup */
@@ -2287,8 +2339,9 @@ static void ioc_rqos_merge(struct rq_qos *rqos, struct request *rq,
22872339 return ;
22882340
22892341 ioc_now (ioc , & now );
2290- current_hweight (iocg , NULL , & hw_inuse );
2291- cost = abs_cost_to_cost (abs_cost , hw_inuse );
2342+
2343+ vtime = atomic64_read (& iocg -> vtime );
2344+ cost = adjust_inuse_and_calc_cost (iocg , vtime , abs_cost , & now );
22922345
22932346 /* update cursor if backmerging into the request at the cursor */
22942347 if (blk_rq_pos (rq ) < bio_end &&
@@ -2530,7 +2583,7 @@ static void ioc_pd_init(struct blkg_policy_data *pd)
25302583 }
25312584
25322585 spin_lock_irqsave (& ioc -> lock , flags );
2533- weight_updated (iocg );
2586+ weight_updated (iocg , & now );
25342587 spin_unlock_irqrestore (& ioc -> lock , flags );
25352588}
25362589
@@ -2544,7 +2597,10 @@ static void ioc_pd_free(struct blkg_policy_data *pd)
25442597 spin_lock_irqsave (& ioc -> lock , flags );
25452598
25462599 if (!list_empty (& iocg -> active_list )) {
2547- propagate_weights (iocg , 0 , 0 );
2600+ struct ioc_now now ;
2601+
2602+ ioc_now (ioc , & now );
2603+ propagate_weights (iocg , 0 , 0 , false, & now );
25482604 list_del_init (& iocg -> active_list );
25492605 }
25502606
@@ -2612,6 +2668,7 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
26122668 struct blkcg * blkcg = css_to_blkcg (of_css (of ));
26132669 struct ioc_cgrp * iocc = blkcg_to_iocc (blkcg );
26142670 struct blkg_conf_ctx ctx ;
2671+ struct ioc_now now ;
26152672 struct ioc_gq * iocg ;
26162673 u32 v ;
26172674 int ret ;
@@ -2632,7 +2689,8 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
26322689
26332690 if (iocg ) {
26342691 spin_lock_irq (& iocg -> ioc -> lock );
2635- weight_updated (iocg );
2692+ ioc_now (iocg -> ioc , & now );
2693+ weight_updated (iocg , & now );
26362694 spin_unlock_irq (& iocg -> ioc -> lock );
26372695 }
26382696 }
@@ -2658,7 +2716,8 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
26582716
26592717 spin_lock (& iocg -> ioc -> lock );
26602718 iocg -> cfg_weight = v * WEIGHT_ONE ;
2661- weight_updated (iocg );
2719+ ioc_now (iocg -> ioc , & now );
2720+ weight_updated (iocg , & now );
26622721 spin_unlock (& iocg -> ioc -> lock );
26632722
26642723 blkg_conf_finish (& ctx );
0 commit comments