3939import java .util .HashSet ;
4040import java .util .List ;
4141import java .util .Map ;
42+ import java .util .Optional ;
4243import java .util .Set ;
4344import java .util .concurrent .CompletableFuture ;
4445import java .util .stream .Collectors ;
@@ -67,6 +68,8 @@ public class AutoForceMergeManager extends AbstractLifecycleComponent {
6768 private NodeValidator nodeValidator ;
6869 private ShardValidator shardValidator ;
6970 private Integer allocatedProcessors ;
71+ private String nodeId ;
72+ private final AutoForceMergeMetrics autoForceMergeMetrics ;
7073 private ResourceTrackerProvider .ResourceTrackers resourceTrackers ;
7174 private ForceMergeManagerSettings forceMergeManagerSettings ;
7275 private final CommonStatsFlags flags = new CommonStatsFlags (CommonStatsFlags .Flag .Segments , CommonStatsFlags .Flag .Translog );
@@ -78,14 +81,16 @@ public AutoForceMergeManager(
7881 ThreadPool threadPool ,
7982 MonitorService monitorService ,
8083 IndicesService indicesService ,
81- ClusterService clusterService
84+ ClusterService clusterService ,
85+ AutoForceMergeMetrics autoForceMergeMetrics
8286 ) {
8387 this .threadPool = threadPool ;
8488 this .osService = monitorService .osService ();
8589 this .fsService = monitorService .fsService ();
8690 this .jvmService = monitorService .jvmService ();
8791 this .clusterService = clusterService ;
8892 this .indicesService = indicesService ;
93+ this .autoForceMergeMetrics = autoForceMergeMetrics ;
8994 this .mergingShards = new HashSet <>();
9095 }
9196
@@ -98,6 +103,7 @@ protected void doStart() {
98103 this .shardValidator = new ShardValidator ();
99104 this .allocatedProcessors = OpenSearchExecutors .allocatedProcessors (clusterService .getSettings ());
100105 this .resourceTrackers = ResourceTrackerProvider .create (threadPool );
106+ this .nodeId = clusterService .localNode ().getId ();
101107 }
102108
103109 @ Override
@@ -119,20 +125,39 @@ private void modifySchedulerInterval(TimeValue schedulerInterval) {
119125 }
120126
121127 private void triggerForceMerge () {
122- if (isValidForForceMerge () == false ) {
123- return ;
128+ long startTime = System .currentTimeMillis ();
129+ try {
130+ if (isValidForForceMerge () == false ) {
131+ return ;
132+ }
133+ executeForceMergeOnShards ();
134+ } finally {
135+ autoForceMergeMetrics .recordInHistogram (
136+ autoForceMergeMetrics .schedulerExecutionTime ,
137+ (double ) System .currentTimeMillis () - startTime ,
138+ autoForceMergeMetrics .getTags (Optional .of (nodeId ), Optional .empty ())
139+ );
124140 }
125- executeForceMergeOnShards ();
126141 }
127142
128143 private boolean isValidForForceMerge () {
129144 if (configurationValidator .hasWarmNodes () == false ) {
130145 resourceTrackers .stop ();
131146 logger .debug ("No warm nodes found. Skipping Auto Force merge." );
147+ autoForceMergeMetrics .incrementCounter (
148+ autoForceMergeMetrics .skipsFromConfigValidator ,
149+ 1.0 ,
150+ autoForceMergeMetrics .getTags (Optional .of (nodeId ), Optional .empty ())
151+ );
132152 return false ;
133153 }
134154 if (nodeValidator .validate ().isAllowed () == false ) {
135155 logger .debug ("Node capacity constraints are not allowing to trigger auto ForceMerge" );
156+ autoForceMergeMetrics .incrementCounter (
157+ autoForceMergeMetrics .skipsFromNodeValidator ,
158+ 1.0 ,
159+ autoForceMergeMetrics .getTags (Optional .of (nodeId ), Optional .empty ())
160+ );
136161 return false ;
137162 }
138163 return true ;
@@ -157,13 +182,47 @@ private void executeForceMergeOnShards() {
157182
158183 private void executeForceMergeForShard (IndexShard shard ) {
159184 CompletableFuture .runAsync (() -> {
185+ long startTime = System .currentTimeMillis ();
160186 try {
161187 mergingShards .add (shard .shardId ().getId ());
188+ autoForceMergeMetrics .incrementCounter (
189+ autoForceMergeMetrics .mergesTriggered ,
190+ 1.0 ,
191+ autoForceMergeMetrics .getTags (Optional .of (nodeId ), Optional .empty ())
192+ );
193+
194+ CommonStats stats = new CommonStats (indicesService .getIndicesQueryCache (), shard , flags );
195+ if (stats .getSegments () != null ) {
196+ autoForceMergeMetrics .incrementCounter (
197+ autoForceMergeMetrics .segmentCount ,
198+ (double ) stats .getSegments ().getCount (),
199+ autoForceMergeMetrics .getTags (Optional .of (nodeId ), Optional .of (String .valueOf (shard .shardId ().getId ())))
200+ );
201+ }
202+
203+ long shardSizeInBytes = shard .store ().stats (0L ).sizeInBytes ();
204+ autoForceMergeMetrics .incrementCounter (
205+ autoForceMergeMetrics .shardSize ,
206+ (double ) shardSizeInBytes ,
207+ autoForceMergeMetrics .getTags (Optional .of (nodeId ), Optional .of (String .valueOf (shard .shardId ().getId ())))
208+ );
209+
162210 shard .forceMerge (new ForceMergeRequest ().maxNumSegments (forceMergeManagerSettings .getSegmentCount ()));
211+
163212 logger .debug ("Merging is completed successfully for the shard {}" , shard .shardId ());
164213 } catch (Exception e ) {
214+ autoForceMergeMetrics .incrementCounter (
215+ autoForceMergeMetrics .mergesFailed ,
216+ 1.0 ,
217+ autoForceMergeMetrics .getTags (Optional .of (nodeId ), Optional .empty ())
218+ );
165219 logger .error ("Error during force merge for shard {}\n Exception: {}" , shard .shardId (), e );
166220 } finally {
221+ autoForceMergeMetrics .recordInHistogram (
222+ autoForceMergeMetrics .shardMergeLatency ,
223+ (double ) System .currentTimeMillis () - startTime ,
224+ autoForceMergeMetrics .getTags (Optional .of (nodeId ), Optional .of (String .valueOf (shard .shardId ().getId ())))
225+ );
167226 mergingShards .remove (shard .shardId ().getId ());
168227 }
169228 }, threadPool .executor (ThreadPool .Names .FORCE_MERGE ));
0 commit comments