Revert "Redefine Drop Index as logical delete (opensearch-project#2386)…

… (opensearch-project#2397)" This reverts commit e939bb6. Signed-off-by: Eric <menwe@amazon.com>
mengweieric · Nov 8, 2023 · d737bfd · d737bfd
1 parent b7ca9c4
commit d737bfd
Show file tree

Hide file tree

Showing 45 changed files with 769 additions and 2,269 deletions.
diff --git a/common/src/main/java/org/opensearch/sql/common/setting/Settings.java b/common/src/main/java/org/opensearch/sql/common/setting/Settings.java
@@ -5,6 +5,8 @@
 
 package org.opensearch.sql.common.setting;
 
+import static org.opensearch.sql.common.setting.Settings.Key.SPARK_EXECUTION_SESSION_ENABLED;
+
 import com.google.common.base.Strings;
 import com.google.common.collect.ImmutableMap;
 import java.util.List;
@@ -38,8 +40,8 @@ public enum Key {
     METRICS_ROLLING_INTERVAL("plugins.query.metrics.rolling_interval"),
     SPARK_EXECUTION_ENGINE_CONFIG("plugins.query.executionengine.spark.config"),
     CLUSTER_NAME("cluster.name"),
+    SPARK_EXECUTION_SESSION_ENABLED("plugins.query.executionengine.spark.session.enabled"),
     SPARK_EXECUTION_SESSION_LIMIT("plugins.query.executionengine.spark.session.limit"),
-    SPARK_EXECUTION_REFRESH_JOB_LIMIT("plugins.query.executionengine.spark.refresh_job.limit"),
     SESSION_INDEX_TTL("plugins.query.executionengine.spark.session.index.ttl"),
     RESULT_INDEX_TTL("plugins.query.executionengine.spark.result.index.ttl"),
     AUTO_INDEX_MANAGEMENT_ENABLED(
@@ -67,4 +69,9 @@ public static Optional<Key> of(String keyValue) {
   public abstract <T> T getSettingValue(Key key);
 
   public abstract List<?> getSettings();
+
+  /** Returns the SPARK_EXECUTION_SESSION_ENABLED value from the given settings. */
+  public static boolean isSparkExecutionSessionEnabled(Settings settings) {
+    return settings.getSettingValue(SPARK_EXECUTION_SESSION_ENABLED);
+  }
 }
diff --git a/docs/user/admin/settings.rst b/docs/user/admin/settings.rst
@@ -311,16 +311,15 @@ SQL query::
       "status": 400
     }
 
-
-plugins.query.executionengine.spark.session.limit
-==================================================
+plugins.query.executionengine.spark.session.enabled
+===================================================
 
 Description
 -----------
 
-Each cluster can have maximum 100 sessions running in parallel by default. You can increase limit by this setting.
+By default, the execution engine runs in session mode. You can disable session mode with this setting.
 
-1. The default value is 100.
+1. The default value is true.
 2. This setting is node scope.
 3. This setting can be updated dynamically.
 
@@ -329,7 +328,7 @@ You can update the setting with a new value like this.
 SQL query::
 
     sh$ curl -sS -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings \
-    ... -d '{"transient":{"plugins.query.executionengine.spark.session.limit":200}}'
+    ... -d '{"transient":{"plugins.query.executionengine.spark.session.enabled":"false"}}'
     {
       "acknowledged": true,
       "persistent": {},
@@ -339,7 +338,7 @@ SQL query::
             "executionengine": {
               "spark": {
                 "session": {
-                  "limit": "200"
+                  "enabled": "false"
                 }
               }
             }
@@ -348,16 +347,15 @@ SQL query::
       }
     }
 
-
-plugins.query.executionengine.spark.refresh_job.limit
-=====================================================
+plugins.query.executionengine.spark.session.limit
+==================================================
 
 Description
 -----------
 
-Each cluster can have maximum 20 datasources. You can increase limit by this setting.
+By default, each cluster can have a maximum of 100 sessions running in parallel. You can increase the limit with this setting.
 
-1. The default value is 20.
+1. The default value is 100.
 2. This setting is node scope.
 3. This setting can be updated dynamically.
 
@@ -366,7 +364,7 @@ You can update the setting with a new value like this.
 SQL query::
 
     sh$ curl -sS -H 'Content-Type: application/json' -X PUT localhost:9200/_plugins/_query/settings \
-    ... -d '{"transient":{"plugins.query.executionengine.spark.refresh_job.limit":200}}'
+    ... -d '{"transient":{"plugins.query.executionengine.spark.session.limit":200}}'
     {
       "acknowledged": true,
       "persistent": {},
@@ -375,7 +373,7 @@ SQL query::
           "query": {
             "executionengine": {
               "spark": {
-                "refresh_job": {
+                "session": {
                   "limit": "200"
                 }
               }

diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/setting/OpenSearchSettings.java
@@ -137,17 +137,17 @@ public class OpenSearchSettings extends Settings {
           Setting.Property.NodeScope,
           Setting.Property.Dynamic);
 
-  public static final Setting<?> SPARK_EXECUTION_SESSION_LIMIT_SETTING =
-      Setting.intSetting(
-          Key.SPARK_EXECUTION_SESSION_LIMIT.getKeyValue(),
-          100,
+  public static final Setting<?> SPARK_EXECUTION_SESSION_ENABLED_SETTING =
+      Setting.boolSetting(
+          Key.SPARK_EXECUTION_SESSION_ENABLED.getKeyValue(),
+          true,
           Setting.Property.NodeScope,
           Setting.Property.Dynamic);
 
-  public static final Setting<?> SPARK_EXECUTION_REFRESH_JOB_LIMIT_SETTING =
+  public static final Setting<?> SPARK_EXECUTION_SESSION_LIMIT_SETTING =
       Setting.intSetting(
-          Key.SPARK_EXECUTION_REFRESH_JOB_LIMIT.getKeyValue(),
-          50,
+          Key.SPARK_EXECUTION_SESSION_LIMIT.getKeyValue(),
+          100,
           Setting.Property.NodeScope,
           Setting.Property.Dynamic);
 
@@ -252,15 +252,15 @@ public OpenSearchSettings(ClusterSettings clusterSettings) {
     register(
         settingBuilder,
         clusterSettings,
-        Key.SPARK_EXECUTION_SESSION_LIMIT,
-        SPARK_EXECUTION_SESSION_LIMIT_SETTING,
-        new Updater(Key.SPARK_EXECUTION_SESSION_LIMIT));
+        Key.SPARK_EXECUTION_SESSION_ENABLED,
+        SPARK_EXECUTION_SESSION_ENABLED_SETTING,
+        new Updater(Key.SPARK_EXECUTION_SESSION_ENABLED));
     register(
         settingBuilder,
         clusterSettings,
-        Key.SPARK_EXECUTION_REFRESH_JOB_LIMIT,
-        SPARK_EXECUTION_REFRESH_JOB_LIMIT_SETTING,
-        new Updater(Key.SPARK_EXECUTION_REFRESH_JOB_LIMIT));
+        Key.SPARK_EXECUTION_SESSION_LIMIT,
+        SPARK_EXECUTION_SESSION_LIMIT_SETTING,
+        new Updater(Key.SPARK_EXECUTION_SESSION_LIMIT));
     register(
         settingBuilder,
         clusterSettings,
@@ -350,8 +350,8 @@ public static List<Setting<?>> pluginSettings() {
         .add(METRICS_ROLLING_INTERVAL_SETTING)
         .add(DATASOURCE_URI_HOSTS_DENY_LIST)
         .add(SPARK_EXECUTION_ENGINE_CONFIG)
+        .add(SPARK_EXECUTION_SESSION_ENABLED_SETTING)
         .add(SPARK_EXECUTION_SESSION_LIMIT_SETTING)
-        .add(SPARK_EXECUTION_REFRESH_JOB_LIMIT_SETTING)
         .add(SESSION_INDEX_TTL_SETTING)
         .add(RESULT_INDEX_TTL_SETTING)
         .add(AUTO_INDEX_MANAGEMENT_ENABLED_SETTING)

diff --git a/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java b/plugin/src/main/java/org/opensearch/sql/plugin/SQLPlugin.java
@@ -335,8 +335,7 @@ private AsyncQueryExecutorService createAsyncQueryExecutorService(
             new FlintIndexMetadataReaderImpl(client),
             client,
             new SessionManager(stateStore, emrServerlessClient, pluginSettings),
-            new DefaultLeaseManager(pluginSettings, stateStore),
-            stateStore);
+            new DefaultLeaseManager(pluginSettings, stateStore));
     return new AsyncQueryExecutorServiceImpl(
         asyncQueryJobMetadataStorageService,
         sparkQueryDispatcher,

diff --git a/spark/build.gradle b/spark/build.gradle
@@ -123,7 +123,6 @@ jacocoTestCoverageVerification {
                     'org.opensearch.sql.spark.execution.statestore.StateStore',
                     'org.opensearch.sql.spark.execution.session.SessionModel',
                     'org.opensearch.sql.spark.execution.statement.StatementModel',
-                    'org.opensearch.sql.spark.flint.FlintIndexStateModel',
                     // TODO: add tests for purging flint indices
                     'org.opensearch.sql.spark.cluster.ClusterManagerEventListener*',
                     'org.opensearch.sql.spark.cluster.FlintIndexRetention',

diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImpl.java b/spark/src/main/java/org/opensearch/sql/spark/asyncquery/AsyncQueryExecutorServiceImpl.java
@@ -72,6 +72,7 @@ public CreateAsyncQueryResponse createAsyncQuery(
             dispatchQueryResponse.getQueryId(),
             sparkExecutionEngineConfig.getApplicationId(),
             dispatchQueryResponse.getJobId(),
+            dispatchQueryResponse.isDropIndexQuery(),
             dispatchQueryResponse.getResultIndex(),
             dispatchQueryResponse.getSessionId()));
     return new CreateAsyncQueryResponse(

diff --git a/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryJobMetadata.java b/spark/src/main/java/org/opensearch/sql/spark/asyncquery/model/AsyncQueryJobMetadata.java
@@ -29,6 +29,7 @@ public class AsyncQueryJobMetadata extends StateModel {
   private final AsyncQueryId queryId;
   private final String applicationId;
   private final String jobId;
+  private final boolean isDropIndexQuery;
   private final String resultIndex;
   // optional sessionId.
   private final String sessionId;
@@ -42,6 +43,7 @@ public AsyncQueryJobMetadata(
         queryId,
         applicationId,
         jobId,
+        false,
         resultIndex,
         null,
         SequenceNumbers.UNASSIGNED_SEQ_NO,
@@ -52,12 +54,14 @@ public AsyncQueryJobMetadata(
       AsyncQueryId queryId,
       String applicationId,
       String jobId,
+      boolean isDropIndexQuery,
       String resultIndex,
       String sessionId) {
     this(
         queryId,
         applicationId,
         jobId,
+        isDropIndexQuery,
         resultIndex,
         sessionId,
         SequenceNumbers.UNASSIGNED_SEQ_NO,
@@ -68,13 +72,15 @@ public AsyncQueryJobMetadata(
       AsyncQueryId queryId,
       String applicationId,
       String jobId,
+      boolean isDropIndexQuery,
       String resultIndex,
       String sessionId,
       long seqNo,
       long primaryTerm) {
     this.queryId = queryId;
     this.applicationId = applicationId;
     this.jobId = jobId;
+    this.isDropIndexQuery = isDropIndexQuery;
     this.resultIndex = resultIndex;
     this.sessionId = sessionId;
     this.seqNo = seqNo;
@@ -100,6 +106,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws
         .field("type", TYPE_JOBMETA)
         .field("jobId", jobId)
         .field("applicationId", applicationId)
+        .field("isDropIndexQuery", isDropIndexQuery)
         .field("resultIndex", resultIndex)
         .field("sessionId", sessionId)
         .endObject();
@@ -113,6 +120,7 @@ public static AsyncQueryJobMetadata copy(
         copy.getQueryId(),
         copy.getApplicationId(),
         copy.getJobId(),
+        copy.isDropIndexQuery(),
         copy.getResultIndex(),
         copy.getSessionId(),
         seqNo,
@@ -168,7 +176,14 @@ public static AsyncQueryJobMetadata fromXContent(
       throw new IllegalArgumentException("jobId and applicationId are required fields.");
     }
     return new AsyncQueryJobMetadata(
-        queryId, applicationId, jobId, resultIndex, sessionId, seqNo, primaryTerm);
+        queryId,
+        applicationId,
+        jobId,
+        isDropIndexQuery,
+        resultIndex,
+        sessionId,
+        seqNo,
+        primaryTerm);
   }
 
   @Override

diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/AsyncQueryHandler.java b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/AsyncQueryHandler.java
@@ -20,6 +20,11 @@
 public abstract class AsyncQueryHandler {
 
   public JSONObject getQueryResponse(AsyncQueryJobMetadata asyncQueryJobMetadata) {
+    if (asyncQueryJobMetadata.isDropIndexQuery()) {
+      return SparkQueryDispatcher.DropIndexResult.fromJobId(asyncQueryJobMetadata.getJobId())
+          .result();
+    }
+
     JSONObject result = getResponseFromResultIndex(asyncQueryJobMetadata);
     if (result.has(DATA_FIELD)) {
       JSONObject items = result.getJSONObject(DATA_FIELD);

diff --git a/spark/src/main/java/org/opensearch/sql/spark/dispatcher/BatchQueryHandler.java b/spark/src/main/java/org/opensearch/sql/spark/dispatcher/BatchQueryHandler.java
@@ -22,15 +22,12 @@
 import org.opensearch.sql.spark.dispatcher.model.DispatchQueryRequest;
 import org.opensearch.sql.spark.dispatcher.model.DispatchQueryResponse;
 import org.opensearch.sql.spark.dispatcher.model.JobType;
-import org.opensearch.sql.spark.leasemanager.LeaseManager;
-import org.opensearch.sql.spark.leasemanager.model.LeaseRequest;
 import org.opensearch.sql.spark.response.JobExecutionResponseReader;
 
 @RequiredArgsConstructor
 public class BatchQueryHandler extends AsyncQueryHandler {
   private final EMRServerlessClient emrServerlessClient;
   private final JobExecutionResponseReader jobExecutionResponseReader;
-  protected final LeaseManager leaseManager;
 
   @Override
   protected JSONObject getResponseFromResultIndex(AsyncQueryJobMetadata asyncQueryJobMetadata) {
@@ -63,8 +60,6 @@ public String cancelJob(AsyncQueryJobMetadata asyncQueryJobMetadata) {
   @Override
   public DispatchQueryResponse submit(
       DispatchQueryRequest dispatchQueryRequest, DispatchQueryContext context) {
-    leaseManager.borrow(new LeaseRequest(JobType.BATCH, dispatchQueryRequest.getDatasource()));
-
     String jobName = dispatchQueryRequest.getClusterName() + ":" + "non-index-query";
     Map<String, String> tags = context.getTags();
     DataSourceMetadata dataSourceMetadata = context.getDataSourceMetadata();
@@ -86,6 +81,6 @@ public DispatchQueryResponse submit(
             dataSourceMetadata.getResultIndex());
     String jobId = emrServerlessClient.startJobRun(startJobRequest);
     return new DispatchQueryResponse(
-        context.getQueryId(), jobId, dataSourceMetadata.getResultIndex(), null);
+        context.getQueryId(), jobId, false, dataSourceMetadata.getResultIndex(), null);
   }
 }