diff --git a/.gitignore b/.gitignore index 19331bddbf..ca9c4a8bc4 100644 --- a/.gitignore +++ b/.gitignore @@ -551,3 +551,4 @@ dist .venv/ .vscode/ +@* diff --git a/engine/src/main/java/com/arcadedb/index/lsm/LSMTreeIndexAbstract.java b/engine/src/main/java/com/arcadedb/index/lsm/LSMTreeIndexAbstract.java index cb66a5818f..290ef77aa4 100644 --- a/engine/src/main/java/com/arcadedb/index/lsm/LSMTreeIndexAbstract.java +++ b/engine/src/main/java/com/arcadedb/index/lsm/LSMTreeIndexAbstract.java @@ -574,6 +574,7 @@ protected boolean lookupInPageAndAddInResultset(final BasePage currentPage, fina final List allValues = readAllValuesFromResult(currentPageBuffer, result); final Set validRIDs = new HashSet<>(); + final Set deletedRIDs = new HashSet<>(); final TransactionIndexContext.ComparableKey keys = new TransactionIndexContext.ComparableKey(convertedKeys); @@ -582,13 +583,28 @@ protected boolean lookupInPageAndAddInResultset(final BasePage currentPage, fina final RID rid = allValues.get(i); if (rid.getBucketId() < 0) { - removedKeys.add(keys); + // This is a deletion marker - convert to original RID + final RID originalRID = getOriginalRID(rid); + deletedRIDs.add(originalRID); + + // For unique indexes, also mark the entire key as removed + if (mainIndex.isUnique()) { + removedKeys.add(keys); + } continue; } - if (removedKeys.contains(keys)) - // HAS BEEN DELETED + // For unique indexes, check if the entire key has been removed + if (mainIndex.isUnique() && removedKeys.contains(keys)) { + // Skipping rid because key is in removedKeys (unique index) continue; + } + + // For all indexes, check if this specific RID has been deleted + if (deletedRIDs.contains(rid)) { + // Skipping rid because it is in deletedRIDs + continue; + } validRIDs.add(rid); set.add(new IndexCursorEntry(originalKeys, rid, 1)); diff --git a/engine/src/main/java/com/arcadedb/query/sql/parser/ContainsCondition.java b/engine/src/main/java/com/arcadedb/query/sql/parser/ContainsCondition.java index 
6a07343d75..7bf7eecba9 100644 --- a/engine/src/main/java/com/arcadedb/query/sql/parser/ContainsCondition.java +++ b/engine/src/main/java/com/arcadedb/query/sql/parser/ContainsCondition.java @@ -257,9 +257,9 @@ public boolean isIndexAware(final IndexSearchInfo info) { // due to the presence of a suffix/modifier. We'll check both cases: // 1. Simple identifiers: left.isBaseIdentifier() == true (e.g., "tags") // 2. Nested identifiers: Compare the full string representation (e.g., "tags.id") - + String fieldName = null; - + if (left.isBaseIdentifier()) { // Simple identifier - use default alias fieldName = left.getDefaultAlias().getStringValue(); @@ -271,13 +271,13 @@ public boolean isIndexAware(final IndexSearchInfo info) { fieldName = leftStr; } } - + if (fieldName != null && info.getField().equals(fieldName)) { // CONTAINS operator only works with BY-ITEM indexes, not regular list indexes if (info.isIndexByItem() && right != null) return right.isEarlyCalculated(info.getContext()); } - + return false; } diff --git a/engine/src/main/java/com/arcadedb/schema/TypeIndexBuilder.java b/engine/src/main/java/com/arcadedb/schema/TypeIndexBuilder.java index dfa86000fb..46351e8054 100644 --- a/engine/src/main/java/com/arcadedb/schema/TypeIndexBuilder.java +++ b/engine/src/main/java/com/arcadedb/schema/TypeIndexBuilder.java @@ -101,15 +101,15 @@ public TypeIndex create() { // First, try to find the property with the exact name (handles properties with dots in their names) Property property = type.getPolymorphicPropertyIfExists(actualPropertyName); - + if (property == null && actualPropertyName.contains(".")) { // Property with exact name doesn't exist, check if this could be a nested path final String[] pathParts = actualPropertyName.split("\\.", 2); // Split into at most 2 parts final String rootPropertyName = pathParts[0]; - + // Try to find the root property property = type.getPolymorphicPropertyIfExists(rootPropertyName); - + if (property != null) { // Found root property - 
this is a nested path // For nested paths with BY ITEM, the root must be a LIST @@ -118,14 +118,14 @@ public TypeIndex create() { "Cannot create index with BY ITEM on nested property path '" + typeName + "." + actualPropertyName + "' because the root property '" + rootPropertyName + "' is not a LIST type (found: " + property.getType() + ")"); } - + // For nested properties, we'll use STRING as the key type since we can't validate the nested structure at schema definition time // The actual type will be determined at runtime during indexing keyTypes[i++] = Type.STRING; continue; } } - + // If we still don't have a property, it doesn't exist if (property == null) { throw new SchemaException( diff --git a/engine/src/test/java/com/arcadedb/index/Issue2814FilteringWithIndexTest.java b/engine/src/test/java/com/arcadedb/index/Issue2814FilteringWithIndexTest.java new file mode 100644 index 0000000000..2455545940 --- /dev/null +++ b/engine/src/test/java/com/arcadedb/index/Issue2814FilteringWithIndexTest.java @@ -0,0 +1,196 @@ +/* + * Copyright © 2021-present Arcade Data Ltd (info@arcadedata.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *
+ * SPDX-FileCopyrightText: 2021-present Arcade Data Ltd (info@arcadedata.com)
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package com.arcadedb.index;
+
+import com.arcadedb.TestHelper;
+import com.arcadedb.query.sql.executor.Result;
+import com.arcadedb.query.sql.executor.ResultSet;
+import org.junit.jupiter.api.Test;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.assertj.core.api.Assertions.assertThat;
+
+/**
+ * Test for Issue #2814: Filtering with index - https://github.com/ArcadeData/arcadedb/issues/2814
+ * Tests that parameterized multi-field UPDATE statements correctly update indexes.
+ *
+ * The bug manifests when:
+ * 1. A non-unique index exists on a property (e.g., status)
+ * 2. A parameterized UPDATE statement updates multiple fields including the indexed field
+ * 3. After the update, WHERE clauses using the indexed field return incorrect results
+ */
+public class Issue2814FilteringWithIndexTest extends TestHelper {
+
+  private String parentRid;
+
+  @Override
+  public void beginTest() {
+    database.transaction(() -> {
+      // Fixture schema: two document types, with Child linking to Parent, as in the issue report
+      database.command("sql", "CREATE DOCUMENT TYPE Parent");
+      database.command("sql", "CREATE DOCUMENT TYPE Child");
+      database.command("sql", "CREATE PROPERTY Child.uid STRING");
+      database.command("sql", "CREATE PROPERTY Child.status STRING (default 'synced')");
+      database.command("sql", "CREATE PROPERTY Child.version INTEGER (default 1)");
+      database.command("sql", "CREATE PROPERTY Child.parent LINK OF Parent");
+
+      // Create non-unique index on status field
+      database.command("sql", "CREATE INDEX ON Child (status) NOTUNIQUE");
+
+      // Create parent and remember its RID so the children can link to it
+      final ResultSet result = database.command("sql", "INSERT INTO Parent SET name = 'p1' RETURN @this");
+      parentRid = result.next().getIdentity().orElseThrow().toString();
+
+      // Insert 3 children WITHOUT explicit status (use default 'synced')
+      database.command("sql", "INSERT INTO Child SET uid = 'c1', parent = " + parentRid);
+      database.command("sql", "INSERT INTO Child SET uid = 'c2', parent = " + parentRid);
+      database.command("sql", "INSERT INTO Child SET uid = 'c3', parent = " + parentRid);
+
+      // Mark c1 and c2 as pending
+      database.command("sql", "UPDATE Child SET status = 'pending' WHERE uid = 'c1'");
+      database.command("sql", "UPDATE Child SET status = 'pending' WHERE uid = 'c2'");
+    });
+  }
+
+  @Test
+  public void testFilteringBeforeParameterizedUpdate() {
+    // Verify initial state - should find 2 pending children
+    database.transaction(() -> {
+      final ResultSet pending = database.query("sql", "SELECT uid, status FROM Child WHERE status = 'pending'");
+      final List<Result> pendingList = pending.stream().toList();
+
+      assertThat(pendingList).hasSize(2);
+
+      final List<String> uids = pendingList.stream()
+          .map(r -> r.<String>getProperty("uid"))
+          .sorted()
+          .toList();
+      assertThat(uids).containsExactly("c1", "c2");
+    });
+  }
+
+  @Test
+  public void testFilteringAfterParameterizedMultiFieldUpdate() {
+    // This is the main test for the bug: parameterized multi-field UPDATE breaks index
+    database.transaction(() -> {
+      // Update c1 with parameterized multi-field UPDATE (including version field)
+      final Map<String, Object> params = new HashMap<>();
+      params.put("uid", "c1");
+      params.put("version", 2);
+      params.put("status", "synced");
+
+      database.command("sql", "UPDATE Child SET version = :version, status = :status WHERE uid = :uid", params);
+    });
+
+    // BUG TEST: After parameterized update, WHERE status='pending' should find c2
+    database.transaction(() -> {
+      final ResultSet pending = database.query("sql", "SELECT uid, status FROM Child WHERE status = 'pending'");
+      final List<Result> pendingList = pending.stream().toList();
+
+      // Should find exactly 1 pending record (c2)
+      assertThat(pendingList).hasSize(1);
+      assertThat(pendingList.get(0).<String>getProperty("uid")).isEqualTo("c2");
+    });
+
+    // BUG TEST: WHERE status='synced' should find c1 and c3
+    database.transaction(() -> {
+      final ResultSet synced = database.query("sql", "SELECT uid, status FROM Child WHERE status = 'synced'");
+      final List<Result> syncedList = synced.stream().toList();
+
+      // Should find exactly 2 synced records (c1, c3)
+      assertThat(syncedList).hasSize(2);
+
+      final List<String> uids = syncedList.stream()
+          .map(r -> r.<String>getProperty("uid"))
+          .sorted()
+          .toList();
+      assertThat(uids).containsExactly("c1", "c3");
+    });
+
+    // Verify all 3 children still exist with correct statuses
+    database.transaction(() -> {
+      final ResultSet all = database.query("sql", "SELECT uid, status, version FROM Child ORDER BY uid");
+      final List<Result> allList = all.stream().toList();
+
+      assertThat(allList).hasSize(3);
+
+      // c1 should be synced with version 2
+      final Result c1 = allList.stream()
+          .filter(r -> "c1".equals(r.<String>getProperty("uid")))
+          .findFirst()
+          .orElseThrow();
+      assertThat(c1.<String>getProperty("status")).isEqualTo("synced");
+      assertThat(c1.<Integer>getProperty("version")).isEqualTo(2);
+
+      // c2 should still be pending
+      final Result c2 = allList.stream()
+          .filter(r -> "c2".equals(r.<String>getProperty("uid")))
+          .findFirst()
+          .orElseThrow();
+      assertThat(c2.<String>getProperty("status")).isEqualTo("pending");
+
+      // c3 should be synced (was never changed)
+      final Result c3 = allList.stream()
+          .filter(r -> "c3".equals(r.<String>getProperty("uid")))
+          .findFirst()
+          .orElseThrow();
+      assertThat(c3.<String>getProperty("status")).isEqualTo("synced");
+    });
+  }
+
+  @Test
+  public void testFilteringAfterParameterizedSingleFieldUpdate() {
+    // Test that single-field parameterized UPDATE works correctly (comparison test)
+    database.transaction(() -> {
+      // Update c2 with parameterized single-field UPDATE (only status)
+      final Map<String, Object> params = new HashMap<>();
+      params.put("uid", "c2");
+      params.put("status", "synced");
+
+      database.command("sql", "UPDATE Child SET status = :status WHERE uid = :uid", params);
+    });
+
+    // Verify filtering works correctly after single-field parameterized update
+    database.transaction(() -> {
+      final ResultSet pending = database.query("sql", "SELECT uid, status FROM Child WHERE status = 'pending'");
+      final List<Result> pendingList = pending.stream().toList();
+
+      // After updating c2, only c1 should still be pending
+      assertThat(pendingList).hasSize(1);
+      assertThat(pendingList.get(0).<String>getProperty("uid")).isEqualTo("c1");
+    });
+
+    database.transaction(() -> {
+      final ResultSet synced = database.query("sql", "SELECT uid, status FROM Child WHERE status = 'synced'");
+      final List<Result> syncedList = synced.stream().toList();
+
+      // Should find c2 and c3 as synced
+      assertThat(syncedList).hasSize(2);
+
+      final List<String> uids = syncedList.stream()
+          .map(r -> r.<String>getProperty("uid"))
+          .sorted()
+          .toList();
+      assertThat(uids).containsExactly("c2", "c3");
+    });
+  }
+}