diff --git a/apps/dav/lib/SystemTag/SystemTagPlugin.php b/apps/dav/lib/SystemTag/SystemTagPlugin.php index eaf3a4d81438c..b0c3cb08fd240 100644 --- a/apps/dav/lib/SystemTag/SystemTagPlugin.php +++ b/apps/dav/lib/SystemTag/SystemTagPlugin.php @@ -162,7 +162,7 @@ private function createTag($data, $contentType = 'application/json') { throw new BadRequest('Missing "name" attribute'); } - $tagName = $data['name']; + $tagName = Util::sanitizeWordsAndEmojis($data['name']); $userVisible = true; $userAssignable = true; diff --git a/apps/settings/composer/composer/autoload_classmap.php b/apps/settings/composer/composer/autoload_classmap.php index 4cc6d25dff66e..e9002cf01f62f 100644 --- a/apps/settings/composer/composer/autoload_classmap.php +++ b/apps/settings/composer/composer/autoload_classmap.php @@ -130,6 +130,7 @@ 'OCA\\Settings\\SetupChecks\\PushService' => $baseDir . '/../lib/SetupChecks/PushService.php', 'OCA\\Settings\\SetupChecks\\RandomnessSecure' => $baseDir . '/../lib/SetupChecks/RandomnessSecure.php', 'OCA\\Settings\\SetupChecks\\ReadOnlyConfig' => $baseDir . '/../lib/SetupChecks/ReadOnlyConfig.php', + 'OCA\\Settings\\SetupChecks\\RepairSanitizeSystemTagsAvailable' => $baseDir . '/../lib/SetupChecks/RepairSanitizeSystemTagsAvailable.php', 'OCA\\Settings\\SetupChecks\\SchedulingTableSize' => $baseDir . '/../lib/SetupChecks/SchedulingTableSize.php', 'OCA\\Settings\\SetupChecks\\SecurityHeaders' => $baseDir . '/../lib/SetupChecks/SecurityHeaders.php', 'OCA\\Settings\\SetupChecks\\ServerIdConfig' => $baseDir . '/../lib/SetupChecks/ServerIdConfig.php', diff --git a/apps/settings/composer/composer/autoload_static.php b/apps/settings/composer/composer/autoload_static.php index d51d178546338..ac638be4d28dc 100644 --- a/apps/settings/composer/composer/autoload_static.php +++ b/apps/settings/composer/composer/autoload_static.php @@ -145,6 +145,7 @@ class ComposerStaticInitSettings 'OCA\\Settings\\SetupChecks\\PushService' => __DIR__ . '/..' . '/../lib/SetupChecks/PushService.php', 'OCA\\Settings\\SetupChecks\\RandomnessSecure' => __DIR__ . '/..' . '/../lib/SetupChecks/RandomnessSecure.php', 'OCA\\Settings\\SetupChecks\\ReadOnlyConfig' => __DIR__ . '/..' . '/../lib/SetupChecks/ReadOnlyConfig.php', + 'OCA\\Settings\\SetupChecks\\RepairSanitizeSystemTagsAvailable' => __DIR__ . '/..' . '/../lib/SetupChecks/RepairSanitizeSystemTagsAvailable.php', 'OCA\\Settings\\SetupChecks\\SchedulingTableSize' => __DIR__ . '/..' . '/../lib/SetupChecks/SchedulingTableSize.php', 'OCA\\Settings\\SetupChecks\\SecurityHeaders' => __DIR__ . '/..' . '/../lib/SetupChecks/SecurityHeaders.php', 'OCA\\Settings\\SetupChecks\\ServerIdConfig' => __DIR__ . '/..' . '/../lib/SetupChecks/ServerIdConfig.php', diff --git a/apps/settings/lib/AppInfo/Application.php b/apps/settings/lib/AppInfo/Application.php index 7a3f2a9316fae..850141a8e928d 100644 --- a/apps/settings/lib/AppInfo/Application.php +++ b/apps/settings/lib/AppInfo/Application.php @@ -67,6 +67,7 @@ use OCA\Settings\SetupChecks\PushService; use OCA\Settings\SetupChecks\RandomnessSecure; use OCA\Settings\SetupChecks\ReadOnlyConfig; +use OCA\Settings\SetupChecks\RepairSanitizeSystemTagsAvailable; use OCA\Settings\SetupChecks\SchedulingTableSize; use OCA\Settings\SetupChecks\SecurityHeaders; use OCA\Settings\SetupChecks\ServerIdConfig; @@ -207,6 +208,7 @@ public function register(IRegistrationContext $context): void { $context->registerSetupCheck(PhpOutputBuffering::class); $context->registerSetupCheck(RandomnessSecure::class); $context->registerSetupCheck(ReadOnlyConfig::class); + $context->registerSetupCheck(RepairSanitizeSystemTagsAvailable::class); $context->registerSetupCheck(SecurityHeaders::class); $context->registerSetupCheck(ServerIdConfig::class); $context->registerSetupCheck(SchedulingTableSize::class); diff --git a/apps/settings/lib/SetupChecks/RepairSanitizeSystemTagsAvailable.php b/apps/settings/lib/SetupChecks/RepairSanitizeSystemTagsAvailable.php new file mode 100644 index 0000000000000..24464b09121d2 --- /dev/null +++ b/apps/settings/lib/SetupChecks/RepairSanitizeSystemTagsAvailable.php @@ -0,0 +1,41 @@ +l10n->t('Sanitize and merge duplicate system tags available'); + } + + public function run(): SetupResult { + if ($this->repairSanitizeSystemTags->migrationsAvailable()) { + return SetupResult::warning( + $this->l10n->t('One or more system tags need to be sanitized or merged. This can take a long time on larger instances so this is not done automatically during upgrades. Use the command `occ maintenance:repair --include-expensive` to perform the migrations.'), + ); + } else { + return SetupResult::success('None'); + } + } +} diff --git a/lib/composer/composer/autoload_classmap.php b/lib/composer/composer/autoload_classmap.php index 1810eab032c74..278aa93c51f0e 100644 --- a/lib/composer/composer/autoload_classmap.php +++ b/lib/composer/composer/autoload_classmap.php @@ -2007,6 +2007,7 @@ 'OC\\Repair\\RepairInvalidShares' => $baseDir . '/lib/private/Repair/RepairInvalidShares.php', 'OC\\Repair\\RepairLogoDimension' => $baseDir . '/lib/private/Repair/RepairLogoDimension.php', 'OC\\Repair\\RepairMimeTypes' => $baseDir . '/lib/private/Repair/RepairMimeTypes.php', + 'OC\\Repair\\RepairSanitizeSystemTags' => $baseDir . '/lib/private/Repair/RepairSanitizeSystemTags.php', 'OC\\RichObjectStrings\\RichTextFormatter' => $baseDir . '/lib/private/RichObjectStrings/RichTextFormatter.php', 'OC\\RichObjectStrings\\Validator' => $baseDir . '/lib/private/RichObjectStrings/Validator.php', 'OC\\Route\\CachingRouter' => $baseDir . '/lib/private/Route/CachingRouter.php', diff --git a/lib/composer/composer/autoload_static.php b/lib/composer/composer/autoload_static.php index 753e56107d0d9..e7527a071b6d4 100644 --- a/lib/composer/composer/autoload_static.php +++ b/lib/composer/composer/autoload_static.php @@ -2048,6 +2048,7 @@ class ComposerStaticInit749170dad3f5e7f9ca158f5a9f04f6a2 'OC\\Repair\\RepairInvalidShares' => __DIR__ . '/../../..' . '/lib/private/Repair/RepairInvalidShares.php', 'OC\\Repair\\RepairLogoDimension' => __DIR__ . '/../../..' . '/lib/private/Repair/RepairLogoDimension.php', 'OC\\Repair\\RepairMimeTypes' => __DIR__ . '/../../..' . '/lib/private/Repair/RepairMimeTypes.php', + 'OC\\Repair\\RepairSanitizeSystemTags' => __DIR__ . '/../../..' . '/lib/private/Repair/RepairSanitizeSystemTags.php', 'OC\\RichObjectStrings\\RichTextFormatter' => __DIR__ . '/../../..' . '/lib/private/RichObjectStrings/RichTextFormatter.php', 'OC\\RichObjectStrings\\Validator' => __DIR__ . '/../../..' . '/lib/private/RichObjectStrings/Validator.php', 'OC\\Route\\CachingRouter' => __DIR__ . '/../../..' . '/lib/private/Route/CachingRouter.php', diff --git a/lib/private/Repair.php b/lib/private/Repair.php index 73f2ae0617818..365166a78fa45 100644 --- a/lib/private/Repair.php +++ b/lib/private/Repair.php @@ -57,6 +57,7 @@ use OC\Repair\RepairInvalidShares; use OC\Repair\RepairLogoDimension; use OC\Repair\RepairMimeTypes; +use OC\Repair\RepairSanitizeSystemTags; use OC\Template\JSCombiner; use OCA\DAV\Migration\DeleteSchedulingObjects; use OCA\DAV\Migration\RemoveObjectProperties; @@ -221,6 +222,7 @@ public static function getExpensiveRepairSteps() { ), \OCP\Server::get(DeleteSchedulingObjects::class), \OC::$server->get(RemoveObjectProperties::class), + \OCP\Server::get(RepairSanitizeSystemTags::class), ]; } diff --git a/lib/private/Repair/RepairSanitizeSystemTags.php b/lib/private/Repair/RepairSanitizeSystemTags.php new file mode 100644 index 0000000000000..fc432882ef4de --- /dev/null +++ b/lib/private/Repair/RepairSanitizeSystemTags.php @@ -0,0 +1,352 @@ +dryRun = true; + $this->sanitizeAndMergeTags(new NullOutput()); + $this->dryRun = false; + return $this->changeCount > 0; + } + + public function run(IOutput $output): void { + $this->dryRun = false; + $this->sanitizeAndMergeTags($output); + } + + private function sanitizeAndMergeTags(IOutput $output): void { + $output->info('Starting sanitization of system tags...'); + + $tags = $this->getAllTags(); + + // Group tags by sanitized name + $sanitizedMap = []; + $totalTags = 0; + foreach ($tags as $tag) { + $sanitizedMap[$tag['sanitizedName']][] = $tag; + $totalTags++; + } + + $sanitizeCount = count($sanitizedMap); + $output->info("Found $totalTags tags with $sanitizeCount unique sanitized names."); + + // Get object counts for all tags in one query + $objectCounts = []; + foreach ($this->getAllTagObjectCounts() as $tagId => $count) { + $objectCounts[$tagId] = $count; + } + + // Process each sanitized name group + foreach ($sanitizedMap as $sanitizedName => $group) { + // Single tag, no duplicates found + if (count($group) === 1) { + $tag = $group[0]; + if ($tag['originalName'] !== $sanitizedName) { + if (!$this->dryRun) { + $qb = $this->connection->getQueryBuilder(); + $qb->update('systemtag') + ->set('name', $qb->createNamedParameter($sanitizedName)) + ->where($qb->expr()->eq('id', $qb->createNamedParameter($tag['id']))) + ->executeStatement(); + } + $this->changeCount++; + $output->info("Sanitized tag ID {$tag['id']}: '{$tag['originalName']}' → '$sanitizedName'"); + } + continue; + } + + // Multiple tags with same sanitized name - merge them + $this->mergeTagGroup($group, $sanitizedName, $objectCounts, $output); + } + + $output->info('System tag sanitization and merge completed.'); + } + + private function mergeTagGroup(array $group, string $sanitizedName, array $objectCounts, IOutput $output): void { + // Validate that all tags in the group have the same visibility and editable settings + $firstTag = $group[0]; + $visibility = $firstTag['visibility']; + $editable = $firstTag['editable']; + + foreach ($group as $tag) { + if ($tag['visibility'] !== $visibility || $tag['editable'] !== $editable) { + $output->warning( + "Cannot merge tag group '$sanitizedName': tags have different visibility or editable settings. " + . 'Manual verification required. Tag IDs: ' . implode(', ', array_column($group, 'id')) + ); + return; + } + } + + // Determine which tag to keep (most object mappings, then lowest ID as tiebreaker) + $keepTag = null; + $maxCount = -1; + + foreach ($group as $tag) { + $count = $objectCounts[$tag['id']] ?? 0; + if ($count > $maxCount || ($count === $maxCount && ($keepTag === null || $tag['id'] < $keepTag['id']))) { + $maxCount = $count; + $keepTag = $tag; + } + } + + $keepId = $keepTag['id']; + if ($keepTag === null) { + $output->warning("Cannot merge tag group '$sanitizedName': unable to determine which tag to keep"); + return; + } + + $duplicateIds = array_filter(array_column($group, 'id'), fn ($id) => $id !== $keepId); + if (empty($duplicateIds)) { + return; + } + + if (!$this->dryRun) { + $this->connection->beginTransaction(); + try { + // Step 1: Delete ALL mappings from duplicate tags that conflict with keepId + // This must happen FIRST before any updates to avoid unique constraint violations + $this->deleteConflictingMappings($duplicateIds, $keepId); + + // Step 2: Update all remaining mappings from duplicates to keepId + // These won't conflict because we just deleted the conflicts + $qb = $this->connection->getQueryBuilder(); + $qb->update('systemtag_object_mapping') + ->set('systemtagid', $qb->createNamedParameter($keepId)) + ->where($qb->expr()->in('systemtagid', $qb->createNamedParameter($duplicateIds, IQueryBuilder::PARAM_INT_ARRAY))) + ->executeStatement(); + + // Step 3: Delete duplicate tags in bulk (safe now that mappings are gone) + $qb = $this->connection->getQueryBuilder(); + $qb->delete('systemtag') + ->where($qb->expr()->in('id', $qb->createNamedParameter($duplicateIds, IQueryBuilder::PARAM_INT_ARRAY))) + ->executeStatement(); + + // Step 4: Sanitize the kept tag name if needed + // This is safe because we've already deleted all duplicates with the same sanitized name + if ($keepTag['originalName'] !== $sanitizedName) { + $qb = $this->connection->getQueryBuilder(); + $qb->update('systemtag') + ->set('name', $qb->createNamedParameter($sanitizedName)) + ->where($qb->expr()->eq('id', $qb->createNamedParameter($keepId))) + ->executeStatement(); + } + + $this->connection->commit(); + } catch (\Exception $e) { + $this->connection->rollBack(); + $output->warning("Failed to merge tag group '$sanitizedName': " . $e->getMessage()); + return; + } + } + + $this->changeCount += count($duplicateIds); + if ($keepTag['originalName'] !== $sanitizedName) { + $this->changeCount++; + } + + $duplicateIdsList = implode(', ', $duplicateIds); + $output->info("Merged tags [$duplicateIdsList] into ID $keepId (sanitized: '$sanitizedName')"); + } + + /** + * Delete mappings from duplicate tags where the same object is already mapped to keepId + * This prevents unique constraint violations when updating systemtagid + */ + private function deleteConflictingMappings(array $duplicateIds, int $keepId): void { + $batchSize = 1000; + $batch = []; + + // Stream keepId mappings and process in batches + $qb = $this->connection->getQueryBuilder(); + $qb->select('objectid', 'objecttype') + ->from('systemtag_object_mapping') + ->where($qb->expr()->eq('systemtagid', $qb->createNamedParameter($keepId))); + + $result = $qb->executeQuery(); + + while ($mapping = $result->fetch()) { + $batch[] = $mapping; + + // When batch is full, delete conflicts for this batch + if (count($batch) >= $batchSize) { + $this->deleteBatchConflicts($batch, $duplicateIds); + $batch = []; // Clear batch + } + } + + $result->closeCursor(); + + // Process remaining mappings in the last batch + if (!empty($batch)) { + $this->deleteBatchConflicts($batch, $duplicateIds); + } + } + + /** + * Delete mappings in a batch that conflict with keepId mappings + */ + private function deleteBatchConflicts(array $batch, array $duplicateIds): void { + $qb = $this->connection->getQueryBuilder(); + $qb->delete('systemtag_object_mapping') + ->where($qb->expr()->in('systemtagid', $qb->createNamedParameter($duplicateIds, IQueryBuilder::PARAM_INT_ARRAY))); + + $orX = $qb->expr()->orX(); + foreach ($batch as $mapping) { + $orX->add($qb->expr()->andX( + $qb->expr()->eq('objectid', $qb->createNamedParameter($mapping['objectid'])), + $qb->expr()->eq('objecttype', $qb->createNamedParameter($mapping['objecttype'])) + )); + } + $qb->andWhere($orX); + $qb->executeStatement(); + } + + /** + * Check if a tag name already exists (excluding a specific tag ID) + */ + private function tagNameExists(string $name, int $excludeId): bool { + $qb = $this->connection->getQueryBuilder(); + $qb->select('id') + ->from('systemtag') + ->where($qb->expr()->eq('name', $qb->createNamedParameter($name))) + ->andWhere($qb->expr()->neq('id', $qb->createNamedParameter($excludeId))) + ->setMaxResults(1); + + $result = $qb->executeQuery(); + $exists = $result->fetch() !== false; + $result->closeCursor(); + + return $exists; + } + + // Fetch all tags in batches to avoid memory issues + private function getAllTags(int $offset = 0, ?int $limit = null): \Iterator { + $maxBatchSize = 1000; + + do { + if ($limit !== null) { + $batchSize = min($limit, $maxBatchSize); + $limit -= $batchSize; + } else { + $batchSize = $maxBatchSize; + } + + $tags = $this->getTags($batchSize, $offset); + $offset += $batchSize; + + foreach ($tags as $tag) { + yield $tag; + } + + } while (count($tags) === $batchSize && $limit !== 0); + } + + // Fetch tags from the database + private function getTags($limit = null, $offset = null): array { + $qb = $this->connection->getQueryBuilder(); + $qb->select('id', 'name', 'visibility', 'editable') + ->from('systemtag') + ->orderBy('name') + ->addOrderBy('id'); + + $tags = []; + + // Apply limit and offset if provided + if ($limit !== null) { + $qb->setMaxResults($limit); + } + if ($offset !== null) { + $qb->setFirstResult($offset); + } + + // Fetch and return tags + $result = $qb->executeQuery(); + while ($row = $result->fetch()) { + $tags[] = [ + 'id' => (int)$row['id'], + 'originalName' => $row['name'], + 'sanitizedName' => Util::sanitizeWordsAndEmojis($row['name']), + 'visibility' => (int)$row['visibility'], + 'editable' => (int)$row['editable'], + ]; + } + $result->closeCursor(); + return $tags; + } + + private function getAllTagObjectCounts(int $offset = 0, ?int $limit = null): \Iterator { + $maxBatchSize = 1000; + + do { + if ($limit !== null) { + $batchSize = min($limit, $maxBatchSize); + $limit -= $batchSize; + } else { + $batchSize = $maxBatchSize; + } + + $counts = $this->getTagObjectCounts($batchSize, $offset); + $offset += $batchSize; + + foreach ($counts as $tagId => $count) { + yield $tagId => $count; + } + + } while (count($counts) === $batchSize && $limit !== 0); + } + + // Get object counts for all tags in one efficient query + private function getTagObjectCounts($limit = null, $offset = null): array { + $qb = $this->connection->getQueryBuilder(); + $qb->select('systemtagid') + ->selectAlias($qb->createFunction('COUNT(*)'), 'cnt') + ->from('systemtag_object_mapping') + ->groupBy('systemtagid'); + + $counts = []; + + // Apply limit and offset if provided + if ($limit !== null) { + $qb->setMaxResults($limit); + } + if ($offset !== null) { + $qb->setFirstResult($offset); + } + + // Fetch and return counts + $result = $qb->executeQuery(); + while ($row = $result->fetch()) { + $counts[(int)$row['systemtagid']] = (int)$row['cnt']; + } + $result->closeCursor(); + return $counts; + } +} diff --git a/lib/private/SystemTag/SystemTagManager.php b/lib/private/SystemTag/SystemTagManager.php index 2b8992f0aef35..82c0a5a60bc95 100644 --- a/lib/private/SystemTag/SystemTagManager.php +++ b/lib/private/SystemTag/SystemTagManager.php @@ -23,6 +23,7 @@ use OCP\SystemTag\TagCreationForbiddenException; use OCP\SystemTag\TagNotFoundException; use OCP\SystemTag\TagUpdateForbiddenException; +use OCP\Util; /** * Manager class for system tags @@ -156,6 +157,8 @@ public function createTag(string $tagName, bool $userVisible, bool $userAssignab throw new TagCreationForbiddenException(); } + $tagName = Util::sanitizeWordsAndEmojis($tagName); + // Check if tag already exists (case-insensitive) $existingTags = $this->getAllTags(null, $tagName); foreach ($existingTags as $existingTag) { @@ -225,8 +228,9 @@ public function updateTag( } $beforeUpdate = array_shift($tags); + $newName = Util::sanitizeWordsAndEmojis($newName); + // Length of name column is 64 - $newName = trim($newName); $truncatedNewName = substr($newName, 0, 64); $afterUpdate = new SystemTag( $tagId, @@ -451,5 +455,4 @@ public function getTagGroups(ISystemTag $tag): array { return $groupIds; } - } diff --git a/lib/public/Util.php b/lib/public/Util.php index 70a862880f19d..3a3a3c2d73244 100644 --- a/lib/public/Util.php +++ b/lib/public/Util.php @@ -662,4 +662,25 @@ public static function isFunctionEnabled(string $functionName): bool { } return true; } + + /** + * Sanitize a name by removing unwanted characters + * + * This function removes any character that is not a letter, space, or symbol (including emojis). + * It also normalizes spaces by replacing multiple consecutive spaces with a single space and trimming + * leading and trailing spaces. + * + * @param string $input The input string to sanitize + * @return string The sanitized string + * @since 33.0.0 + */ + public static function sanitizeWordsAndEmojis(string $input): string { + // Remove control characters and other invisible separators, but keep everything else + $clean = preg_replace('/[\p{C}]+/u', '', $input); + + // Normalize whitespace to single spaces + $clean = preg_replace('/[[:space:]]+/u', ' ', trim($clean)); + + return $clean; + } } diff --git a/tests/lib/UtilTest.php b/tests/lib/UtilTest.php index 6738cafdb1ff2..28f1ee9d13615 100644 --- a/tests/lib/UtilTest.php +++ b/tests/lib/UtilTest.php @@ -416,4 +416,57 @@ public function testRecursiveArraySearch(): void { $this->assertFalse($result); } + + public static function sanitizeProvider(): array { + return [ + // Basic spaces and line controls + ['Hello World', 'Hello World'], + [' Hello World ', 'Hello World'], + ["Hello\t World \nAgain", 'Hello World Again'], + ["Hello\rWorld", 'HelloWorld'], + ["Hello\r\nWorld", 'HelloWorld'], + ["Hello\u{200B}World", 'HelloWorld'], // zero-width space removed + ["Hello\t\n\r World", 'Hello World'], + + // Unicode, emoji, and CJK + ['テスト 😃 💬', 'テスト 😃 💬'], + ['中文測試 ✅', '中文測試 ✅'], + ['Русский текст 😁', 'Русский текст 😁'], + ['Café crème ☕', 'Café crème ☕'], + + // Punctuation and filename-like + ['Hello-World_123.', 'Hello-World_123.'], + ['File.name, with commas', 'File.name, with commas'], + ['Smile — dash', 'Smile — dash'], + ['Invalid:/\\?%*|<>name', 'Invalid:/\\?%*|<>name'], // kept as is + ['test@example.com', 'test@example.com'], + + // Control and invisible chars + ["Bad\0Name", 'BadName'], + ["Hello\u{0007}World", 'HelloWorld'], + ["Line\r\nbreaks", 'Linebreaks'], + ["\x1F Hidden control", 'Hidden control'], + + // Whitespace and normalization + [" Multiple spaces\t and \nnewlines ", 'Multiple spaces and newlines'], + ["No-break\u{00A0}space", 'No-break space'], // NBSP normalized + ["Zero\u{2003}width\u{2009}spaces", 'Zero width spaces'], // various spaces + + // Complex mixes + ['テスト 💬.png', 'テスト 💬.png'], + [' Mix 😎 emojis 🎉 and 123 numbers ', 'Mix 😎 emojis 🎉 and 123 numbers'], + ["Hello \u{200B}\n World", 'Hello World'], + ['Path ../etc/passwd', 'Path ../etc/passwd'], + ['Symbols! @ # % ^ & * ( )', 'Symbols! @ # % ^ & * ( )'], + ['Special chars