From 93891dd14a699a12bed49e3d86d94c1f8c871b5d Mon Sep 17 00:00:00 2001 From: Steven Date: Sun, 14 Jul 2024 17:46:47 -0400 Subject: [PATCH] 1405 tags in document-order (#1477) * Capture tags in document order #1405 * rewrite tags to reestablish full doc order amongst all #1405 --- OneMore/Commands/Tagging/Hashtag.cs | 6 + .../Commands/Tagging/HashtagPageScanner.cs | 4 + OneMore/Commands/Tagging/HashtagProvider.cs | 168 ++++++++++++++---- OneMore/Commands/Tagging/HashtagScanner.cs | 30 ++-- OneMore/Commands/Tagging/HashtagsDB.sql | 2 +- 5 files changed, 152 insertions(+), 58 deletions(-) diff --git a/OneMore/Commands/Tagging/Hashtag.cs b/OneMore/Commands/Tagging/Hashtag.cs index 49720c2e92..c422b1af4e 100644 --- a/OneMore/Commands/Tagging/Hashtag.cs +++ b/OneMore/Commands/Tagging/Hashtag.cs @@ -76,6 +76,12 @@ internal class Hashtag public string Snippet { get; set; } + /// + /// Gets the document-order index value of the snippet on the page + /// + public int DocumentOrder { get; set; } + + /// /// Gets whether this instance was the result of a direct search hit or is /// just another tag on the same page diff --git a/OneMore/Commands/Tagging/HashtagPageScanner.cs b/OneMore/Commands/Tagging/HashtagPageScanner.cs index a2ce6f94b8..0291e5bde9 100644 --- a/OneMore/Commands/Tagging/HashtagPageScanner.cs +++ b/OneMore/Commands/Tagging/HashtagPageScanner.cs @@ -26,6 +26,7 @@ internal class HashtagPageScanner private readonly SearchAndReplaceEditor editor; private readonly string pageID; private bool keepTags; + private int documentOrder; /// @@ -102,6 +103,8 @@ public Hashtags Scan() if (paragraphs.Any()) { + documentOrder = 0; + foreach (var paragraph in paragraphs) { var count = tags.Count; @@ -167,6 +170,7 @@ private void ScanParagraph(XElement paragraph, Hashtags tags) PageID = pageID, ObjectID = objectID, Snippet = context, + DocumentOrder = documentOrder++, LastModified = lastModifiedTime }); } diff --git a/OneMore/Commands/Tagging/HashtagProvider.cs 
b/OneMore/Commands/Tagging/HashtagProvider.cs index b12a1a04cb..8685affb73 100644 --- a/OneMore/Commands/Tagging/HashtagProvider.cs +++ b/OneMore/Commands/Tagging/HashtagProvider.cs @@ -2,6 +2,8 @@ // Copyright © 2023 Steven M Cohn. All rights reserved. //************************************************************************************************ +#pragma warning disable S1133 // Deprecated code should be removed + namespace River.OneMoreAddIn.Commands { using River.OneMoreAddIn.Properties; @@ -362,20 +364,20 @@ private int Upgrade3to4(SQLiteConnection con) logger.Start(); using var cmd = con.CreateCommand(); + cmd.CommandType = CommandType.Text; + using var transaction = con.BeginTransaction(); try { logger.WriteLine("updating table hashtag_notebook"); - cmd.CommandType = CommandType.Text; cmd.CommandText = "ALTER TABLE hashtag_notebook " + "ADD COLUMN lastModified TEXT NOT NULL default('')"; cmd.ExecuteNonQuery(); - cmd.CommandType = CommandType.Text; cmd.CommandText = "UPDATE hashtag_notebook AS nb SET lastModified = COALESCE(" + "(SELECT MAX(t.lastModified) " + @@ -389,11 +391,68 @@ private int Upgrade3to4(SQLiteConnection con) } catch (Exception exc) { + transaction.Rollback(); logger.End(); logger.WriteLine("error updating table hashtag_notebook", exc); return 0; } + try + { + logger.WriteLine("updating table hashtag"); + + cmd.CommandText = + "CREATE TABLE hashtag_v4 " + + "(tag TEXT NOT NULL, moreID TEXT NOT NULL, objectID TEXT NOT NULL, " + + "snippet TEXT, documentOrder INTEGER DEFAULT (0), lastModified TEXT NOT NULL, " + + "PRIMARY KEY (tag, objectID), " + + "CONSTRAINT FK_moreID FOREIGN KEY (moreID) REFERENCES hashtag_page (moreID) " + + "ON DELETE CASCADE)"; + + cmd.ExecuteNonQuery(); + + cmd.CommandText = + "INSERT INTO hashtag_v4 (tag, moreID, objectID, snippet, lastModified) " + + "SELECT tag, moreID, objectID, snippet, lastModified " + + "FROM hashtag"; + + cmd.ExecuteNonQuery(); + + cmd.CommandText = "DROP INDEX IDX_moreID"; + 
cmd.ExecuteNonQuery(); + + cmd.CommandText = "DROP INDEX IDX_tag"; + cmd.ExecuteNonQuery(); + + cmd.CommandText = "DROP TABLE hashtag"; + cmd.ExecuteNonQuery(); + + cmd.CommandText = "DROP VIEW page_hashtags"; + cmd.ExecuteNonQuery(); + + cmd.CommandText = "ALTER TABLE hashtag_v4 RENAME TO hashtag"; + cmd.ExecuteNonQuery(); + + cmd.CommandText = "CREATE INDEX IDX_moreID ON hashtag(moreID)"; + cmd.ExecuteNonQuery(); + + cmd.CommandText = "CREATE INDEX IDX_tag ON hashtag(tag)"; + cmd.ExecuteNonQuery(); + + cmd.CommandText = "CREATE VIEW IF NOT EXISTS page_hashtags (moreID, tags) AS " + + "SELECT t.moreID, group_concat(DISTINCT(t.tag)) AS tags " + + "FROM hashtag t GROUP BY t.moreID"; + + cmd.ExecuteNonQuery(); + } + catch (Exception exc) + { + transaction.Rollback(); + logger.End(); + logger.WriteLine("error updating table hashtag", exc); + return 0; + } + if (!UpgradeSchemaVersion(cmd, transaction, version)) { return 0; @@ -522,6 +581,7 @@ public void DeletePhantoms(List knownIDs, string sectionID, string secti /// Deletes the specified tags /// /// A collection of Hashtags + [Obsolete("Was used as part of original tag resolution logic")] public void DeleteTags(Hashtags tags) { using var cmd = con.CreateCommand(); @@ -700,7 +760,8 @@ public Hashtags ReadPageTags(string pageID) "p.notebookID, p.sectionID, t.lastModified " + "FROM hashtag t " + "JOIN hashtag_page p ON p.moreID = t.moreID " + - "WHERE p.pageID = @p"; + "WHERE p.pageID = @p " + + "ORDER BY t.documentOrder"; return ReadTags(sql, new SQLiteParameter[] { new("@p", pageID) } @@ -792,7 +853,8 @@ public Hashtags SearchTags( var builder = new StringBuilder(); builder.Append("SELECT t.tag, t.moreID, p.pageID, p.titleID, t.objectID, "); - builder.Append("p.notebookID, p.sectionID, t.lastModified, t.snippet, p.path, p.name "); + builder.Append("p.notebookID, p.sectionID, t.lastModified, t.snippet, "); + builder.Append("t.documentOrder, p.path, p.name "); builder.Append("FROM hashtag t "); builder.Append("JOIN 
hashtag_page p ON t.moreID = p.moreID "); @@ -813,7 +875,7 @@ public Hashtags SearchTags( var where = query.BuildFormattedWhereClause(criteria, out parsed); builder.Append(where); - builder.Append(" ORDER BY p.path, p.name, t.tag"); + builder.Append(" ORDER BY p.path, p.name, t.documentOrder"); var sql = builder.ToString(); logger.Verbose(sql); @@ -866,8 +928,9 @@ private Hashtags ReadTags(string sql, SQLiteParameter[] parameters = null) if (reader.FieldCount > 7 && sql.Contains("snippet")) { tag.Snippet = reader[8] is DBNull ? null : reader.GetString(8); - tag.HierarchyPath = reader[9] is DBNull ? null : reader.GetString(9); - tag.PageTitle = reader[10] is DBNull ? null : reader.GetString(10); + tag.DocumentOrder = reader[9] is DBNull ? 0 : reader.GetInt32(9); + tag.HierarchyPath = reader[10] is DBNull ? null : reader.GetString(10); + tag.PageTitle = reader[11] is DBNull ? null : reader.GetString(11); } tags.Add(tag); @@ -1073,52 +1136,83 @@ public void WriteScanTime() /// Records the given tags. /// /// A collection of Hashtags - public void WriteTags(Hashtags tags) + public void WriteTags(string pageID, Hashtags tags) { - using var tagcmd = con.CreateCommand(); - tagcmd.CommandText = "INSERT INTO hashtag " + - "(tag, moreID, objectID, snippet, lastModified) VALUES (@t, @m, @o, @c, @s)"; + using var transaction = con.BeginTransaction(); - tagcmd.CommandType = CommandType.Text; - tagcmd.Parameters.Add("@t", DbType.String); - tagcmd.Parameters.Add("@m", DbType.String); - tagcmd.Parameters.Add("@o", DbType.String); - tagcmd.Parameters.Add("@c", DbType.String); - tagcmd.Parameters.Add("@s", DbType.String); + using var cmd = con.CreateCommand(); + cmd.CommandType = CommandType.Text; - using var transaction = con.BeginTransaction(); - foreach (var tag in tags) + // first purge all existing tags for page... 
+ + cmd.CommandText = "DELETE FROM HASHTAG WHERE moreID = " + + "(SELECT moreID FROM hashtag_page WHERE pageID = @p);"; + + cmd.Parameters.AddWithValue("@p", pageID); + + try + { + cmd.ExecuteNonQuery(); + } + catch (Exception exc) { - logger.Verbose($"writing tag {tag.Tag}"); + transaction.Rollback(); + logger.WriteLine($"error deleting tags {pageID}", exc); + return; + } - tagcmd.Parameters["@t"].Value = tag.Tag; - tagcmd.Parameters["@m"].Value = tag.MoreID; - tagcmd.Parameters["@o"].Value = tag.ObjectID; - tagcmd.Parameters["@c"].Value = tag.Snippet; - tagcmd.Parameters["@s"].Value = tag.LastModified; + // now add (re-add) newly discovered tags for page, reestablishing doc order... - try - { - tagcmd.ExecuteNonQuery(); - } - catch (Exception exc) + if (tags.Any()) + { + cmd.CommandText = "INSERT INTO hashtag " + + "(tag, moreID, objectID, snippet, documentOrder, lastModified) " + + "VALUES (@t, @m, @o, @c, @d, @s)"; + + cmd.Parameters.Clear(); + cmd.Parameters.Add("@t", DbType.String); + cmd.Parameters.Add("@m", DbType.String); + cmd.Parameters.Add("@o", DbType.String); + cmd.Parameters.Add("@c", DbType.String); + cmd.Parameters.Add("@d", DbType.Int32); + cmd.Parameters.Add("@s", DbType.String); + + foreach (var tag in tags) { - logger.WriteLine($"error writing tag {tag.Tag} on {tag.PageID}"); - logger.WriteLine($"error moreID=[{tag.MoreID}]"); - logger.WriteLine($"error objectID=[{tag.ObjectID}]"); - logger.WriteLine($"error Snippet=[{tag.Snippet}]"); - logger.WriteLine($"error lastModified=[{tag.LastModified}]"); - logger.WriteLine(exc); + logger.Verbose($"writing tag {tag.Tag}"); + + cmd.Parameters["@t"].Value = tag.Tag; + cmd.Parameters["@m"].Value = tag.MoreID; + cmd.Parameters["@o"].Value = tag.ObjectID; + cmd.Parameters["@c"].Value = tag.Snippet; + cmd.Parameters["@d"].Value = tag.DocumentOrder; + cmd.Parameters["@s"].Value = tag.LastModified; + + try + { + cmd.ExecuteNonQuery(); + } + catch (Exception exc) + { + logger.WriteLine($"error writing tag 
{tag.Tag} on {tag.PageID}"); + logger.WriteLine($"error moreID=[{tag.MoreID}]"); + logger.WriteLine($"error objectID=[{tag.ObjectID}]"); + logger.WriteLine($"error Snippet=[{tag.Snippet}]"); + logger.WriteLine($"error lastModified=[{tag.LastModified}]"); + logger.WriteLine(exc); + } } } + CleanupPages(); + try { transaction.Commit(); } catch (Exception exc) { - ReportError("error writing tags", tagcmd, exc); + ReportError("error writing tags", cmd, exc); } } diff --git a/OneMore/Commands/Tagging/HashtagScanner.cs b/OneMore/Commands/Tagging/HashtagScanner.cs index 8a977fdd62..3cebdef0b2 100644 --- a/OneMore/Commands/Tagging/HashtagScanner.cs +++ b/OneMore/Commands/Tagging/HashtagScanner.cs @@ -324,7 +324,10 @@ private async Task ScanPage( var candidates = scanner.Scan(); + // saved tags will be in document-order but not have DocumentOrder set, + // we can rely on tag + objectID to continue resolving var saved = provider.ReadPageTags(pageID); + var discovered = new Hashtags(); var updated = new Hashtags(); @@ -337,7 +340,9 @@ private async Task ScanPage( } else { - if (forceThru || candidate.LastModified.CompareTo(lastTime) > 0) + if (forceThru || + candidate.LastModified.CompareTo(lastTime) > 0 || + candidate.DocumentOrder != found.DocumentOrder) { updated.Add(candidate); } @@ -348,26 +353,11 @@ private async Task ScanPage( var dirtyPage = false; - if (saved.Any()) - { - // remaining saved entries were not matched with candidates - // on page so should be deleted - provider.DeleteTags(saved); - dirtyPage = true; - } - - if (updated.Any()) + if (saved.Any() || updated.Any() || discovered.Any()) { - // tag context updated since last scan - provider.UpdateSnippet(updated); - dirtyPage = true; - } - - if (discovered.Any()) - { - // discovered entries are new on the page and not found in saved - - provider.WriteTags(discovered); + // much simpler to purge old and rewrite new, even if that means recreating a + // few copied records. 
should scale without issue into the many tens-of-tags + provider.WriteTags(pageID, candidates); dirtyPage = true; } diff --git a/OneMore/Commands/Tagging/HashtagsDB.sql b/OneMore/Commands/Tagging/HashtagsDB.sql index 9f2728e20b..cae264ad9f 100644 --- a/OneMore/Commands/Tagging/HashtagsDB.sql +++ b/OneMore/Commands/Tagging/HashtagsDB.sql @@ -1,5 +1,5 @@ CREATE TABLE IF NOT EXISTS hashtag_scanner (scannerID INTEGER PRIMARY KEY UNIQUE NOT NULL, version NUMERIC (12) UNIQUE NOT NULL, scanTime TEXT NOT NULL); -CREATE TABLE IF NOT EXISTS hashtag (tag TEXT NOT NULL, moreID TEXT NOT NULL, objectID TEXT NOT NULL, snippet TEXT, lastModified TEXT NOT NULL, PRIMARY KEY (tag, objectID), CONSTRAINT FK_moreID FOREIGN KEY (moreID) REFERENCES hashtag_page (moreID) ON DELETE CASCADE); +CREATE TABLE IF NOT EXISTS hashtag (tag TEXT NOT NULL, moreID TEXT NOT NULL, objectID TEXT NOT NULL, snippet TEXT, documentOrder INTEGER DEFAULT (0), lastModified TEXT NOT NULL, PRIMARY KEY (tag, objectID), CONSTRAINT FK_moreID FOREIGN KEY (moreID) REFERENCES hashtag_page (moreID) ON DELETE CASCADE); CREATE TABLE IF NOT EXISTS hashtag_page (moreID PRIMARY KEY, pageID TEXT NOT NULL, titleID TEXT, notebookID TEXT NOT NULL, sectionID TEXT NOT NULL, path TEXT, name TEXT); CREATE TABLE IF NOT EXISTS hashtag_notebook (notebookID TEXT PRIMARY KEY, name TEXT, lastModified TEXT NOT NULL DEFAULT ''); CREATE INDEX IF NOT EXISTS IDX_moreID ON hashtag (moreID);