Skip to content

Commit

Permalink
1405 tags in document-order (#1477)
Browse files Browse the repository at this point in the history
* Capture tags in document order
#1405

* rewrite tags to reestablish full doc order amongst all
#1405
  • Loading branch information
stevencohn committed Jul 14, 2024
1 parent 6b1a3a9 commit 93891dd
Show file tree
Hide file tree
Showing 5 changed files with 152 additions and 58 deletions.
6 changes: 6 additions & 0 deletions OneMore/Commands/Tagging/Hashtag.cs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ internal class Hashtag
public string Snippet { get; set; }


/// <summary>
/// Gets or sets the document-order index value of the snippet on the page,
/// used to sort the tags of a page in the order in which they appear
/// </summary>
public int DocumentOrder { get; set; }


/// <summary>
/// Gets whether this instance was the result of a direct search hit or is
/// just another tag on the same page
Expand Down
4 changes: 4 additions & 0 deletions OneMore/Commands/Tagging/HashtagPageScanner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ internal class HashtagPageScanner
private readonly SearchAndReplaceEditor editor;
private readonly string pageID;
private bool keepTags;
private int documentOrder;


/// <summary>
Expand Down Expand Up @@ -102,6 +103,8 @@ public Hashtags Scan()

if (paragraphs.Any())
{
documentOrder = 0;

foreach (var paragraph in paragraphs)
{
var count = tags.Count;
Expand Down Expand Up @@ -167,6 +170,7 @@ private void ScanParagraph(XElement paragraph, Hashtags tags)
PageID = pageID,
ObjectID = objectID,
Snippet = context,
DocumentOrder = documentOrder++,
LastModified = lastModifiedTime
});
}
Expand Down
168 changes: 131 additions & 37 deletions OneMore/Commands/Tagging/HashtagProvider.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
// Copyright © 2023 Steven M Cohn. All rights reserved.
//************************************************************************************************

#pragma warning disable S1133 // Deprecated code should be removed

namespace River.OneMoreAddIn.Commands
{
using River.OneMoreAddIn.Properties;
Expand Down Expand Up @@ -362,20 +364,20 @@ private int Upgrade3to4(SQLiteConnection con)
logger.Start();

using var cmd = con.CreateCommand();
cmd.CommandType = CommandType.Text;

using var transaction = con.BeginTransaction();

try
{
logger.WriteLine("updating table hashtag_notebook");

cmd.CommandType = CommandType.Text;
cmd.CommandText =
"ALTER TABLE hashtag_notebook " +
"ADD COLUMN lastModified TEXT NOT NULL default('')";

cmd.ExecuteNonQuery();

cmd.CommandType = CommandType.Text;
cmd.CommandText =
"UPDATE hashtag_notebook AS nb SET lastModified = COALESCE(" +
"(SELECT MAX(t.lastModified) " +
Expand All @@ -389,11 +391,68 @@ private int Upgrade3to4(SQLiteConnection con)
}
catch (Exception exc)
{
transaction.Rollback();
logger.End();
logger.WriteLine("error updating table hashtag_notebook", exc);
return 0;
}

try
{
logger.WriteLine("updating table hashtag");

cmd.CommandText =
"CREATE TABLE hashtag_v4 " +
"(tag TEXT NOT NULL, moreID TEXT NOT NULL, objectID TEXT NOT NULL, " +
"snippet TEXT, documentOrder INTEGER DEFAULT (0), lastModified TEXT NOT NULL, " +
"PRIMARY KEY (tag, objectID), " +
"CONSTRAINT FK_moreID FOREIGN KEY (moreID) REFERENCES hashtag_page (moreID) " +
"ON DELETE CASCADE)";

cmd.ExecuteNonQuery();

cmd.CommandText =
"INSERT INTO hashtag_v4 (tag, moreID, objectID, snippet, lastModified) " +
"SELECT tag, moreID, objectID, snippet, lastModified " +
"FROM hashtag";

cmd.ExecuteNonQuery();

cmd.CommandText = "DROP INDEX IDX_moreID";
cmd.ExecuteNonQuery();

cmd.CommandText = "DROP INDEX IDX_tag";
cmd.ExecuteNonQuery();

cmd.CommandText = "DROP TABLE hashtag";
cmd.ExecuteNonQuery();

cmd.CommandText = "DROP VIEW page_hashtags";
cmd.ExecuteNonQuery();

cmd.CommandText = "ALTER TABLE hashtag_v4 RENAME TO hashtag";
cmd.ExecuteNonQuery();

cmd.CommandText = "CREATE INDEX IDX_moreID ON hashtag(moreID)";
cmd.ExecuteNonQuery();

cmd.CommandText = "CREATE INDEX IDX_tag ON hashtag(tag)";
cmd.ExecuteNonQuery();

cmd.CommandText = "CREATE VIEW IF NOT EXISTS page_hashtags (moreID, tags) AS " +
"SELECT t.moreID, group_concat(DISTINCT(t.tag)) AS tags " +
"FROM hashtag t GROUP BY t.moreID";

cmd.ExecuteNonQuery();
}
catch (Exception exc)
{
transaction.Rollback();
logger.End();
logger.WriteLine("error updating table hashtag", exc);
return 0;
}

if (!UpgradeSchemaVersion(cmd, transaction, version))
{
return 0;
Expand Down Expand Up @@ -522,6 +581,7 @@ public void DeletePhantoms(List<string> knownIDs, string sectionID, string secti
/// Deletes the specified tags
/// </summary>
/// <param name="tags">A collection of Hashtags</param>
[Obsolete("Was used as part of original tag resolution logic")]
public void DeleteTags(Hashtags tags)
{
using var cmd = con.CreateCommand();
Expand Down Expand Up @@ -700,7 +760,8 @@ public Hashtags ReadPageTags(string pageID)
"p.notebookID, p.sectionID, t.lastModified " +
"FROM hashtag t " +
"JOIN hashtag_page p ON p.moreID = t.moreID " +
"WHERE p.pageID = @p";
"WHERE p.pageID = @p " +
"ORDER BY t.documentOrder";

return ReadTags(sql,
new SQLiteParameter[] { new("@p", pageID) }
Expand Down Expand Up @@ -792,7 +853,8 @@ public Hashtags SearchTags(

var builder = new StringBuilder();
builder.Append("SELECT t.tag, t.moreID, p.pageID, p.titleID, t.objectID, ");
builder.Append("p.notebookID, p.sectionID, t.lastModified, t.snippet, p.path, p.name ");
builder.Append("p.notebookID, p.sectionID, t.lastModified, t.snippet, ");
builder.Append("t.documentOrder, p.path, p.name ");
builder.Append("FROM hashtag t ");
builder.Append("JOIN hashtag_page p ON t.moreID = p.moreID ");

Expand All @@ -813,7 +875,7 @@ public Hashtags SearchTags(
var where = query.BuildFormattedWhereClause(criteria, out parsed);
builder.Append(where);

builder.Append(" ORDER BY p.path, p.name, t.tag");
builder.Append(" ORDER BY p.path, p.name, t.documentOrder");
var sql = builder.ToString();

logger.Verbose(sql);
Expand Down Expand Up @@ -866,8 +928,9 @@ private Hashtags ReadTags(string sql, SQLiteParameter[] parameters = null)
if (reader.FieldCount > 7 && sql.Contains("snippet"))
{
tag.Snippet = reader[8] is DBNull ? null : reader.GetString(8);
tag.HierarchyPath = reader[9] is DBNull ? null : reader.GetString(9);
tag.PageTitle = reader[10] is DBNull ? null : reader.GetString(10);
tag.DocumentOrder = reader[9] is DBNull ? 0 : reader.GetInt32(9);
tag.HierarchyPath = reader[10] is DBNull ? null : reader.GetString(10);
tag.PageTitle = reader[11] is DBNull ? null : reader.GetString(11);
}

tags.Add(tag);
Expand Down Expand Up @@ -1073,52 +1136,83 @@ public void WriteScanTime()
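/// Records the given tags for the specified page, first purging any tags
/// previously recorded for that page and then rewriting them in document order.
/// </summary>
/// <param name="pageID">The ID of the page whose tags are being recorded</param>
/// <param name="tags">A collection of Hashtags</param>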
public void WriteTags(Hashtags tags)
public void WriteTags(string pageID, Hashtags tags)
{
using var tagcmd = con.CreateCommand();
tagcmd.CommandText = "INSERT INTO hashtag " +
"(tag, moreID, objectID, snippet, lastModified) VALUES (@t, @m, @o, @c, @s)";
using var transaction = con.BeginTransaction();

tagcmd.CommandType = CommandType.Text;
tagcmd.Parameters.Add("@t", DbType.String);
tagcmd.Parameters.Add("@m", DbType.String);
tagcmd.Parameters.Add("@o", DbType.String);
tagcmd.Parameters.Add("@c", DbType.String);
tagcmd.Parameters.Add("@s", DbType.String);
using var cmd = con.CreateCommand();
cmd.CommandType = CommandType.Text;

using var transaction = con.BeginTransaction();
foreach (var tag in tags)
// first purge all existing tags for page...

cmd.CommandText = "DELETE FROM HASHTAG WHERE moreID = " +
"(SELECT moreID FROM hashtag_page WHERE pageID = @p);";

cmd.Parameters.AddWithValue("@p", pageID);

try
{
cmd.ExecuteNonQuery();
}
catch (Exception exc)
{
logger.Verbose($"writing tag {tag.Tag}");
transaction.Rollback();
logger.WriteLine($"error deleting tags {pageID}", exc);
return;
}

tagcmd.Parameters["@t"].Value = tag.Tag;
tagcmd.Parameters["@m"].Value = tag.MoreID;
tagcmd.Parameters["@o"].Value = tag.ObjectID;
tagcmd.Parameters["@c"].Value = tag.Snippet;
tagcmd.Parameters["@s"].Value = tag.LastModified;
// now add (re-add) newly discovered tags for page, reestablishing doc order...

try
{
tagcmd.ExecuteNonQuery();
}
catch (Exception exc)
if (tags.Any())
{
cmd.CommandText = "INSERT INTO hashtag " +
"(tag, moreID, objectID, snippet, documentOrder, lastModified) " +
"VALUES (@t, @m, @o, @c, @d, @s)";

cmd.Parameters.Clear();
cmd.Parameters.Add("@t", DbType.String);
cmd.Parameters.Add("@m", DbType.String);
cmd.Parameters.Add("@o", DbType.String);
cmd.Parameters.Add("@c", DbType.String);
cmd.Parameters.Add("@d", DbType.Int32);
cmd.Parameters.Add("@s", DbType.String);

foreach (var tag in tags)
{
logger.WriteLine($"error writing tag {tag.Tag} on {tag.PageID}");
logger.WriteLine($"error moreID=[{tag.MoreID}]");
logger.WriteLine($"error objectID=[{tag.ObjectID}]");
logger.WriteLine($"error Snippet=[{tag.Snippet}]");
logger.WriteLine($"error lastModified=[{tag.LastModified}]");
logger.WriteLine(exc);
logger.Verbose($"writing tag {tag.Tag}");

cmd.Parameters["@t"].Value = tag.Tag;
cmd.Parameters["@m"].Value = tag.MoreID;
cmd.Parameters["@o"].Value = tag.ObjectID;
cmd.Parameters["@c"].Value = tag.Snippet;
cmd.Parameters["@d"].Value = tag.DocumentOrder;
cmd.Parameters["@s"].Value = tag.LastModified;

try
{
cmd.ExecuteNonQuery();
}
catch (Exception exc)
{
logger.WriteLine($"error writing tag {tag.Tag} on {tag.PageID}");
logger.WriteLine($"error moreID=[{tag.MoreID}]");
logger.WriteLine($"error objectID=[{tag.ObjectID}]");
logger.WriteLine($"error Snippet=[{tag.Snippet}]");
logger.WriteLine($"error lastModified=[{tag.LastModified}]");
logger.WriteLine(exc);
}
}
}

CleanupPages();

try
{
transaction.Commit();
}
catch (Exception exc)
{
ReportError("error writing tags", tagcmd, exc);
ReportError("error writing tags", cmd, exc);
}
}

Expand Down
30 changes: 10 additions & 20 deletions OneMore/Commands/Tagging/HashtagScanner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -324,7 +324,10 @@ private async Task<bool> ScanPage(

var candidates = scanner.Scan();

// saved tags will be in document-order but not have DocumentOrder set,
// we can rely on tag + objectID to continue resolving
var saved = provider.ReadPageTags(pageID);

var discovered = new Hashtags();
var updated = new Hashtags();

Expand All @@ -337,7 +340,9 @@ private async Task<bool> ScanPage(
}
else
{
if (forceThru || candidate.LastModified.CompareTo(lastTime) > 0)
if (forceThru ||
candidate.LastModified.CompareTo(lastTime) > 0 ||
candidate.DocumentOrder != found.DocumentOrder)
{
updated.Add(candidate);
}
Expand All @@ -348,26 +353,11 @@ private async Task<bool> ScanPage(

var dirtyPage = false;

if (saved.Any())
{
// remaining saved entries were not matched with candidates
// on page so should be deleted
provider.DeleteTags(saved);
dirtyPage = true;
}

if (updated.Any())
if (saved.Any() || updated.Any() || discovered.Any())
{
// tag context updated since last scan
provider.UpdateSnippet(updated);
dirtyPage = true;
}

if (discovered.Any())
{
// discovered entries are new on the page and not found in saved

provider.WriteTags(discovered);
// much simpler to purge old and rewrite new, even if that means recreating a
// few copied records. should scale without issue into the many tens-of-tags
provider.WriteTags(pageID, candidates);
dirtyPage = true;
}

Expand Down
2 changes: 1 addition & 1 deletion OneMore/Commands/Tagging/HashtagsDB.sql
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
CREATE TABLE IF NOT EXISTS hashtag_scanner (scannerID INTEGER PRIMARY KEY UNIQUE NOT NULL, version NUMERIC (12) UNIQUE NOT NULL, scanTime TEXT NOT NULL);
CREATE TABLE IF NOT EXISTS hashtag (tag TEXT NOT NULL, moreID TEXT NOT NULL, objectID TEXT NOT NULL, snippet TEXT, lastModified TEXT NOT NULL, PRIMARY KEY (tag, objectID), CONSTRAINT FK_moreID FOREIGN KEY (moreID) REFERENCES hashtag_page (moreID) ON DELETE CASCADE);
CREATE TABLE IF NOT EXISTS hashtag (tag TEXT NOT NULL, moreID TEXT NOT NULL, objectID TEXT NOT NULL, snippet TEXT, documentOrder INTEGER DEFAULT (0), lastModified TEXT NOT NULL, PRIMARY KEY (tag, objectID), CONSTRAINT FK_moreID FOREIGN KEY (moreID) REFERENCES hashtag_page (moreID) ON DELETE CASCADE);
CREATE TABLE IF NOT EXISTS hashtag_page (moreID PRIMARY KEY, pageID TEXT NOT NULL, titleID TEXT, notebookID TEXT NOT NULL, sectionID TEXT NOT NULL, path TEXT, name TEXT);
CREATE TABLE IF NOT EXISTS hashtag_notebook (notebookID TEXT PRIMARY KEY, name TEXT, lastModified TEXT NOT NULL DEFAULT '');
CREATE INDEX IF NOT EXISTS IDX_moreID ON hashtag (moreID);
Expand Down

0 comments on commit 93891dd

Please sign in to comment.