Skip to content

Commit 20d788a

Browse files
fix: refactor and fix bugs with missing_files.py (#739)
* fix: relink unlinked entry to existing entry without sql error (#720)
* edited and added db functions get_entry_full_by_path & merge_entries
* implemented edge case for entry existing on relinking
* added test for merge_entries
* fix: don't remove item while iterating over list
* fix: catch `FileNotFoundError` for unlinked raw files
* refactor: rename methods and variables in missing_files.py
* refactor: rename `missing_files_count` to `missing_file_entries_count`

---------

Co-authored-by: mashed5894 <mashed5894@gmail.com>
1 parent 458925f commit 20d788a

File tree

8 files changed

+140
-37
lines changed

8 files changed

+140
-37
lines changed

tagstudio/src/core/library/alchemy/library.py

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -490,6 +490,32 @@ def get_entries_full(self, entry_ids: list[int] | set[int]) -> Iterator[Entry]:
490490
yield entry
491491
session.expunge(entry)
492492

493+
def get_entry_full_by_path(self, path: Path) -> Entry | None:
494+
"""Get the entry with the corresponding path."""
495+
with Session(self.engine) as session:
496+
stmt = select(Entry).where(Entry.path == path)
497+
stmt = (
498+
stmt.outerjoin(Entry.text_fields)
499+
.outerjoin(Entry.datetime_fields)
500+
.options(selectinload(Entry.text_fields), selectinload(Entry.datetime_fields))
501+
)
502+
stmt = (
503+
stmt.outerjoin(Entry.tags)
504+
.outerjoin(TagAlias)
505+
.options(
506+
selectinload(Entry.tags).options(
507+
joinedload(Tag.aliases),
508+
joinedload(Tag.parent_tags),
509+
)
510+
)
511+
)
512+
entry = session.scalar(stmt)
513+
if not entry:
514+
return None
515+
session.expunge(entry)
516+
make_transient(entry)
517+
return entry
518+
493519
@property
494520
def entries_count(self) -> int:
495521
with Session(self.engine) as session:
@@ -698,7 +724,13 @@ def search_tags(
698724

699725
return res
700726

701-
def update_entry_path(self, entry_id: int | Entry, path: Path) -> None:
727+
def update_entry_path(self, entry_id: int | Entry, path: Path) -> bool:
728+
"""Set the path field of an entry.
729+
730+
Returns True if the action succeeded and False if the path already exists.
731+
"""
732+
if self.has_path_entry(path):
733+
return False
702734
if isinstance(entry_id, Entry):
703735
entry_id = entry_id.id
704736

@@ -715,6 +747,7 @@ def update_entry_path(self, entry_id: int | Entry, path: Path) -> None:
715747

716748
session.execute(update_stmt)
717749
session.commit()
750+
return True
718751

719752
def remove_tag(self, tag: Tag):
720753
with Session(self.engine, expire_on_commit=False) as session:
@@ -1185,6 +1218,18 @@ def mirror_entry_fields(self, *entries: Entry) -> None:
11851218
value=field.value,
11861219
)
11871220

1221+
def merge_entries(self, from_entry: Entry, into_entry: Entry) -> None:
1222+
"""Add fields and tags from the first entry to the second, and then delete the first."""
1223+
for field in from_entry.fields:
1224+
self.add_field_to_entry(
1225+
entry_id=into_entry.id,
1226+
field_id=field.type_key,
1227+
value=field.value,
1228+
)
1229+
tag_ids = [tag.id for tag in from_entry.tags]
1230+
self.add_tags_to_entry(into_entry.id, tag_ids)
1231+
self.remove_entries([from_entry.id])
1232+
11881233
@property
11891234
def tag_color_groups(self) -> dict[str, list[TagColorGroup]]:
11901235
"""Return every TagColorGroup in the library."""

tagstudio/src/core/utils/missing_files.py

Lines changed: 47 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,7 @@
44

55
import structlog
66
from src.core.library import Entry, Library
7-
8-
IGNORE_ITEMS = [
9-
"$recycle.bin",
10-
]
7+
from src.core.utils.refresh_dir import GLOBAL_IGNORE_SET
118

129
logger = structlog.get_logger()
1310

@@ -18,49 +15,73 @@ class MissingRegistry:
1815

1916
library: Library
2017
files_fixed_count: int = 0
21-
missing_files: list[Entry] = field(default_factory=list)
18+
missing_file_entries: list[Entry] = field(default_factory=list)
2219

2320
@property
24-
def missing_files_count(self) -> int:
25-
return len(self.missing_files)
21+
def missing_file_entries_count(self) -> int:
22+
return len(self.missing_file_entries)
2623

2724
def refresh_missing_files(self) -> Iterator[int]:
28-
"""Track the number of Entries that point to an invalid file path."""
29-
logger.info("refresh_missing_files running")
30-
self.missing_files = []
25+
"""Track the number of entries that point to an invalid filepath."""
26+
logger.info("[refresh_missing_files] Refreshing missing files...")
27+
self.missing_file_entries = []
3128
for i, entry in enumerate(self.library.get_entries()):
3229
full_path = self.library.library_dir / entry.path
3330
if not full_path.exists() or not full_path.is_file():
34-
self.missing_files.append(entry)
31+
self.missing_file_entries.append(entry)
3532
yield i
3633

37-
def match_missing_file(self, match_item: Entry) -> list[Path]:
38-
"""Try to find missing entry files within the library directory.
34+
def match_missing_file_entry(self, match_entry: Entry) -> list[Path]:
35+
"""Try and match unlinked file entries with matching results in the library directory.
3936
4037
Works if files were just moved to different subfolders and don't have duplicate names.
4138
"""
4239
matches = []
43-
for item in self.library.library_dir.glob(f"**/{match_item.path.name}"):
44-
if item.name == match_item.path.name: # TODO - implement IGNORE_ITEMS
45-
new_path = Path(item).relative_to(self.library.library_dir)
40+
for path in self.library.library_dir.glob(f"**/{match_entry.path.name}"):
41+
# Ensure matched file isn't in a globally ignored folder
42+
skip: bool = False
43+
for part in path.parts:
44+
if part in GLOBAL_IGNORE_SET:
45+
skip = True
46+
break
47+
if skip:
48+
continue
49+
if path.name == match_entry.path.name:
50+
new_path = Path(path).relative_to(self.library.library_dir)
4651
matches.append(new_path)
4752

53+
logger.info("[MissingRegistry] Matches", matches=matches)
4854
return matches
4955

50-
def fix_missing_files(self) -> Iterator[int]:
51-
"""Attempt to fix missing files by finding a match in the library directory."""
56+
def fix_unlinked_entries(self) -> Iterator[int]:
57+
"""Attempt to fix unlinked file entries by finding a match in the library directory."""
5258
self.files_fixed_count = 0
53-
for i, entry in enumerate(self.missing_files, start=1):
54-
item_matches = self.match_missing_file(entry)
59+
matched_entries: list[Entry] = []
60+
for i, entry in enumerate(self.missing_file_entries):
61+
item_matches = self.match_missing_file_entry(entry)
5562
if len(item_matches) == 1:
56-
logger.info("fix_missing_files", entry=entry, item_matches=item_matches)
57-
self.library.update_entry_path(entry.id, item_matches[0])
63+
logger.info(
64+
"[fix_unlinked_entries]",
65+
entry=entry.path.as_posix(),
66+
item_matches=item_matches[0].as_posix(),
67+
)
68+
if not self.library.update_entry_path(entry.id, item_matches[0]):
69+
try:
70+
match = self.library.get_entry_full_by_path(item_matches[0])
71+
entry_full = self.library.get_entry_full(entry.id)
72+
self.library.merge_entries(entry_full, match)
73+
except AttributeError:
74+
continue
5875
self.files_fixed_count += 1
59-
# remove fixed file
60-
self.missing_files.remove(entry)
76+
matched_entries.append(entry)
6177
yield i
6278

79+
for entry in matched_entries:
80+
self.missing_file_entries.remove(entry)
81+
6382
def execute_deletion(self) -> None:
64-
self.library.remove_entries(list(map(lambda missing: missing.id, self.missing_files)))
83+
self.library.remove_entries(
84+
list(map(lambda missing: missing.id, self.missing_file_entries))
85+
)
6586

66-
self.missing_files = []
87+
self.missing_file_entries = []

tagstudio/src/qt/modals/delete_unlinked.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def __init__(self, driver: "QtDriver", tracker: MissingRegistry):
4343
Translations.translate_qobject(
4444
self.desc_widget,
4545
"entries.unlinked.delete.confirm",
46-
count=self.tracker.missing_files_count,
46+
count=self.tracker.missing_file_entries_count,
4747
)
4848
self.desc_widget.setAlignment(Qt.AlignmentFlag.AlignCenter)
4949

@@ -75,12 +75,12 @@ def __init__(self, driver: "QtDriver", tracker: MissingRegistry):
7575
def refresh_list(self):
7676
self.desc_widget.setText(
7777
Translations.translate_formatted(
78-
"entries.unlinked.delete.confirm", count=self.tracker.missing_files_count
78+
"entries.unlinked.delete.confirm", count=self.tracker.missing_file_entries_count
7979
)
8080
)
8181

8282
self.model.clear()
83-
for i in self.tracker.missing_files:
83+
for i in self.tracker.missing_file_entries:
8484
item = QStandardItem(str(i.path))
8585
item.setEditable(False)
8686
self.model.appendRow(item)
@@ -90,7 +90,7 @@ def displayed_text(x):
9090
return Translations.translate_formatted(
9191
"entries.unlinked.delete.deleting_count",
9292
idx=x,
93-
count=self.tracker.missing_files_count,
93+
count=self.tracker.missing_file_entries_count,
9494
)
9595

9696
pw = ProgressWidget(

tagstudio/src/qt/modals/fix_unlinked.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ def set_missing_count(self, count: int | None = None):
129129
if count is not None:
130130
self.missing_count = count
131131
else:
132-
self.missing_count = self.tracker.missing_files_count
132+
self.missing_count = self.tracker.missing_file_entries_count
133133

134134
if self.missing_count < 0:
135135
self.search_button.setDisabled(True)

tagstudio/src/qt/modals/relink_unlinked.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,16 +21,16 @@ def displayed_text(x):
2121
return Translations.translate_formatted(
2222
"entries.unlinked.relink.attempting",
2323
idx=x,
24-
missing_count=self.tracker.missing_files_count,
24+
missing_count=self.tracker.missing_file_entries_count,
2525
fixed_count=self.tracker.files_fixed_count,
2626
)
2727

2828
pw = ProgressWidget(
2929
label_text="",
3030
cancel_button_text=None,
3131
minimum=0,
32-
maximum=self.tracker.missing_files_count,
32+
maximum=self.tracker.missing_file_entries_count,
3333
)
3434
Translations.translate_with_setter(pw.setWindowTitle, "entries.unlinked.relink.title")
3535

36-
pw.from_iterable_function(self.tracker.fix_missing_files, displayed_text, self.done.emit)
36+
pw.from_iterable_function(self.tracker.fix_unlinked_entries, displayed_text, self.done.emit)

tagstudio/src/qt/widgets/preview/preview_thumb.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,7 @@ def _update_image(self, filepath: Path, ext: str) -> dict:
198198
except (
199199
rawpy._rawpy.LibRawIOError,
200200
rawpy._rawpy.LibRawFileUnsupportedError,
201+
FileNotFoundError,
201202
):
202203
pass
203204
elif MediaCategories.is_ext_in_category(

tagstudio/tests/macros/test_missing_files.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
CWD = pathlib.Path(__file__).parent
1010

1111

12+
# NOTE: Does this test actually work?
1213
@pytest.mark.parametrize("library", [TemporaryDirectory()], indirect=True)
1314
def test_refresh_missing_files(library: Library):
1415
registry = MissingRegistry(library=library)
@@ -20,10 +21,10 @@ def test_refresh_missing_files(library: Library):
2021
assert list(registry.refresh_missing_files()) == [0, 1]
2122

2223
# neither of the library entries exist
23-
assert len(registry.missing_files) == 2
24+
assert len(registry.missing_file_entries) == 2
2425

2526
# iterate through two files
26-
assert list(registry.fix_missing_files()) == [1, 2]
27+
assert list(registry.fix_unlinked_entries()) == [0, 1]
2728

2829
# `bar.md` should be relinked to new correct path
2930
results = library.search_library(FilterState.from_path("bar.md"))

tagstudio/tests/test_library.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,41 @@ def test_mirror_entry_fields(library: Library, entry_full):
309309
}
310310

311311

312+
def test_merge_entries(library: Library):
313+
a = Entry(
314+
folder=library.folder,
315+
path=Path("a"),
316+
fields=[
317+
TextField(type_key=_FieldID.AUTHOR.name, value="Author McAuthorson", position=0),
318+
TextField(type_key=_FieldID.DESCRIPTION.name, value="test description", position=2),
319+
],
320+
)
321+
b = Entry(
322+
folder=library.folder,
323+
path=Path("b"),
324+
fields=[TextField(type_key=_FieldID.NOTES.name, value="test note", position=1)],
325+
)
326+
try:
327+
ids = library.add_entries([a, b])
328+
entry_a = library.get_entry_full(ids[0])
329+
entry_b = library.get_entry_full(ids[1])
330+
tag_0 = library.add_tag(Tag(id=1000, name="tag_0"))
331+
tag_1 = library.add_tag(Tag(id=1001, name="tag_1"))
332+
tag_2 = library.add_tag(Tag(id=1002, name="tag_2"))
333+
library.add_tags_to_entry(ids[0], [tag_0.id, tag_2.id])
334+
library.add_tags_to_entry(ids[1], [tag_1.id])
335+
library.merge_entries(entry_a, entry_b)
336+
assert library.has_path_entry(Path("b"))
337+
assert not library.has_path_entry(Path("a"))
338+
fields = [field.value for field in entry_a.fields]
339+
assert "Author McAuthorson" in fields
340+
assert "test description" in fields
341+
assert "test note" in fields
342+
assert b.has_tag(tag_0) and b.has_tag(tag_1) and b.has_tag(tag_2)
343+
except AttributeError:
344+
AssertionError()
345+
346+
312347
def test_remove_tag_from_entry(library, entry_full):
313348
removed_tag_id = -1
314349
for tag in entry_full.tags:

0 commit comments

Comments
 (0)