From 49179d4e3d61166c09c8c540250ba2e35234d49c Mon Sep 17 00:00:00 2001 From: Karl Voit Date: Thu, 2 Aug 2018 21:27:04 +0200 Subject: [PATCH] added "--hardlinks" and more debug output for scanning large hierarchies --- README.org | 64 +++++++++++++++++++++++--------------------- filetags/__init__.py | 44 +++++++++++++++++++++--------- 2 files changed, 66 insertions(+), 42 deletions(-) diff --git a/README.org b/README.org index 59ed774..7cdcb03 100644 --- a/README.org +++ b/README.org @@ -83,19 +83,19 @@ If you use the GitHub sources (and not pip), the executable is ** Usage #+BEGIN_SRC sh :results output :wrap src -./filetags/__init__.py --help +./filetags/__init__.py --help | sed 'sX/home/vkX\$HOMEX' #+END_SRC #+BEGIN_src -usage: filetags [-h] [-t "STRING WITH TAGS"] [--remove] [-i] - [-R] [-s] [-f] - [--filebrowser PATH_TO_FILEBROWSER] [--tagtrees] - [--tagtrees-handle-no-tag "treeroot" | "ignore" | "FOLDERNAME"] - [--tagtrees-link-missing-mutual-tagged-items] - [--tagtrees-dir ] - [--tagtrees-depth TAGTREES_DEPTH] [--ln] [--la] - [--lu] [--tag-gardening] [-v] [-q] [--version] - [FILE [FILE ...]] +usage: ./filetags/__init__.py [-h] [-t "STRING WITH TAGS"] [--remove] [-i] + [-R] [-s] [--hardlinks] [-f] + [--filebrowser PATH_TO_FILEBROWSER] [--tagtrees] + [--tagtrees-handle-no-tag "treeroot" | "ignore" | "FOLDERNAME"] + [--tagtrees-link-missing-mutual-tagged-items] + [--tagtrees-dir ] + [--tagtrees-depth TAGTREES_DEPTH] [--ln] [--la] + [--lu] [--tag-gardening] [-v] [-q] [--version] + [FILE [FILE ...]] This tool adds or removes simple tags to/from file names. 
@@ -131,32 +131,35 @@ positional arguments: optional arguments: -h, --help show this help message and exit -t "STRING WITH TAGS", --tags "STRING WITH TAGS" - one or more tags (in quotes, separated by spaces) to + One or more tags (in quotes, separated by spaces) to add/remove - --remove remove tags from (instead of adding to) file name(s) - -i, --interactive interactive mode: ask for (a)dding or (r)emoving and + --remove Remove tags from (instead of adding to) file name(s) + -i, --interactive Interactive mode: ask for (a)dding or (r)emoving and name of tag(s) - -R, --recursive recursively go through the current directory and all + -R, --recursive Recursively go through the current directory and all of its subdirectories. Implemented for --tag-gardening and --tagtrees - -s, --dryrun enable dryrun mode: just simulate what would happen, + -s, --dryrun Enable dryrun mode: just simulate what would happen, do not modify files - -f, --filter ask for list of tags and generate links in - "/home/USER/.filetags_tagfilter" containing links to all + --hardlinks Use hard links instead of symbolic links. This is + ignored on Windows systems. Note that renaming link + originals when tagging does not work with hardlinks. + -f, --filter Ask for list of tags and generate links in + "$HOME/.filetags_tagfilter" containing links to all files with matching tags and start the filebrowser. Target directory can be overridden by --tagtrees-dir. --filebrowser PATH_TO_FILEBROWSER - use this option to override the tool to view/manage + Use this option to override the tool to view/manage files (for --filter; default: geeqie). Use "none" to omit the default one. --tagtrees This generates nested directories in - "/home/vk/.filetags_tagfilter" for each combination of + "$HOME/.filetags_tagfilter" for each combination of tags up to a limit of 2. Target directory can be overridden by --tagtrees-dir. Please note that this may take long since it relates exponentially to the - number of tags involved. 
See also http://Karl- - Voit.at/tagstore/ and http://Karl- - Voit.at/tagstore/downloads/Voit2012b.pdf + number of tags involved. Can be combined with + --filter. See also http://Karl-Voit.at/tagstore/ and + http://Karl-Voit.at/tagstore/downloads/Voit2012b.pdf --tagtrees-handle-no-tag "treeroot" | "ignore" | "FOLDERNAME" When tagtrees are created, this parameter defines how to handle items that got no tag at all. The value @@ -178,7 +181,7 @@ optional arguments: --tagtrees-dir When tagtrees are created, this parameter overrides the default target directory - "/home/vk/.filetags_tagfilter" with a user-defined + "$HOME/.filetags_tagfilter" with a user-defined one. It has to be an empty directory or a non-existing directory which will be created. This also overrides the default directory for --filter. @@ -191,24 +194,24 @@ optional arguments: instead of symbolic links) the performance is really slow. Choose wisely. --ln, --list-tags-by-number - list all file-tags sorted by their number of use + List all file-tags sorted by their number of use --la, --list-tags-by-alphabet - list all file-tags sorted by their name + List all file-tags sorted by their name --lu, --list-tags-unknown-to-vocabulary - list all file-tags which are found in file names but + List all file-tags which are found in file names but are not part of .filetags --tag-gardening This is for getting an overview on tags that might require to be renamed (typos, singular/plural, ...). See also http://www.webology.org/2008/v5n3/a58.html - -v, --verbose enable verbose mode - -q, --quiet enable quiet mode - --version display version and exit + -v, --verbose Enable verbose mode + -q, --quiet Enable quiet mode + --version Display version and exit :copyright: (c) by Karl Voit :license: GPL v3 or any later version :URL: https://github.com/novoid/filetags :bugreports: via github or -:version: 2018-04-25 +:version: 2018-08-02 · #+END_src @@ -322,6 +325,7 @@ For =--filter= and =--tagtrees= examples see sections below. 
- added hints to [[https://github.com/novoid/integratethis][=integratethis=]] to ease the Windows Explorer integration - 2018-07-23: =--tagtrees== can now be filtered with =--filter= +- 2018-08-02: added option =--hardlinks= as an alternative for non-Windows systems ** Get the most out of filetags: controlled vocabulary ~.filetags~ :PROPERTIES: diff --git a/filetags/__init__.py b/filetags/__init__.py index 8ad0d14..9ea6c32 100755 --- a/filetags/__init__.py +++ b/filetags/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -PROG_VERSION = "Time-stamp: <2018-08-02 20:38:00 vk>" +PROG_VERSION = "Time-stamp: <2018-08-02 21:15:14 vk>" # TODO: # - fix parts marked with «FIXXME» @@ -176,6 +176,10 @@ def save_import(library): parser.add_argument("-s", "--dryrun", dest="dryrun", action="store_true", help="Enable dryrun mode: just simulate what would happen, do not modify files") +parser.add_argument("--hardlinks", dest="hardlinks", action="store_true", + help="Use hard links instead of symbolic links. This is ignored on Windows systems. " + + "Note that renaming link originals when tagging does not work with hardlinks.") + parser.add_argument("-f", "--filter", dest="tagfilter", action="store_true", help="Ask for list of tags and generate links in \"" + TAGFILTER_DIRECTORY + "\" " + "containing links to all files with matching tags and start the filebrowser. " + @@ -960,8 +964,7 @@ def handle_file_and_optional_link(orig_filename, tags, do_remove, do_filter, dry def create_link(source, destination): - """ - On non-Windows systems, a symbolic link is created that links + """On non-Windows systems, a symbolic link is created that links source (existing file) to destination (the new symlink). On Windows systems a lnk-file is created instead. @@ -974,8 +977,12 @@ def create_link(source, destination): This is the reason why the "--tagrees" option does perform really bad on Windows. And "really bad" means factor 10 to 1000. I measured it. 
+ The command line option "--hardlinks" switches to hardlinks. This + is ignored on Windows systems. + @param source: a file name of the source, an existing file @param destination: a file name for the link which is about to be created + """ logging.debug('create_link(' + source + ', ' + destination + ') called') @@ -994,8 +1001,16 @@ def create_link(source, destination): shortcut.save() else: - # for normal operating systems, use good old high-performing symbolic links: - os.symlink(source, destination) + # for normal operating systems: + if options.hardlinks: + try: + # use good old high-performing hard links: + os.link(source, destination) + except OSError: + logging.warning('Due to cross-device links, I had to use a symbolic link as a fall-back for: ' + source) + else: + # use good old high-performing symbolic links: + os.symlink(source, destination) def handle_file(orig_filename, tags, do_remove, do_filter, dryrun): @@ -1223,12 +1238,12 @@ def get_tags_from_files_and_subfolders(startdir=os.getcwd(), use_cache=True): (startdir, str(len(list(cache_of_tags_by_folder.keys()))))) if use_cache and startdir in list(cache_of_tags_by_folder.keys()): - logging.debug("found " + str(len(cache_of_tags_by_folder[startdir])) + + logging.debug("get_tags_from_files_and_subfolders: found " + str(len(cache_of_tags_by_folder[startdir])) + " tags in cache for directory: " + startdir) return cache_of_tags_by_folder[startdir] elif use_cache and startdir in cache_of_files_with_metadata.keys(): - logging.debug('using cache_of_files_with_metadata instead of traversing file system again') + logging.debug('get_tags_from_files_and_subfolders: using cache_of_files_with_metadata instead of traversing file system again') cachedata = cache_of_files_with_metadata[startdir] # FIXXME: check if tags are extracted from dirnames as in traversal algorithm below @@ -1258,7 +1273,7 @@ def get_tags_from_files_and_subfolders(startdir=os.getcwd(), use_cache=True): options.tag_gardening)): break # do not
loop - logging.debug("Writing " + str(len(list(tags.keys()))) + + logging.debug("get_tags_from_files_and_subfolders: Writing " + str(len(list(tags.keys()))) + " tags in cache for directory: " + startdir) if use_cache: cache_of_tags_by_folder[startdir] = tags @@ -1921,12 +1936,17 @@ def get_files_of_directory(directory): """ files = [] + logging.debug('get_files_of_directory(' + directory + ') called and traversing file system ...') for (dirpath, dirnames, filenames) in os.walk(directory): + if len(files) % 5000 == 0 and len(files) > 0: + # while debugging a large hierarchy scan, I'd like to print out some stuff in-between scanning + logging.info('found ' + str(len(files)) + ' files so far ... counting ...') if options.recursive: files.extend([os.path.join(dirpath, x) for x in filenames]) else: files.extend(filenames) break + logging.debug('get_files_of_directory(' + directory + ') finished with ' + str(len(files)) + ' items') return files @@ -2086,6 +2106,7 @@ def generate_tagtrees(directory, maxdepth, ignore_nontagged, nontagged_subdir, l 'even simple to find and catch while testing for me either. Or was it? Make an educated guess. 
:-)') if filtertags: + logging.debug('generate_tagtrees: filtering tags ...') files = filter_files_matching_tags(files, filtertags) if len(files) == 0 and not options.recursive: @@ -2099,7 +2120,7 @@ def generate_tagtrees(directory, maxdepth, ignore_nontagged, nontagged_subdir, l controlled_vocabulary_filename = locate_file_in_cwd_and_parent_directories(os.getcwd(), CONTROLLED_VOCABULARY_FILENAME) if controlled_vocabulary_filename: - logging.debug('I found controlled_vocabulary_filename "' + + logging.debug('generate_tagtrees: I found controlled_vocabulary_filename "' + controlled_vocabulary_filename + '" which I\'m going to link to the tagtrees folder') if not options.dryrun: @@ -2108,7 +2129,7 @@ def generate_tagtrees(directory, maxdepth, ignore_nontagged, nontagged_subdir, l CONTROLLED_VOCABULARY_FILENAME)) else: - logging.debug('I did not find a controlled_vocabulary_filename') + logging.debug('generate_tagtrees: I did not find a controlled_vocabulary_filename') logging.info('Creating tagtrees and their links. It may take a while … ' + '(exponentially with respect to number of tags)') @@ -2142,7 +2163,6 @@ def create_tagtrees_dir(basedirectory, tagpermutation): filename, dirname, \ basename, basename_without_lnk = split_up_filename(currentfile[1]) - logging.debug('generate_tagtrees: handling file "' + filename + '" …') if len(tags_of_currentfile) == 0: @@ -2224,7 +2244,7 @@ def create_tagtrees_dir(basedirectory, tagpermutation): no_uniqueset_tag_found_dir = os.path.join(directory, 'no-' + ("-").join(unique_tagset)) # example: "no-draft-final" if not os.path.isdir(no_uniqueset_tag_found_dir): - logging.debug('creating non-existent no_uniqueset_tag_found_dir "%s" ...' % + logging.debug('generate_tagtrees: creating non-existent no_uniqueset_tag_found_dir "%s" ...' % str(no_uniqueset_tag_found_dir)) if not options.dryrun: os.makedirs(no_uniqueset_tag_found_dir)