github · tausbn · May 16, 2025 · Apr 29, 2025 · May 2, 2025 · Apr 30, 2025
@@ -0,0 +1,3 @@
+name: Test Config
+paths-ignore:
+  - "**/.*/**"
@@ -0,0 +1,6 @@
+|             name              |
++-------------------------------+
+| .hidden_file.py               |
+| another_non_hidden.py         |
+| foo.py                        |
+| visible_file_in_hidden_dir.py |
@@ -0,0 +1,4 @@
+|      name       |
++-----------------+
+| .hidden_file.py |
+| foo.py          |
@@ -0,0 +1,3 @@
+import python
+
+select any(File f).getShortName() as name order by name
@@ -0,0 +1 @@
+print(42)
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+set -Eeuo pipefail # see https://vaneyckt.io/posts/safer_bash_scripts_with_set_euxo_pipefail/
+
+set -x
+
+CODEQL=${CODEQL:-codeql}
+
+SCRIPTDIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+cd "$SCRIPTDIR"
+
+rm -rf db db-skipped
+
+# Test 1: Default behavior should be to extract files in hidden directories
+$CODEQL database create db --language python --source-root repo_dir/
+$CODEQL query run --database db query.ql > query-default.actual
+diff query-default.expected query-default.actual
+
+# Test 2: The default behavior can be overridden by setting `paths-ignore` in the config file
+$CODEQL database create db-skipped --language python --source-root repo_dir/ --codescanning-config=config.yml
+$CODEQL query run --database db-skipped query.ql > query-skipped.actual
+diff query-skipped.expected query-skipped.actual
+
+rm -rf db db-skipped
@@ -41,19 +41,27 @@ def glob_part_to_regex(glob, add_sep):
 
 def glob_to_regex(glob, prefix=""):
     '''Convert entire glob to a compiled regex'''
+    # When the glob ends in `/`, we need to remember this so that we don't accidentally add an
+    # extra separator to the final regex.
+    end_sep = "" if glob.endswith("/") else SEP
     glob = glob.strip().strip("/")
     parts = glob.split("/")
     #Trailing '**' is redundant, so strip it off.
     if parts[-1] == "**":
         parts = parts[:-1]
         if not parts:
             return ".*"
+    # The `glob.strip("/")` call above will have removed all trailing slashes, but if there was at
+    # least one trailing slash, we want there to be an extra (empty) part, so we add it explicitly
+    # here in that case, using the emptiness of `end_sep` as a proxy.
+    if end_sep == "":
+        parts += [""]
     parts = [ glob_part_to_regex(escape(p), True) for p in parts[:-1] ] + [ glob_part_to_regex(escape(parts[-1]), False) ]
     # we need to escape the prefix, specifically because on windows the prefix will be
     # something like `C:\\folder\\subfolder\\` and without escaping the
     # backslash-path-separators will get interpreted as regex escapes (which might be
     # invalid sequences, causing the extractor to crash)
-    full_pattern = escape(prefix) + ''.join(parts) + "(?:" + SEP + ".*|$)"
+    full_pattern = escape(prefix) + ''.join(parts) + "(?:" + end_sep + ".*|$)"
     return re.compile(full_pattern)
 
 def filter_from_pattern(pattern, prev_filter, prefix):

@@ -83,46 +83,21 @@ def _treewalk(self, path):
                 self.logger.debug("Ignoring %s (symlink)", fullpath)
                 continue
             if isdir(fullpath):
-                if fullpath in self.exclude_paths or is_hidden(fullpath):
-                    if is_hidden(fullpath):
-                        self.logger.debug("Ignoring %s (hidden)", fullpath)
-                    else:
-                        self.logger.debug("Ignoring %s (excluded)", fullpath)
-                else:
-                    empty = True
-                    for item in self._treewalk(fullpath):
-                        yield item
-                        empty = False
-                    if not empty:
-                        yield fullpath
+                if fullpath in self.exclude_paths:
+                    self.logger.debug("Ignoring %s (excluded)", fullpath)
+                    continue
+
+                empty = True
+                for item in self._treewalk(fullpath):
+                    yield item
+                    empty = False
+                if not empty:
+                    yield fullpath
             elif self.filter(fullpath):
                 yield fullpath
             else:
                 self.logger.debug("Ignoring %s (filter)", fullpath)
 
-
-if os.name== 'nt':
-    import ctypes
-
-    def is_hidden(path):
-        #Magical windows code
-        try:
-            attrs = ctypes.windll.kernel32.GetFileAttributesW(str(path))
-            if attrs == -1:
-                return False
-            if attrs&2:
-                return True
-        except Exception:
-            #Not sure what to log here, probably best to carry on.
-            pass
-        return os.path.basename(path).startswith(".")
-
-else:
-
-    def is_hidden(path):
-        return os.path.basename(path).startswith(".")
-
-
 def exclude_filter_from_options(options):
     if options.exclude_package:
         choices = '|'.join(mod.replace('.', r'\.') for mod in options.exclude_package)

@@ -10,7 +10,7 @@
 
 #Semantic version of extractor.
 #Update this if any changes are made
-VERSION = "7.1.2"
+VERSION = "7.1.3"
 
 PY_EXTENSIONS = ".py", ".pyw"
 

@@ -0,0 +1,5 @@
+---
+category: minorAnalysis
+---
+
+- The Python extractor now extracts files in hidden directories by default. If you would like to skip files in hidden directories, add `paths-ignore: ["**/.*/**"]` to your [Code Scanning config](https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning#specifying-directories-to-scan). If you would like to skip all hidden files, you can use `paths-ignore: ["**/.*"]`. When using the CodeQL CLI for extraction, specify the configuration (creating the configuration file if necessary) using the `--codescanning-config` option.
@@ -1,3 +1,5 @@
+| .hidden/inner/test.py |
+| .hidden/module.py |
 | folder/module.py |
 | package |
 | package/__init__.py |

@@ -3,3 +3,4 @@
 | Module foo.bar |
 | Module foo.include_test |
 | Package foo |
+| Script hidden_foo.py |
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		import python

		select any(File f).getShortName() as name order by name