Skip to content
This repository has been archived by the owner on Mar 10, 2024. It is now read-only.

Code for doing symbol extraction #65

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions libcflib/symbol_inspection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import jedi

import os

from tqdm import tqdm


def file_path_to_import(file_path: str) -> str:
    """Convert a ``/``-separated Python file path into a dotted import path.

    A trailing ``.py`` extension is dropped, a trailing ``/__init__`` is
    dropped so that a package maps to its directory name, and the remaining
    ``/`` separators become dots.

    Parameters
    ----------
    file_path : str
        Path of a Python source file using ``/`` as separator, e.g.
        ``"pkg/sub/mod.py"`` or ``"pkg/sub/__init__.py"``.

    Returns
    -------
    str
        The dotted import path, e.g. ``"pkg.sub.mod"`` or ``"pkg.sub"``.
    """
    module_path = file_path
    # Only strip the extension when it is a real suffix: a blanket
    # ``replace(".py", "")`` would also eat ".py" from the middle of a name.
    if module_path.endswith(".py"):
        module_path = module_path[: -len(".py")]
    # A package is imported through its directory, not its ``__init__`` file.
    if module_path.endswith("/__init__"):
        module_path = module_path[: -len("/__init__")]
    return module_path.replace("/", ".")


def get_all_symbol_names(top_dir):
    """Collect the dotted names of the symbols found in a package directory.

    Every ``.py`` file below ``top_dir`` is handed to Jedi; the completions
    whose full name contains ``"<package>."`` are kept, names nested inside a
    class or function are discarded, and each remaining symbol is reported
    both under its Jedi full name and under the import path of the file it
    was found in.

    Parameters
    ----------
    top_dir : str
        Path to the top-level directory of the package; its last path
        component is used as the package's import name.

    Returns
    -------
    set of str
        Dotted symbol names such as ``"pkg.module.function"``.
    """
    # NOTE: Jedi seems to pick up things that are protected by an
    # ``if __name__ == "__main__":`` guard. This can over-report viable
    # imports, which shouldn't cause issues for an audit since we don't
    # expect third parties to depend on those.
    #
    # NOTE(review): a reviewer suggested only walking the files of
    # directories that contain an ``__init__.py`` (unless it's one of those
    # non-directory packages, which have caused trouble in the past). That
    # filtering is not implemented here.
    symbols_dict = {}

    # abspath normalises the path, so a trailing separator or a ``..``
    # component cannot leave us with an empty or wrong package name.
    package_root = os.path.abspath(top_dir)
    module_import = os.path.basename(package_root)
    import_base = os.path.dirname(package_root)

    # walk all the files looking for python files
    for root, _dirs, files in tqdm(os.walk(top_dir)):
        for file in files:
            if not file.endswith(".py"):
                continue

            file_name = os.path.join(root, file)
            # Express the file relative to the package's parent directory so
            # the import path starts at the package name, even when a module
            # or sub-package shares that name.
            relative_path = os.path.relpath(file_name, import_base)
            import_name = file_path_to_import(relative_path.replace(os.sep, "/"))

            data = jedi.Script(path=file_name).complete()
            symbols_from_script = {
                k.full_name: k.type
                for k in data
                if k.full_name and module_import + "." in k.full_name
            }

            # cull statements within functions and classes, which are not
            # importable; the trailing dot keeps siblings that merely share
            # a prefix (``get`` vs ``get_all``) from being culled.
            classes_and_functions = {
                name
                for name, kind in symbols_from_script.items()
                if kind in ("class", "function")
            }
            symbols_dict[import_name] = {
                name
                for name in symbols_from_script
                if not any(
                    name.startswith(scope + ".") for scope in classes_and_functions
                )
            }

    symbols = set()
    # handle star imports, which don't usually get added but are valid
    # symbols: also expose every symbol under the import path of the file
    # it was seen in.
    for import_name, names in symbols_dict.items():
        symbols.update(names)
        symbols.update(
            f"{import_name}.{name.rsplit('.', 1)[-1]}" for name in names
        )
    return symbols
10 changes: 10 additions & 0 deletions tests/test_symbol_inspection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from pathlib import Path

from libcflib.symbol_inspection import get_all_symbol_names


def test_get_all_symbol_names():
    """The symbol inspector must list its own entry point when run on libcflib."""
    # The package directory sits next to the directory holding this test file.
    package_dir = Path(__file__).parent / ".." / "libcflib"
    found_symbols = get_all_symbol_names(str(package_dir))
    expected = "libcflib.symbol_inspection.get_all_symbol_names"
    assert expected in found_symbols