Skip to content

Commit

Permalink
Add Python API (#11)
Browse files Browse the repository at this point in the history
* Add run function for Python API

* Fix docstrings

* Add test for std_lib.py module

* Update README to include Python API

* Bump version
  • Loading branch information
zzhengnan authored Jun 13, 2021
1 parent 96f9e26 commit ad675dd
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 40 deletions.
52 changes: 30 additions & 22 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,21 @@

Ever wondered which dependencies your Python project relies on?

`iscan` gives you a clear view of all the third-party packages imported by your project, along with modules in the standard library.
`iscan` gives you a clear view of all the third-party packages and standard library modules imported by your project.

- [Example](#Example)
- [Installation](#Installation)
- [Dependencies](#Dependencies)
- [Usage](#Usage)
- [1. Quick start](#1.-Quick-start)
- [2. Installation](#2.-Installation)
- [3. Dependencies](#3.-Dependencies)
- [4. Usage](#4.-Usage)
- [4.1 Command line interface](#4.1-Command-line-interface)
- [4.2 Python API](#4.2-Python-API)

## Example
Running `iscan` on a local clone of the popular HTTP library [requests](https://github.com/psf/requests/tree/v2.25.1) gives the following results -- these are all the third-party packages and standard library modules `requests` relies on.
## 1. Quick start
Simply provide the path to your project. That's it!

Here's an example of running `iscan` on a local clone of the popular HTTP library [requests](https://github.com/psf/requests/tree/v2.25.1). These are all the third-party packages and standard library modules `requests` relies on.
```
$ iscan ./requests/ # From the top level of the requests repo
$ iscan ./requests/ # Executed at the top level of the requests repo
Packages imported across all Python files in directory "./requests/"
Third-party packages:
Expand All @@ -42,17 +46,20 @@ Standard library modules:
...
```

## Installation
## 2. Installation
`iscan` can be installed with either conda or pip.
```
$ conda install iscan -c conda-forge
$ python -m pip install iscan
```

## Dependencies
## 3. Dependencies
`iscan` is light-weight and doesn't rely on anything outside the standard library. The core functionality relies on the [ast](https://docs.python.org/3/library/ast.html#module-ast) module.

## Usage
## 4. Usage
`iscan` provides both a command line interface and a Python API.

### 4.1 Command line interface
Basic usage requires simply providing the path to the directory you wish to scan.

```
Expand All @@ -79,18 +86,19 @@ Third-party packages:
- urllib3
```

The complete help message is shown below.
The complete help message can be accessed as follows.
```
$ iscan --help
usage: iscan [-h] [-x DIR_TO_EXCLUDE] [--ignore-std-lib] DIR_TO_SCAN
Look for packages imported across all Python files in a given directory.
positional arguments:
DIR_TO_SCAN target directory to scan
```

optional arguments:
-h, --help show this help message and exit
-x DIR_TO_EXCLUDE directory to exclude during scanning
--ignore-std-lib whether to omit standard library modules
### 4.2 Python API
The Python API exposes a `run` function that returns the scanning result as a dictionary, split between third-party packages and standard library modules.
```python
>>> from iscan import run
>>> dir_to_scan = './requests'
>>> dir_to_exclude = './tests' # Use None to not exclude anything (default)
>>> result = run(dir_to_scan, dir_to_exclude)
>>> result
{'third_party': ['OpenSSL', 'certifi', 'chardet', 'cryptography', 'idna', 'simplejson', 'urllib3'],
'std_lib': ['Cookie', 'StringIO', '__future__', '_winreg', 'base64', 'calendar', 'codecs', ...]}
```
5 changes: 4 additions & 1 deletion iscan/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
__version__ = '0.4.0'
from iscan.scan import run


__version__ = '0.4.1'
41 changes: 28 additions & 13 deletions iscan/scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import argparse
import ast
import os
from typing import Iterable, List
from typing import Dict, Iterable, List, Optional

from iscan.std_lib import separate_third_party_from_std_lib

Expand Down Expand Up @@ -66,14 +66,15 @@ def convert_source_to_tree(fpath: str) -> ast.Module:
return tree


def scan_directory(dir_to_scan: str, dir_to_exclude: str) -> List[str]:
def scan_directory(dir_to_scan: str, dir_to_exclude: Optional[str] = None) -> List[str]:
"""Extract packages imported across all Python files in a directory.
Args:
dir_to_scan: Path to the directory of interest
dir_to_exclude: Path to the directory to be excluded during scanning
Returns:
List of packages imported; might contain duplicates
Imported packages; might contain duplicates
"""
all_imports = []
for root_dir, _, fnames in os.walk(top=dir_to_scan):
Expand Down Expand Up @@ -115,22 +116,23 @@ def get_unique_base_packages(packages: Iterable[str]) -> List[str]:
"""Remove duplicates and extract the base package names.
Args:
packages: List of package names that might contain duplicates
packages: Package names that might contain duplicates
Returns:
List of unique base package names
Unique base package names
"""
return sorted(set(map(get_base_name, packages)))


def show_result(third_party: Iterable[str], std_lib: Iterable[str], ignore_std_lib: bool) -> None:
def show_result(result: Dict[str, List[str]], ignore_std_lib: bool) -> None:
"""Print the result of running iscan.
Args:
third_party: List of third-party packages
std_lib: List of standard library modules
result: Imported third-party packages and standard library modules
ignore_std_lib: Whether to omit standard library modules in the output
"""
third_party, std_lib = result['third_party'], result['std_lib']

print('\nThird-party packages:\n - ', end='')
print('\n - '.join(third_party))

Expand All @@ -139,6 +141,22 @@ def show_result(third_party: Iterable[str], std_lib: Iterable[str], ignore_std_l
print('\n - '.join(std_lib))


def run(dir_to_scan: str, dir_to_exclude: Optional[str] = None) -> Dict[str, List[str]]:
    """Scan a directory and classify the packages its Python files import.

    Args:
        dir_to_scan: Path to the directory of interest
        dir_to_exclude: Path to the directory to be excluded during scanning;
            defaults to None

    Returns:
        Dictionary with a 'third_party' entry and a 'std_lib' entry, each
        holding the corresponding names
    """
    # Collect raw imports, then reduce them to unique base package names.
    base_packages = get_unique_base_packages(
        scan_directory(dir_to_scan, dir_to_exclude)
    )
    third_party, std_lib = separate_third_party_from_std_lib(base_packages)
    return {'third_party': third_party, 'std_lib': std_lib}


def cli() -> argparse.Namespace:
"""Command line interface."""
parser = argparse.ArgumentParser(
Expand Down Expand Up @@ -168,14 +186,11 @@ def cli() -> argparse.Namespace:

def main() -> None:
args = cli()

all_imports = scan_directory(args.DIR_TO_SCAN, args.DIR_TO_EXCLUDE)
unique_imports = get_unique_base_packages(all_imports)
third_party, std_lib = separate_third_party_from_std_lib(unique_imports)
result = run(args.DIR_TO_SCAN, args.DIR_TO_EXCLUDE)

print(
f'Packages imported across all Python files in directory "{args.DIR_TO_SCAN}"',
end=f', excluding "{args.DIR_TO_EXCLUDE}"\n' if args.DIR_TO_EXCLUDE else '\n'
)

show_result(third_party, std_lib, args.IGNORE_STD_LIB)
show_result(result, args.IGNORE_STD_LIB)
6 changes: 3 additions & 3 deletions iscan/std_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def separate_third_party_from_std_lib(packages: Iterable[str]) -> Tuple[List[str
"""Separate third-party packages from standard library modules.
Args:
packages: List of package names
packages: Package names
Returns:
Third-party packages, standard library modules
Expand All @@ -70,10 +70,10 @@ def get_std_lib(version: str) -> List[str]:
"""Scrape modules in the standard library for a given Python version.
Args:
url: List of package names
version: Python version
Returns:
Third-party packages, standard library modules
Standard library modules for a given Python version
"""
import requests # type: ignore
from bs4 import BeautifulSoup # type: ignore
Expand Down
17 changes: 16 additions & 1 deletion iscan/tests/test_scan.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest

from iscan.scan import (ImportScanner, convert_source_to_tree, get_base_name,
get_unique_base_packages, scan_directory)
get_unique_base_packages, run, scan_directory)


CURRENT_DIR = abspath(dirname(__file__))
Expand Down Expand Up @@ -49,3 +49,18 @@ def test_get_base_name(full_name, expected):
])
def test_get_unique_base_packages(packages, expected):
assert get_unique_base_packages(packages) == expected


@pytest.mark.parametrize('dir_to_exclude, expected', [
    (
        None,
        {
            'third_party': ['matplotlib', 'numpy', 'pandas'],
            'std_lib': ['ctypes', 'datetime', 'os', 'shutil', 'time'],
        },
    ),
    (
        join(CURRENT_DIR, 'test_package', 'city'),
        {
            'third_party': ['matplotlib', 'numpy', 'pandas'],
            'std_lib': ['os', 'shutil', 'time'],
        },
    ),
])
def test_run(dir_to_exclude, expected):
    """Check run() on test_package, with and without an excluded directory."""
    scanned = run(join(CURRENT_DIR, 'test_package'), dir_to_exclude)
    assert scanned == expected
7 changes: 7 additions & 0 deletions iscan/tests/test_std_lib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from iscan.std_lib import separate_third_party_from_std_lib


def test_separate_third_party_from_std_lib():
    """A mixed list is split into third-party packages and standard library modules."""
    mixed = ['numpy', 'pandas', 'os', 'time']
    outcome = separate_third_party_from_std_lib(mixed)
    assert outcome == (['numpy', 'pandas'], ['os', 'time'])

0 comments on commit ad675dd

Please sign in to comment.