Showing 5 changed files with 222 additions and 1 deletion.
@@ -13,7 +13,6 @@ dist
download_release.sh

src/ai3/_version.py
papers/JOSS

papers/JOSS/jats
papers/IEEE_HPEC_24/results
@@ -0,0 +1,31 @@
import torch
import torchvision
import ai3


def conv2d_selector(orig, input_shape) -> str:
    # use the direct algorithm for layers with few output channels
    # operating on large spatial inputs (input_shape is N, C, H, W),
    # otherwise use SMM
    out_channels = orig.weight.shape[0]
    if (out_channels < 50 and
            input_shape[1] < 50 and
            input_shape[2] > 150 and
            input_shape[3] > 150):
        return 'direct'
    return 'smm'


input_shape = (1, 3, 224, 224)
input_data = torch.randn(input_shape)
vgg16 = torchvision.models.vgg16(
    weights=torchvision.models.VGG16_Weights.DEFAULT)
vgg16.eval()
with torch.inference_mode():
    orig_out = vgg16(input_data)
    # convert: build a new ai3.Model using the selected algorithms
    model: ai3.Model = ai3.convert(
        vgg16, {'conv2d': conv2d_selector,
                'maxpool2d': 'default'},
        input_shape)
    sb_out = model(input_data)
    # swap_operation: replace the convolution layers of vgg16 in place,
    # alternating between the direct and SMM algorithms
    ai3.swap_operation(torch.nn.Conv2d, vgg16,
                       ['direct', 'smm'] * 8, input_shape)
    sc_out = vgg16(input_data)
    assert torch.allclose(
        orig_out, sb_out, atol=1e-4)
    assert torch.allclose(
        orig_out, sc_out, atol=1e-4)
This file cannot be displayed.
@@ -0,0 +1,61 @@
@misc{initial_proposal,
  title         = {A Framework to Enable Algorithmic Design Choice Exploration in DNNs},
  author        = {Timothy L. {Cronin IV} and Sanmukh Kuppannagari},
  year          = {2024},
  eprint        = {2410.08300},
  archivePrefix = {arXiv},
  primaryClass  = {cs.LG},
  url           = {https://arxiv.org/abs/2410.08300},
}

@inproceedings{smm,
  author    = {Ofir, Amir and Ben-Artzi, Gil},
  booktitle = {2022 IEEE/CVF Conference on Computer Vision and Pattern
               Recognition Workshops (CVPRW)},
  title     = {SMM-Conv: Scalar Matrix Multiplication with Zero Packing for
               Accelerated Convolution},
  year      = {2022},
  pages     = {3066-3074},
  keywords  = {Deep learning;Computer vision;Conferences;Memory
               management;Network architecture;Pattern recognition;Kernel},
  doi       = {10.1109/CVPRW56347.2022.00346},
}

@inproceedings{pytorch2,
  author    = {Ansel, Jason and Yang, Edward and He, Horace and Gimelshein,
               Natalia and Jain, Animesh and Voznesensky, Michael and Bao, Bin and
               Bell, Peter and Berard, David and Burovski, Evgeni and Chauhan,
               Geeta and Chourdia, Anjali and Constable, Will and Desmaison, Alban
               and DeVito, Zachary and Ellison, Elias and Feng, Will and Gong,
               Jiong and Gschwind, Michael and Hirsh, Brian and Huang, Sherlock
               and Kalambarkar, Kshiteej and Kirsch, Laurent and Lazos, Michael
               and Lezcano, Mario and Liang, Yanbo and Liang, Jason and Lu,
               Yinghai and Luk, CK and Maher, Bert and Pan, Yunjie and Puhrsch,
               Christian and Reso, Matthias and Saroufim, Mark and Siraichi,
               Marcos Yukio and Suk, Helen and Suo, Michael and Tillet, Phil and
               Wang, Eikan and Wang, Xiaodong and Wen, William and Zhang, Shunting
               and Zhao, Xu and Zhou, Keren and Zou, Richard and Mathews, Ajit and
               Chanan, Gregory and Wu, Peng and Chintala, Soumith},
  booktitle = {29th ACM International Conference on Architectural Support for
               Programming Languages and Operating Systems, Volume 2 (ASPLOS
               '24)},
  doi       = {10.1145/3620665.3640366},
  month     = apr,
  publisher = {ACM},
  title     = {{P}y{T}orch 2: Faster Machine Learning Through Dynamic {P}ython
               Bytecode Transformation and Graph Compilation},
  url       = {https://pytorch.org/assets/pytorch2-2.pdf},
  year      = {2024},
}

@software{torchvision,
  title        = {TorchVision: PyTorch's Computer Vision library},
  author       = {TorchVision maintainers and contributors},
  year         = {2016},
  journal      = {GitHub repository},
  publisher    = {GitHub},
  howpublished = {\url{https://github.com/pytorch/vision}},
}
@@ -0,0 +1,130 @@
---
title: '*ai3:* A Framework Enabling Algorithmic Selection in Deep Neural Networks'
tags:
  - Python
  - artificial intelligence
  - machine learning
  - algorithms
  - acceleration
authors:
  - name: Timothy L. {Cronin IV}
    affiliation: "1"
  - name: Sanmukh R. Kuppannagari
    affiliation: "1"
affiliations:
  - index: 1
    name: Department of Computer and Data Sciences, Case Western Reserve University
bibliography: paper.bib
header-includes:
  - |
    ```{=latex}
    \usepackage{listings}
    \definecolor{codegreen}{rgb}{0,0.6,0}
    \definecolor{codegray}{rgb}{0.5,0.5,0.5}
    \definecolor{codepurple}{rgb}{0.58,0,0.82}
    \definecolor{backcolor}{rgb}{0.95,0.95,0.92}
    \usepackage{caption}
    \renewcommand{\lstlistingname}{Code Sample}
    \lstdefinestyle{mystyle}{
      language=Python,
      backgroundcolor=\color{backcolor},
      caption=\relax,
      commentstyle=\color{codegreen},
      keywordstyle=\color{magenta},
      stringstyle=\color{codepurple},
      basicstyle=\ttfamily\small,
      breakatwhitespace=false,
      breaklines=true,
      captionpos=b,
      keepspaces=true,
      showspaces=false,
      showstringspaces=false,
      showtabs=false,
      tabsize=2
    }
    \lstset{style=mystyle}
    ```
---

# Summary

*ai3*, initially proposed in [@initial_proposal], is an open source framework
providing fine-grained algorithmic control over an existing deep neural network
(*DNN*) in both its training and inference stages. *ai3* provides
high-performance, accelerated *C++* implementations of various algorithms for
common deep learning operations. Additionally, *ai3* enables users to supply
their own *C++* implementations, which are included in the package upon
installation. Both the user-defined and built-in implementations can be
selected to perform the operations making up a *DNN*. Selection and alteration
of the *DNN* are done through an easy-to-use *Python* interface
(Code Sample \ref{lst:example}). *ai3* is an abbreviation of algorithmic
innovations for accelerated implementations of artificial intelligence,
reflecting the accelerated implementations the framework provides and the
potential for algorithmic innovations when using it.

# Statement of Need

*DNNs* have demonstrated significant success across many domains. This success
has been accompanied by substantial improvements to the algorithms used to
perform the operations forming a *DNN*. However, the most efficient algorithm
for any given operation of a *DNN* depends on many factors, such as the use
case of the *DNN*, the input size, the operation's hyper-parameters, and the
available hardware. Because of this, discovering the best-performing algorithm
is a difficult and time-consuming task. Additionally, altering the algorithms
used by a *DNN* is a difficult task requiring algorithm development and
hardware mapping expertise. This makes such exploration, and its benefit of
improved performance, inaccessible to a significant portion of users who come
from scientific and engineering domains but may not be experts in computer
science.

# Framework Overview

The framework provides two functions to perform algorithmic selection and
swapping. One function creates a new, equivalent *DNN* with all operations from
the original *DNN* converted to the framework's implementations of the selected
algorithms. The second function swaps, in place, a specific operation out of
the existing *DNN* for the framework's implementation of the selected
algorithm. The function which swaps a specific operation attempts to integrate
the new operation with the *DNN's* original framework. For example, after
swapping the operations within a *DNN* built with *PyTorch* [@pytorch2], the
*DNN* can still be trained and compiled for execution in *PyTorch's* graph
mode. Both of these functions are illustrated in Figure \ref{fig:overview}.

![Possible Paths of a *DNN* Through *ai3*\label{fig:overview}](./framework_overview.png)
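
The sketch below illustrates the two paths on a small *PyTorch* module; the
network, its name, and the algorithms chosen here are assumptions made only
for illustration, not part of the framework.

```python
import torch
import ai3

# an illustrative network; any PyTorch DNN can be used
net = torch.nn.Sequential(
    torch.nn.Conv2d(3, 8, kernel_size=3, padding=1),
    torch.nn.ReLU(),
    torch.nn.MaxPool2d(2))
sample_shape = (1, 3, 32, 32)

# first path: create a new, equivalent ai3.Model whose conv2d
# operations use the framework's SMM implementation
converted = ai3.convert(net, {'conv2d': 'smm'}, sample_shape)

# second path: swap the conv2d operations out of net in place;
# net remains a PyTorch module that can still be trained
ai3.swap_operation(torch.nn.Conv2d, net, 'direct', sample_shape)
```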

Both of these functions have an optional parameter for the algorithmic
selector; if none is passed, the framework selects a default algorithm. The
function converting every operation receives a mapping from operation type to
selector, while the function swapping one operation receives a single
algorithmic selector for the operation being swapped. There are multiple types
of algorithmic selectors. A selector may be a single algorithm, used for all
instances of the operation; a list of algorithms, where each instance uses the
algorithm whose index matches that instance's position among operations of the
same type; or a function which returns the algorithm to use. The framework
calls this function with the original operation as a parameter and, if a sample
input shape was provided, with the shape of the input to that operation.
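
The following hypothetical sketch shows the three selector forms; the
two-layer network and the `pick` function are assumptions made only for
illustration.

```python
import torch
import ai3

# hypothetical network containing two conv2d operations
cnn = torch.nn.Sequential(
    torch.nn.Conv2d(3, 16, 3), torch.nn.ReLU(),
    torch.nn.Conv2d(16, 32, 3), torch.nn.ReLU())
sample_shape = (1, 3, 64, 64)


def pick(orig, input_shape) -> str:
    # callable selector: called per conv2d operation with the original
    # module and, because a sample input shape was given, the shape of
    # that operation's input
    return 'direct' if orig.out_channels <= 16 else 'smm'


# single algorithm: every conv2d operation uses SMM
m1 = ai3.convert(cnn, {'conv2d': 'smm'}, sample_shape)
# list: the i-th conv2d operation uses the i-th algorithm
m2 = ai3.convert(cnn, {'conv2d': ['direct', 'smm']}, sample_shape)
# callable: the framework asks pick for each conv2d operation
m3 = ai3.convert(cnn, {'conv2d': pick}, sample_shape)
```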

# Code Example

This example demonstrates the use of both functions on a *VGG-16* model
provided by the *torchvision* [@torchvision] package. In the call to `convert`,
a mapping from operation to algorithmic selector is passed. For the
convolutions, the original operation and the shape of the input to that
operation are passed to a function which analyzes those parameters and returns
the algorithm to use. For all other operations, the default algorithm provided
by the framework is used. In the `swap_operation` call, the `torch.nn.Conv2d`
type is passed to swap out the convolutional layers, along with a list of $16$
elements alternating between `direct` and `SMM` [@smm]; each convolution layer
uses the algorithm at its index in the list, so the layers alternate between
the `direct` and `SMM` algorithms.

\lstinputlisting[label={lst:example}, caption={Use of Both Functions}]{example.py}

# Acknowledgements

This work is sponsored by the *U.S.* National Science Foundation under award
numbers $2117439$, $2411447$, and $2425535$.

# References