add JOSS source

KLab-AI3 · Oct 20, 2024 · b772d22 · b772d22
1 parent d9c00e5
commit b772d22
Show file tree

Hide file tree

Showing 5 changed files with 222 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -13,7 +13,6 @@ dist
 download_release.sh
 
 src/ai3/_version.py
-papers/JOSS
 
 papers/JOSS/jats
 papers/IEEE_HPEC_24/results
diff --git a/papers/JOSS/example.py b/papers/JOSS/example.py
@@ -0,0 +1,31 @@
+import torch
+import torchvision
+import ai3
+
+def conv2d_selector(orig, input_shape) -> str:
+    """Pick the convolution algorithm for one convolution layer.
+
+    Args:
+        orig: The original convolution layer; only ``orig.weight.shape[0]``
+            (the number of output channels) is read.
+        input_shape: Shape of the input to that layer, indexed here as
+            (N, C, H, W) like the (1, 3, 224, 224) model input.
+
+    Returns:
+        ``'direct'`` when the layer has fewer than 50 output channels and
+        its input has fewer than 50 channels with height and width both
+        above 150; ``'smm'`` otherwise.
+    """
+    out_channels = orig.weight.shape[0]
+    # Index 1 is the channel count; indices 2 and 3 are the spatial dims.
+    # Testing index 1 against both "< 50" and "> 150" can never succeed.
+    if (out_channels < 50 and
+        input_shape[1] < 50 and
+        input_shape[2] > 150 and
+            input_shape[3] > 150):
+        return 'direct'
+    return 'smm'
+
+# Random input batch with the shape of a single 3-channel 224x224 image.
+input_shape = (1, 3, 224, 224)
+input_data = torch.randn(input_shape)
+# Pretrained VGG-16 from torchvision, switched to evaluation mode.
+vgg16 = torchvision.models.vgg16(
+    weights=torchvision.models.VGG16_Weights.DEFAULT)
+vgg16.eval()
+with torch.inference_mode():
+    # Reference output; must be computed before vgg16 is modified below.
+    orig_out = vgg16(input_data)
+    # Convert the whole model: conv2d layers are routed through
+    # conv2d_selector, maxpool2d layers use the 'default' algorithm.
+    model: ai3.Model = ai3.convert(
+        vgg16, {'conv2d': conv2d_selector,
+                'maxpool2d': 'default'},
+        input_shape)
+    sb_out = model(input_data)
+    # Swap the Conv2d layers of vgg16 itself, using a 16-entry list that
+    # alternates between 'direct' and 'smm'.
+    # NOTE(review): torchvision's VGG-16 is usually described as having 13
+    # Conv2d layers -- confirm that a 16-entry list is intended here.
+    ai3.swap_operation(torch.nn.Conv2d, vgg16, ['direct', 'smm'] * 8, input_shape)
+    sc_out = vgg16(input_data)
+    # Both variants must match the reference within 1e-4 absolute tolerance.
+    assert torch.allclose(
+        orig_out, sb_out, atol=1e-4)
+    assert torch.allclose(
+        orig_out, sc_out, atol=1e-4)
diff --git a/papers/JOSS/framework_overview.png b/papers/JOSS/framework_overview.png
diff --git a/papers/JOSS/paper.bib b/papers/JOSS/paper.bib
@@ -0,0 +1,61 @@
+@misc{initial_proposal,
+    title = {A Framework to Enable Algorithmic Design Choice Exploration in DNNs
+             },
+    author = {Timothy L. {Cronin IV} and Sanmukh Kuppannagari},
+    year = {2024},
+    eprint = {2410.08300},
+    archivePrefix = {arXiv},
+    primaryClass = {cs.LG},
+    url = {https://arxiv.org/abs/2410.08300},
+}
+
+@inproceedings{smm,
+    author = {Ofir, Amir and Ben-Artzi, Gil},
+    booktitle = {2022 IEEE/CVF Conference on Computer Vision and Pattern
+                 Recognition Workshops (CVPRW)},
+    title = {SMM-Conv: Scalar Matrix Multiplication with Zero Packing for
+             Accelerated Convolution},
+    year = {2022},
+    volume = {},
+    number = {},
+    pages = {3066-3074},
+    keywords = {Deep learning;Computer vision;Conferences;Memory
+                management;Network architecture;Pattern recognition;Kernel},
+    doi = {10.1109/CVPRW56347.2022.00346},
+}
+
+@inproceedings{pytorch2,
+    author = {Ansel, Jason and Yang, Edward and He, Horace and Gimelshein,
+              Natalia and Jain, Animesh and Voznesensky, Michael and Bao, Bin and
+              Bell, Peter and Berard, David and Burovski, Evgeni and Chauhan,
+              Geeta and Chourdia, Anjali and Constable, Will and Desmaison, Alban
+              and DeVito, Zachary and Ellison, Elias and Feng, Will and Gong,
+              Jiong and Gschwind, Michael and Hirsh, Brian and Huang, Sherlock
+              and Kalambarkar, Kshiteej and Kirsch, Laurent and Lazos, Michael
+              and Lezcano, Mario and Liang, Yanbo and Liang, Jason and Lu,
+              Yinghai and Luk, CK and Maher, Bert and Pan, Yunjie and Puhrsch,
+              Christian and Reso, Matthias and Saroufim, Mark and Siraichi,
+              Marcos Yukio and Suk, Helen and Suo, Michael and Tillet, Phil and
+              Wang, Eikan and Wang, Xiaodong and Wen, William and Zhang, Shunting
+              and Zhao, Xu and Zhou, Keren and Zou, Richard and Mathews, Ajit and
+              Chanan, Gregory and Wu, Peng and Chintala, Soumith},
+    booktitle = {29th ACM International Conference on Architectural Support for
+                 Programming Languages and Operating Systems, Volume 2 (ASPLOS
+                 '24)},
+    doi = {10.1145/3620665.3640366},
+    month = apr,
+    publisher = {ACM},
+    title = {{P}y{T}orch 2: Faster Machine Learning Through Dynamic {P}ython
+             Bytecode Transformation and Graph Compilation},
+    url = {https://pytorch.org/assets/pytorch2-2.pdf},
+    year = {2024},
+}
+
+@software{torchvision,
+    title = {TorchVision: PyTorch's Computer Vision library},
+    author = {TorchVision maintainers and contributors},
+    year = 2016,
+    journal = {GitHub repository},
+    publisher = {GitHub},
+    howpublished = {\url{https://github.com/pytorch/vision}},
+}
diff --git a/papers/JOSS/paper.md b/papers/JOSS/paper.md
@@ -0,0 +1,130 @@
+---
+title: '*ai3:* A Framework Enabling Algorithmic Selection in Deep Neural Networks'
+tags:
+  - Python
+  - artificial intelligence
+  - machine learning
+  - algorithms
+  - acceleration
+authors:
+  - name: Timothy L. {Cronin IV}
+    affiliation: "1"
+  - name: Sanmukh R. Kuppannagari
+    affiliation: "1"
+affiliations:
+  - index: 1
+    name: Department of Computer and Data Sciences, Case Western Reserve University
+bibliography: paper.bib
+header-includes:
+- |
+  ```{=latex}
+  \usepackage{listings}
+  \definecolor{codegreen}{rgb}{0,0.6,0}
+  \definecolor{codegray}{rgb}{0.5,0.5,0.5}
+  \definecolor{codepurple}{rgb}{0.58,0,0.82}
+  \definecolor{backcolor}{rgb}{0.95,0.95,0.92}
+
+  \usepackage{listings}
+  \usepackage{caption}
+  \renewcommand{\lstlistingname}{Code Sample}
+  \lstdefinestyle{mystyle}{
+      language=Python,
+      backgroundcolor=\color{backcolor},
+      caption=\relax,
+      commentstyle=\color{codegreen},
+      keywordstyle=\color{magenta},
+      stringstyle=\color{codepurple},
+      basicstyle=\ttfamily\small,
+      breakatwhitespace=false,
+      breaklines=true,
+      captionpos=b,
+      keepspaces=true,
+      showspaces=false,
+      showstringspaces=false,
+      showtabs=false,
+      tabsize=2
+  }
+  \lstset{style=mystyle}
+  ```
+---
+
+# Summary
+
+*ai3*, initially proposed in [@initial_proposal], is an open source framework
+providing fine-grained algorithmic control over an existing deep neural network (*DNN*) in both its
+training and inference stages. *ai3* provides high-performance, accelerated
+*C++* implementations of various algorithms to complete common deep learning
+operations. Additionally, *ai3* enables users to implement their own *C++*
+implementations which are included in the package upon installation. Both the
+user-defined and built-in implementations can be selected to complete the
+operations making up a *DNN*. Selection and altering of the *DNN* is done via
+an easy-to-use *Python* interface (Code Sample \ref{lst:example}). *ai3* is an abbreviation
+of algorithmic innovations for accelerated implementations of artificial
+intelligence, reflecting the accelerated implementations provided by the
+framework and the potential for algorithmic innovations when using it.
+
+# Statement of Need
+
+*DNNs* have demonstrated significant success across many domains. This success
+has been accompanied by substantial improvements to the algorithms used to
+perform the operations forming the *DNN*. However, the most efficient algorithm
+for any given operation of a *DNN* depends on a high number of factors such as
+the use case of the *DNN*, input size, operation hyper-parameters and hardware
+available. Due to this, discovering the best-performing algorithm is a
+difficult and time-consuming task. Additionally, altering the algorithms in use
+by a *DNN* is a difficult task requiring algorithm development and
+hardware mapping expertise. This makes such exploration and its benefit of
+improved performance inaccessible to a significant portion of users who come
+from scientific and engineering domains but may not be experts in computer
+science.
+
+# Framework Overview
+
+The framework provides two functions to perform algorithmic selection and
+swapping. One function creates a new equivalent *DNN* with all operations from the
+original *DNN* converted to the framework's implementations of the selected
+algorithms. The second function swaps, in place, a specific operation out of
+the existing *DNN* for the framework's implementation of the selected algorithm.
+The function which swaps a specific operation attempts to make the new
+operation integrate well with the *DNN's* original framework. For example, after
+swapping the operations within a *DNN* built with *PyTorch* [@pytorch2], the
+*DNN* can still be trained and compiled for execution in *PyTorch's* graph
+mode. Both of these functions are illustrated in figure \ref{fig:overview}.
+
+![Possible Paths of a *DNN* Through *ai3*\label{fig:overview}](./framework_overview.png)
+
+Both of these functions have an optional parameter for the algorithmic
+selector; if none is passed, a default algorithm is selected by the
+framework. The function converting every operation receives a mapping from
+operation type to the selector and the function swapping one operation receives
+a single algorithmic selector for the operation being swapped. There are
+multiple types of algorithmic selectors. Possible options include a single
+algorithm which is used for all instances of the operation or a list of
+algorithms where each operation uses the algorithm with the same index as that
+operation has relative to other operations of the same type. Another option is a
+function which returns the algorithm to use. The function is called by the
+framework with the original operation as the parameter; the function will also
+be passed the input shape if an input shape is provided to the framework
+function (`convert` or `swap_operation`) performing the algorithmic selection.
+
+# Code Example
+
+This example demonstrates use of both functions on a *VGG-16* model provided
+by the *torchvision* [@torchvision] package. In the call to `convert`, a mapping
+is passed from operation to algorithmic selector. When performing the swaps
+for convolution, the operation and the shape of the input to that operation are
+passed to a function which analyzes those parameters and returns the algorithm
+to use. For all other operations the algorithm used is the default provided by
+the framework. In the `swap_operation` call, the `torch.nn.Conv2d` type is passed to
+swap out convolutional layers, and a list of $16$ elements alternating between ``direct``
+and ``SMM`` [@smm] is passed, meaning the $16$ convolution layers alternate between
+the ``direct`` and ``SMM`` algorithms to perform the convolutions.
+
+\lstinputlisting[label={lst:example}, caption={Use of Both Functions}]{example.py}
+
+# Acknowledgements
+
+This work is sponsored by the *U.S.* National Science Foundation under award
+numbers $2117439$, $2411447$, and $2425535$.
+
+# References