
Commit 8ac10e9

PoojanSmart and pre-commit-ci[bot] authored and committed
Add automatic differentiation algorithm (TheAlgorithms#10977)
* Added automatic differentiation algorithm
* file name changed
* Resolved pre commit errors
* updated dependency
* added noqa for ignoring check
* adding typing_extension for adding Self type in __new__
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* sorted requirement.text dependency
* [pre-commit.ci] auto fixes from pre-commit.com hooks
  for more information, see https://pre-commit.ci
* resolved ruff

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent a09c5bd commit 8ac10e9

File tree

2 files changed: +329 -1 lines changed

@@ -0,0 +1,327 @@
"""
Demonstration of the Automatic Differentiation (Reverse mode).

Reference: https://en.wikipedia.org/wiki/Automatic_differentiation

Author: Poojan Smart
Email: smrtpoojan@gmail.com
"""
from __future__ import annotations

from collections import defaultdict
from enum import Enum
from types import TracebackType
from typing import Any

import numpy as np
from typing_extensions import Self  # noqa: UP035


class OpType(Enum):
    """
    Class represents list of supported operations on Variable for gradient calculation.
    """

    ADD = 0
    SUB = 1
    MUL = 2
    DIV = 3
    MATMUL = 4
    POWER = 5
    NOOP = 6


class Variable:
    """
    Class represents n-dimensional object which is used to wrap numpy array on which
    operations will be performed and the gradient will be calculated.

    Examples:
    >>> Variable(5.0)
    Variable(5.0)
    >>> Variable([5.0, 2.9])
    Variable([5.  2.9])
    >>> Variable([5.0, 2.9]) + Variable([1.0, 5.5])
    Variable([6.  8.4])
    >>> Variable([[8.0, 10.0]])
    Variable([[ 8. 10.]])
    """

    def __init__(self, value: Any) -> None:
        self.value = np.array(value)

        # pointers to the operations to which the Variable is input
        self.param_to: list[Operation] = []
        # pointer to the operation of which the Variable is output of
        self.result_of: Operation = Operation(OpType.NOOP)

    def __repr__(self) -> str:
        return f"Variable({self.value})"

    def to_ndarray(self) -> np.ndarray:
        return self.value

    def __add__(self, other: Variable) -> Variable:
        result = Variable(self.value + other.value)

        with GradientTracker() as tracker:
            # if tracker is enabled, computation graph will be updated
            if tracker.enabled:
                tracker.append(OpType.ADD, params=[self, other], output=result)
        return result

    def __sub__(self, other: Variable) -> Variable:
        result = Variable(self.value - other.value)

        with GradientTracker() as tracker:
            # if tracker is enabled, computation graph will be updated
            if tracker.enabled:
                tracker.append(OpType.SUB, params=[self, other], output=result)
        return result

    def __mul__(self, other: Variable) -> Variable:
        result = Variable(self.value * other.value)

        with GradientTracker() as tracker:
            # if tracker is enabled, computation graph will be updated
            if tracker.enabled:
                tracker.append(OpType.MUL, params=[self, other], output=result)
        return result

    def __truediv__(self, other: Variable) -> Variable:
        result = Variable(self.value / other.value)

        with GradientTracker() as tracker:
            # if tracker is enabled, computation graph will be updated
            if tracker.enabled:
                tracker.append(OpType.DIV, params=[self, other], output=result)
        return result

    def __matmul__(self, other: Variable) -> Variable:
        result = Variable(self.value @ other.value)

        with GradientTracker() as tracker:
            # if tracker is enabled, computation graph will be updated
            if tracker.enabled:
                tracker.append(OpType.MATMUL, params=[self, other], output=result)
        return result

    def __pow__(self, power: int) -> Variable:
        result = Variable(self.value**power)

        with GradientTracker() as tracker:
            # if tracker is enabled, computation graph will be updated
            if tracker.enabled:
                tracker.append(
                    OpType.POWER,
                    params=[self],
                    output=result,
                    other_params={"power": power},
                )
        return result

    def add_param_to(self, param_to: Operation) -> None:
        self.param_to.append(param_to)

    def add_result_of(self, result_of: Operation) -> None:
        self.result_of = result_of


class Operation:
    """
    Class represents operation between single or two Variable objects.
    Operation objects contains type of operation, pointers to input Variable
    objects and pointer to resulting Variable from the operation.
    """

    def __init__(
        self,
        op_type: OpType,
        other_params: dict | None = None,
    ) -> None:
        self.op_type = op_type
        self.other_params = {} if other_params is None else other_params

    def add_params(self, params: list[Variable]) -> None:
        self.params = params

    def add_output(self, output: Variable) -> None:
        self.output = output

    def __eq__(self, value) -> bool:
        return self.op_type == value if isinstance(value, OpType) else False


class GradientTracker:
    """
    Class contains methods to compute partial derivatives of Variable
    based on the computation graph.

    Examples:

    >>> with GradientTracker() as tracker:
    ...     a = Variable([2.0, 5.0])
    ...     b = Variable([1.0, 2.0])
    ...     m = Variable([1.0, 2.0])
    ...     c = a + b
    ...     d = a * b
    ...     e = c / d
    >>> tracker.gradient(e, a)
    array([-0.25, -0.04])
    >>> tracker.gradient(e, b)
    array([-1.  , -0.25])
    >>> tracker.gradient(e, m) is None
    True

    >>> with GradientTracker() as tracker:
    ...     a = Variable([[2.0, 5.0]])
    ...     b = Variable([[1.0], [2.0]])
    ...     c = a @ b
    >>> tracker.gradient(c, a)
    array([[1., 2.]])
    >>> tracker.gradient(c, b)
    array([[2.],
           [5.]])

    >>> with GradientTracker() as tracker:
    ...     a = Variable([[2.0, 5.0]])
    ...     b = a ** 3
    >>> tracker.gradient(b, a)
    array([[12., 75.]])
    """

    instance = None

    def __new__(cls) -> Self:
        """
        Executes at the creation of class object and returns if
        object is already created. This class follows singleton
        design pattern.
        """
        if cls.instance is None:
            cls.instance = super().__new__(cls)
        return cls.instance

    def __init__(self) -> None:
        self.enabled = False

    def __enter__(self) -> Self:
        self.enabled = True
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc: BaseException | None,
        traceback: TracebackType | None,
    ) -> None:
        self.enabled = False

    def append(
        self,
        op_type: OpType,
        params: list[Variable],
        output: Variable,
        other_params: dict | None = None,
    ) -> None:
        """
        Adds Operation object to the related Variable objects for
        creating computational graph for calculating gradients.

        Args:
            op_type: Operation type
            params: Input parameters to the operation
            output: Output variable of the operation
        """
        operation = Operation(op_type, other_params=other_params)
        param_nodes = []
        for param in params:
            param.add_param_to(operation)
            param_nodes.append(param)
        output.add_result_of(operation)

        operation.add_params(param_nodes)
        operation.add_output(output)

    def gradient(self, target: Variable, source: Variable) -> np.ndarray | None:
        """
        Reverse accumulation of partial derivatives to calculate gradients
        of target variable with respect to source variable.

        Args:
            target: target variable for which gradients are calculated.
            source: source variable with respect to which the gradients are
                calculated.

        Returns:
            Gradient of the source variable with respect to the target variable
        """

        # partial derivatives with respect to target
        partial_deriv = defaultdict(lambda: 0)
        partial_deriv[target] = np.ones_like(target.to_ndarray())

        # iterating through each operations in the computation graph
        operation_queue = [target.result_of]
        while len(operation_queue) > 0:
            operation = operation_queue.pop()
            for param in operation.params:
                # as per the chain rule, multiplying partial derivatives
                # of variables with respect to the target
                dparam_doutput = self.derivative(param, operation)
                dparam_dtarget = dparam_doutput * partial_deriv[operation.output]
                partial_deriv[param] += dparam_dtarget

                if param.result_of and param.result_of != OpType.NOOP:
                    operation_queue.append(param.result_of)

        return partial_deriv.get(source)

    def derivative(self, param: Variable, operation: Operation) -> np.ndarray:
        """
        Compute the derivative of given operation/function

        Args:
            param: variable to be differentiated
            operation: function performed on the input variable

        Returns:
            Derivative of input variable with respect to the output of
            the operation
        """
        params = operation.params

        if operation == OpType.ADD:
            return np.ones_like(params[0].to_ndarray(), dtype=np.float64)
        if operation == OpType.SUB:
            if params[0] == param:
                return np.ones_like(params[0].to_ndarray(), dtype=np.float64)
            return -np.ones_like(params[1].to_ndarray(), dtype=np.float64)
        if operation == OpType.MUL:
            return (
                params[1].to_ndarray().T
                if params[0] == param
                else params[0].to_ndarray().T
            )
        if operation == OpType.DIV:
            if params[0] == param:
                return 1 / params[1].to_ndarray()
            return -params[0].to_ndarray() / (params[1].to_ndarray() ** 2)
        if operation == OpType.MATMUL:
            return (
                params[1].to_ndarray().T
                if params[0] == param
                else params[0].to_ndarray().T
            )
        if operation == OpType.POWER:
            power = operation.other_params["power"]
            return power * (params[0].to_ndarray() ** (power - 1))

        err_msg = f"invalid operation type: {operation.op_type}"
        raise ValueError(err_msg)


if __name__ == "__main__":
    import doctest

    doctest.testmod()
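A minimal usage sketch of the new module (not part of the committed files), mirroring the doctests in the docstrings above; it assumes the Variable and GradientTracker classes from this diff are in scope:

with GradientTracker() as tracker:
    a = Variable([2.0, 5.0])
    b = Variable([1.0, 2.0])
    # while the tracker is enabled, each overloaded operator records an Operation node
    e = (a + b) / (a * b)

# reverse accumulation from e back to each input
grad_a = tracker.gradient(e, a)  # array([-0.25, -0.04])
grad_b = tracker.gradient(e, b)  # array([-1.  , -0.25])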

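As a sanity check on those doctest values (again an illustration, not part of the commit): elementwise, e = (a + b) / (a * b) reduces to 1/a + 1/b, so the gradients are -1/a**2 and -1/b**2; for c = a @ b the gradients are b.T and a.T, the transposes returned by derivative for OpType.MATMUL; and for b = a ** 3 the power rule gives 3 * a**2. Plain NumPy reproduces the numbers:

import numpy as np

a = np.array([2.0, 5.0])
b = np.array([1.0, 2.0])
print(-1 / a**2)  # [-0.25 -0.04], matches tracker.gradient(e, a)
print(-1 / b**2)  # matches tracker.gradient(e, b): array([-1.  , -0.25])

a2 = np.array([[2.0, 5.0]])
b2 = np.array([[1.0], [2.0]])
print(b2.T)       # [[1. 2.]], matches tracker.gradient(c, a)
print(a2.T)       # column [[2.], [5.]], matches tracker.gradient(c, b)
print(3 * a2**2)  # [[12. 75.]], matches tracker.gradient(b, a)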
requirements.txt

+2 -1
@@ -19,5 +19,6 @@ statsmodels
 sympy
 tensorflow ; python_version < '3.12'
 tweepy
-xgboost
 # yulewalker # uncomment once audio_filters/equal_loudness_filter.py is fixed
+typing_extensions
+xgboost

0 commit comments
