增加总代价函数cost，非正规运算符threshold和accuracy，完成了浅层神经网络逻辑回归

kbdsbx · Jun 25, 2023 · 4542c9d · 4542c9d
1 parent 578628d
commit 4542c9d
Show file tree

Hide file tree

Showing 11 changed files with 276 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -41,6 +41,7 @@
     * 基本一元运算
         * 次方（用其他方式代替）
         * 张量求和sum（完成）
+        * 总代价cost（完成）
         * ...
     * 复合一元操作
         * relu（完成）
@@ -467,4 +468,81 @@ g.setUpdateFunc('W1', lambda z, dz : z - rate * dz)
 7. 规范化nous语言格式
 8. 实现nous语言解释器，能够将nous代码解释为计算图
 9. 自定义操作符
-10. 自定义初始化类
+10. 自定义初始化类
+
+# 示例
+
+### 浅层神经网络
+
+感谢吴恩达深度学习课程课后作业提供的数据集，全部代码见 `test/test_logistic_regression.py`
+
+浅层神经网络是一种简单的神经网络，只有一层参数层以及一层激活层，使用交叉熵函数计算代价
+
+训练阶段：
+
+```python
+# 迭代次数
+num_iterations = 1000
+# 学习率
+rate = 0.005
+
+# 每个图片样本有 64 * 64 * 3 = 12288 个特征，所以W被初始化为形状为(1, 12288)的零矩阵
+# 一共有209个样本
+# 偏置参数b被初始化为0，所有样本共有一个偏置
+# 使用交叉熵函数与标签集Y计算各个样本代价，并由cost函数计算所有样本总代价
+# 最终代价存储在终止符J中
+g = nous('''
+W:zeros(1, 12288) matmul X:(12288, 209) add b:0 -> sigmoid as temp -> cross_entropy Y -> cost -> J:$$
+''').parse()
+
+# 给样本X赋值
+g.setData('X', train_set_x)
+# 给标签Y赋值
+g.setData('Y', train_set_y_orig)
+# 更新参数的方法：w := w - rate * dj/dw；b := b - rate * dj/db
+g.setUpdateFunc('W', lambda w, dw : w - rate * dw)
+g.setUpdateFunc('b', lambda b, db : b - rate * db)
+
+# 循环计算n次
+for i in range(num_iterations) :
+
+    # 先执行前向传播
+    g.fprop()
+    # 再执行反向传播
+    g.bprop()
+    # 反复多次
+
+    if i % 100 == 0 :
+        print('Cost after iteration %i : %f' % (i, g.getData('J') ) )
+```
+
+验证阶段：
+
+验证阶段可以新建一个计算图，并把之前计算图中的参数传递给新的计算图进行计算，也可以在老的计算图上进行计算
+
+```python
+# 新建一个计算图
+# 此计算图只为了计算神经网络准确率，故而使用了非正规操作threshold和accuracy，这两个操作都不实现反向传播
+# threshold阈值操作一般跟在sigmoid后边，大于阈值为1，小于阈值为0
+# accuracy计算yhat与y之间数据相同的比例，目前只能计算逻辑回归函数的值，有待进一步完善
+g1 = nous('''
+W matmul X add b -> sigmoid -> threshold 0.5 -> accuracy Y -> J:$$
+''').parse()
+
+# 将图g中学习好的参数传入新的计算图g1
+g1.setData('W', g.getData('W'))
+g1.setData('b', g.getData('b'))
+
+# 计算训练集准确率
+g1.setData('X', train_set_x)
+g1.setData('Y', train_set_y_orig)
+g1.fprop()
+print('train accuracy: %s' %(g1.getData('J')))
+
+# 计算测试集准确率
+g1.setData('X', test_set_x)
+g1.setData('Y', test_set_y_orig)
+g1.fprop()
+print('test accuracy: %s' %(g1.getData('J')))
+
+```
diff --git a/erud/nous.py b/erud/nous.py
@@ -14,11 +14,15 @@
 from erud.opts.softmax import softmax
 from erud.opts.tanh import tanh
 from erud.opts.cross_entropy import cross_entropy
+from erud.opts.cost import cost
 from erud.tensor.var import var
 from erud.tensor.rest import rest
 import numpy as np
 import re
 
+from erud.opts_extend.accuracy import accuracy
+from erud.opts_extend.threshold import threshold
+
 # 解析代码，构造计算图
 class nous :
     # 可用的操作符
@@ -36,6 +40,10 @@ class nous :
         'softmax' : softmax,
         'tanh' : tanh,
         'cross_entropy' : cross_entropy,
+        'cost' : cost,
+
+        'accuracy' : accuracy,
+        'threshold' : threshold,
     }
 
     # 所有语句关键词

diff --git a/erud/opts/cost.py b/erud/opts/cost.py
@@ -0,0 +1,25 @@
+from erud.cg.payload import payload
+import numpy as np
+
+# Total cost: collapses a tensor of per-sample losses into their mean.
+class cost (payload) :
+    # Input seen by the latest fprop call; bprop uses it to size the gradient.
+    __x : any = None
+
+    def fprop(self, x) -> float :
+        """Return the mean of all elements of ``x``.
+
+        An ``np.ndarray`` is reduced to ``sum(x) / size(x)``; any other value
+        is returned unchanged. ``x`` is cached for the following ``bprop``.
+        """
+        self.__x = x
+
+        if isinstance(x, np.ndarray) :
+            return 1.0 / np.size(x) * np.sum(x)
+
+        return x
+
+    def bprop(self, dz) -> list[any] :
+        """Return ``[dx]``, the gradient flowing back to the cached input.
+
+        For an array input every element contributes ``1 / size(x)`` to the
+        mean, so ``dx`` is that constant scaled by the upstream gradient
+        ``dz``. For a non-array input the forward pass is the identity, so
+        ``dz`` is passed through unchanged.
+        """
+        _x = self.__x
+
+        if isinstance(_x, np.ndarray) :
+            return [np.ones_like(_x) / np.size(_x) * dz]
+
+        # Identity in the forward pass: forward the upstream gradient rather
+        # than a hard-coded 1, which silently discarded dz.
+        return [dz]
+
diff --git a/erud/opts_extend/__init__.py b/erud/opts_extend/__init__.py
@@ -0,0 +1 @@
+# 这里有一些参与计算但并不是那么标准的操作符
diff --git a/erud/opts_extend/accuracy.py b/erud/opts_extend/accuracy.py
@@ -0,0 +1,13 @@
+from erud.cg.payload import payload
+import numpy as np
+
+# Accuracy: for 0/1 predictions and labels, the percentage of matching entries.
+class accuracy (payload) :
+    # Inputs cached by fprop; bprop only uses them to shape its zero gradients.
+    __yhat : any = None
+    __y : any = None
+
+    def fprop(self, yhat, y) -> any :
+        """Return ``100 - mean(|yhat - y| * 100)``.
+
+        Both inputs are cached so that ``bprop`` can return zero gradients
+        matching them.
+        """
+        # Without this caching bprop always saw None and produced
+        # zeros_like(None) instead of arrays shaped like the inputs.
+        self.__yhat = yhat
+        self.__y = y
+
+        return 100 - np.mean(np.abs(yhat - y) * 100)
+
+    def bprop(self, dz) -> list[any] :
+        """Return zero gradients for ``yhat`` and ``y``; ``dz`` is ignored.
+
+        This operation is evaluation-only and does not propagate gradients.
+        """
+        return [np.zeros_like(self.__yhat), np.zeros_like(self.__y)]
diff --git a/erud/opts_extend/threshold.py b/erud/opts_extend/threshold.py
@@ -0,0 +1,12 @@
+from erud.cg.payload import payload
+import numpy as np
+
+# Threshold: element-wise comparison of the input against a cut-off value.
+class threshold (payload) :
+    # Input from the latest fprop call, kept only to shape bprop's result.
+    __x : any
+
+    def fprop(self, x, thresholds) -> any :
+        """Remember ``x`` and return the result of ``x > thresholds``."""
+        self.__x = x
+        above = x > thresholds
+        return above
+
+    def bprop(self, dz) -> list[any] :
+        """Return zero gradients for both operands; ``dz`` is ignored."""
+        zeros_for_input = np.zeros_like(self.__x)
+        return [zeros_for_input, 0]
diff --git a/erud/tensor/var.py b/erud/tensor/var.py
@@ -38,7 +38,9 @@ def fprop(self) -> any:
     def bprop(self, dz = None) -> list[any]:
         # 反向传播更新参数
         if dz is not None and self.__update_func is not None :
-            self.__data = self.__update_func( self.__data, dz )
+            res = self.__update_func( self.__data, dz )
+            if res is not None :
+                self.__data = res
 
         if isinstance(self.__data, np.ndarray) :
             return [np.zeros_like(self.__data)]

diff --git a/test/datasets/test_catvnoncat.h5 b/test/datasets/test_catvnoncat.h5
diff --git a/test/datasets/train_catvnoncat.h5 b/test/datasets/train_catvnoncat.h5
diff --git a/test/test_logistic_regression.py b/test/test_logistic_regression.py
@@ -0,0 +1,82 @@
+import numpy as np
+import h5py
+from erud.nous import nous
+
+def load_dataset():
+    """Load the cat / non-cat HDF5 datasets stored in ``datasets/`` next to this file.
+
+    Returns a tuple ``(train_x, train_y, test_x, test_y, classes)`` of NumPy
+    arrays. The two label arrays are reshaped to ``(1, n_samples)``.
+    """
+    import os
+
+    # Resolve the containing directory with os.path so it works with any path
+    # separator; slicing __file__ at the last backslash only worked on Windows.
+    path = os.path.dirname(os.path.abspath(__file__))
+    print(path)
+
+    # The with-blocks close each HDF5 handle once its data has been copied
+    # into in-memory arrays.
+    with h5py.File(os.path.join(path, 'datasets', 'train_catvnoncat.h5'), "r") as train_dataset :
+        train_set_x_orig = np.array(train_dataset["train_set_x"][:]) # train set features
+        train_set_y_orig = np.array(train_dataset["train_set_y"][:]) # train set labels
+
+    with h5py.File(os.path.join(path, 'datasets', 'test_catvnoncat.h5'), "r") as test_dataset :
+        test_set_x_orig = np.array(test_dataset["test_set_x"][:]) # test set features
+        test_set_y_orig = np.array(test_dataset["test_set_y"][:]) # test set labels
+
+        classes = np.array(test_dataset["list_classes"][:]) # the list of classes
+
+    # Turn the flat label vectors into row vectors of shape (1, n_samples).
+    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
+    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))
+
+    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
+
+def test_logistic_regression () :
+    """Train logistic regression on the cat dataset, then print train/test accuracy."""
+    train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes = load_dataset()
+
+    # Raw data: images are (samples, 64, 64, 3), labels are (1, samples).
+    assert train_set_x_orig.shape == (209, 64, 64, 3)
+    assert train_set_y_orig.shape == (1, 209)
+    assert test_set_x_orig.shape == (50, 64, 64, 3)
+    assert test_set_y_orig.shape == (1, 50)
+
+    # Flatten each image into a column and scale pixel values by 1 / 255.
+    n_features = 64 * 64 * 3
+    train_set_x = train_set_x_orig.reshape((209, n_features)).T / 255
+    test_set_x = test_set_x_orig.reshape((50, n_features)).T / 255
+
+    assert train_set_x.shape == (12288, 209)
+    assert test_set_x.shape == (12288, 50)
+
+    # Training
+    num_iterations = 1000
+    rate = 0.005
+
+    training_program = '''
+    W:zeros(1, 12288) matmul X add b:0 -> sigmoid as temp -> cross_entropy Y -> cost -> J:$$
+    '''
+    g = nous(training_program).parse()
+
+    # Plain gradient descent step, shared by both trainable parameters.
+    def descend (z, dz) :
+        return z - rate * dz
+
+    g.setData('X', train_set_x)
+    g.setData('Y', train_set_y_orig)
+    g.setUpdateFunc('W', descend)
+    g.setUpdateFunc('b', descend)
+
+    for i in range(num_iterations) :
+
+        g.fprop()
+        g.bprop()
+
+        # Report the cost only on every 100th iteration.
+        if i % 100 != 0 :
+            continue
+
+        print('Cost after iteration %i : %f' % (i, g.getData('J') ) )
+
+
+    # Evaluation
+
+    evaluation_program = '''
+    W matmul X add b -> sigmoid -> threshold 0.5 -> accuracy Y -> J:$$
+    '''
+    g1 = nous(evaluation_program).parse()
+
+    # Hand the learned parameters over to the evaluation graph.
+    g1.setData('W', g.getData('W'))
+    g1.setData('b', g.getData('b'))
+
+    # Feed one dataset through the evaluation graph and print its accuracy.
+    def report (message, samples, labels) :
+        g1.setData('X', samples)
+        g1.setData('Y', labels)
+
+        g1.fprop()
+
+        print(message %(g1.getData('J')))
+
+    report('train accuracy: %s', train_set_x, train_set_y_orig)
+    report('test accuracy: %s', test_set_x, test_set_y_orig)
diff --git a/test/test_opts.py b/test/test_opts.py
@@ -317,5 +317,58 @@ def test_softmax() :
     [dz, _] = softmax_opt.bprop(da)
 
     assert np.all(dz == np.array([-0.2416897266247951,  0.23762678570983872, 0.004062940914956294]))
+
+from erud.opts.cost import cost
+from erud.nous import nous
+
+# cost
+def test_cost() :
+    """Check ``cost`` directly and through an equivalent ``nous`` graph.
+
+    Floating-point results are compared with a tight relative tolerance
+    rather than ``==``, so the test does not depend on the exact rounding of
+    a particular NumPy build or platform.
+    """
+    w = np.array([[1], [2]])
+    b = 2
+    X = np.array([[1,2], [3,4]])
+    Y = np.array([[1,0]])
+    sigmoid_opt = sigmoid()
+    cross_entropy_opt = cross_entropy()
+    cost_opt = cost()
+
+    A = sigmoid_opt.fprop(np.dot(w.T, X) + b)
+    j = cross_entropy_opt.fprop(A, Y)
+    c = cost_opt.fprop(j)
+
+    assert np.isclose(c, 6.000064773192205, rtol=1e-9, atol=0)
+
+    # The operator chain above and the graph below are equivalent.
+
+    g = nous(
+        """
+        W:[[1, 2]] matmul X:[[1, 2], [3, 4]] add b:2 ->
+        sigmoid ->
+        cross_entropy Y:[[1,0]] ->
+        cost ->
+        j:$$
+        """
+    ).parse()
+
+    # Forward propagation
+
+    g.fprop()
+    c = g.getData('j')
+    assert np.isclose(c, 6.000064773192205, rtol=1e-9, atol=0)
+
+    # Backward propagation: check the gradients
+
+    def update_w_func (w, dw) :
+        assert np.allclose(dw, np.array([[0.9999321585374046, 1.999802619786816 ]]), rtol=1e-9, atol=0)
+    def update_b_func (b, db) :
+        assert np.allclose(db, 0.4999352306247057, rtol=1e-9, atol=0)
+
+    # Install update functions on W and b that only assert on dw / db. They
+    # return None, so the parameters themselves are not updated.
+    g.setUpdateFunc('W', update_w_func )
+    g.setUpdateFunc('b', update_b_func )
+
+    g.bprop()
+
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		# 这里有一些参与计算但并不是那么标准的操作符