From 579e99f652cdec7eace577952cfabf34405a912e Mon Sep 17 00:00:00 2001
From: Mohammad Esfandiyar
Date: Sat, 14 Oct 2023 20:27:18 +0330
Subject: [PATCH 01/92] Adding new implementation

Adding my python implementation of Gaussian Elimination pivoting as a
numerical linear algebra algorithm
---
 .../src/GaussianEliminationpivoting.py        | 27 +++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 linear_algebra/src/GaussianEliminationpivoting.py

diff --git a/linear_algebra/src/GaussianEliminationpivoting.py b/linear_algebra/src/GaussianEliminationpivoting.py
new file mode 100644
index 000000000000..71779fd5b41e
--- /dev/null
+++ b/linear_algebra/src/GaussianEliminationpivoting.py
@@ -0,0 +1,27 @@
+import numpy as np
+
+def pivoting(a,n,i):
+    max_index=i
+    for index in range(i+1,n):
+        if abs(a[index][i])>abs(a[max_index][i]):
+            max_index=index
+    return max_index
+
+def gauss_elimination_pivoting(a,b,n):
+    x=[]
+    for i in range(n-1):
+        new_index=pivoting(a,n,i)
+        a[i],a[new_index]=a[new_index],a[i]
+        b[i],b[new_index]=b[new_index],b[i]
+        pivot=a[i][i]
+        for j in range(i+1,n):
+            m=-1*a[j][i]/pivot
+            for k in range(0,n):
+                a[j][k]+=m*a[i][k]
+            b[j]+=m*b[i]
+
+    for p in range(n-1,-1,-1):
+        x.append(b[p]/a[p][p])
+        for q in range(p-1,-1,-1):
+            b[q]=b[q]-x[n-p-1]*a[q][p]
+    return x

From ad230e5dd430ecd84cdd48b5c4cb98a37726fbbe Mon Sep 17 00:00:00 2001
From: Mohammad Esfandiyar
Date: Sat, 14 Oct 2023 20:37:59 +0330
Subject: [PATCH 02/92] Delete linear_algebra/src/GaussianEliminationpivoting.py

---
 .../src/GaussianEliminationpivoting.py        | 27 -------------------
 1 file changed, 27 deletions(-)
 delete mode 100644 linear_algebra/src/GaussianEliminationpivoting.py

diff --git a/linear_algebra/src/GaussianEliminationpivoting.py b/linear_algebra/src/GaussianEliminationpivoting.py
deleted file mode 100644
index 71779fd5b41e..000000000000
--- a/linear_algebra/src/GaussianEliminationpivoting.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import numpy as np
-
-def pivoting(a,n,i):
-    max_index=i
-    for index in range(i+1,n):
-        if abs(a[index][i])>abs(a[max_index][i]):
-            max_index=index
-    return max_index
-
-def gauss_elimination_pivoting(a,b,n):
-    x=[]
-    for i in range(n-1):
-        new_index=pivoting(a,n,i)
-        a[i],a[new_index]=a[new_index],a[i]
-        b[i],b[new_index]=b[new_index],b[i]
-        pivot=a[i][i]
-        for j in range(i+1,n):
-            m=-1*a[j][i]/pivot
-            for k in range(0,n):
-                a[j][k]+=m*a[i][k]
-            b[j]+=m*b[i]
-
-    for p in range(n-1,-1,-1):
-        x.append(b[p]/a[p][p])
-        for q in range(p-1,-1,-1):
-            b[q]=b[q]-x[n-p-1]*a[q][p]
-    return x

From dc9fb8acd4428ebb53f467cfd2e37155e9ec9993 Mon Sep 17 00:00:00 2001
From: Mohammad Esfandiyar
Date: Sat, 14 Oct 2023 20:39:20 +0330
Subject: [PATCH 03/92] Adding new implementation

Adding my python implementation of Gaussian Elimination pivoting as a
numerical linear algebra algorithm
---
 .../src/gaussianeliminationpivoting.py        | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 linear_algebra/src/gaussianeliminationpivoting.py

diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py
new file mode 100644
index 000000000000..3ccfd047de3b
--- /dev/null
+++ b/linear_algebra/src/gaussianeliminationpivoting.py
@@ -0,0 +1,35 @@
+import numpy as np
+
+def custom_pivoting(a, n, i):
+    min_index = i
+    for index in range(i + 1, n):
+        if abs(a[index][i]) < abs(a[min_index][i]):
+            min_index = index
+    return min_index
+
+def custom_gauss_elimination_pivoting(a, b, n):
+    result = []
+    for i in range(n - 1):
+        new_index = custom_pivoting(a, n, i)
+        a[i], a[new_index] = a[new_index], a[i]
+        b[i], b[new_index] = b[new_index], b[i]
+        pivot = a[i][i]
+        for j in range(i + 1, n):
+            m = -1 * a[j][i] / pivot
+            for k in range(0, n):
+                a[j][k] += m * a[i][k]
+            b[j] += m * b[i]
+
+    for p in range(n - 1, -1, -1):
+        result.append(b[p] / a[p][p])
+        for q in range(p - 1, -1, -1):
+            b[q] = b[q] - result[n - p - 1] * a[q][p]
+    return result
+
+# Example usage:
+# n_size = 3
+# a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
+# b_vector = np.array([10, 11, 12], dtype=float)
+
+# solution = custom_gauss_elimination_pivoting(a_matrix, b_vector, n_size)
+# print("Solution:", solution)

From 44ca32dc94119746b67d5d46b59e9c673239b7b5 Mon Sep 17 00:00:00 2001
From: Mohammad Esfandiyar
Date: Sat, 14 Oct 2023 20:45:43 +0330
Subject: [PATCH 04/92] Delete linear_algebra/src/gaussianeliminationpivoting.py

---
 .../src/gaussianeliminationpivoting.py        | 35 -------------------
 1 file changed, 35 deletions(-)
 delete mode 100644 linear_algebra/src/gaussianeliminationpivoting.py

diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py
deleted file mode 100644
index 3ccfd047de3b..000000000000
--- a/linear_algebra/src/gaussianeliminationpivoting.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import numpy as np
-
-def custom_pivoting(a, n, i):
-    min_index = i
-    for index in range(i + 1, n):
-        if abs(a[index][i]) < abs(a[min_index][i]):
-            min_index = index
-    return min_index
-
-def custom_gauss_elimination_pivoting(a, b, n):
-    result = []
-    for i in range(n - 1):
-        new_index = custom_pivoting(a, n, i)
-        a[i], a[new_index] = a[new_index], a[i]
-        b[i], b[new_index] = b[new_index], b[i]
-        pivot = a[i][i]
-        for j in range(i + 1, n):
-            m = -1 * a[j][i] / pivot
-            for k in range(0, n):
-                a[j][k] += m * a[i][k]
-            b[j] += m * b[i]
-
-    for p in range(n - 1, -1, -1):
-        result.append(b[p] / a[p][p])
-        for q in range(p - 1, -1, -1):
-            b[q] = b[q] - result[n - p - 1] * a[q][p]
-    return result
-
-# Example usage:
-# n_size = 3
-# a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
-# b_vector = np.array([10, 11, 12], dtype=float)
-
-# solution = custom_gauss_elimination_pivoting(a_matrix, b_vector, n_size)
-# print("Solution:", solution)

From 9c48e4bdd982cc279e7a9003aa5fefe5aa9f4f01 Mon Sep 17 00:00:00 2001
From: Mohammad Esfandiyar
Date: Sat, 14 Oct 2023 20:47:02 +0330
Subject: [PATCH 05/92] Adding new implementation

Adding my python implementation of Gaussian Elimination pivoting as a
numerical linear algebra algorithm for the third time because the last
two times had conflict with the rules in PR
---
 .../src/gaussianeliminationpivoting.py        | 39 +++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 linear_algebra/src/gaussianeliminationpivoting.py

diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py
new file mode 100644
index 000000000000..4a5609a2d9d4
--- /dev/null
+++ b/linear_algebra/src/gaussianeliminationpivoting.py
@@ -0,0 +1,39 @@
+import numpy as np
+
+def custom_pivoting(a, n, i):
+    min_index = i
+    for index in range(i + 1, n):
+        if abs(a[index][i]) < abs(a[min_index][i]):
+            min_index = index
+    return min_index
+
+def custom_gauss_elimination_pivoting(a, b, n):
+    result = []
+    for i in range(n - 1):
+        new_index = custom_pivoting(a, n, i)
+        a[i], a[new_index] = a[new_index], a[i]
+        b[i], b[new_index] = b[new_index], b[i]
+        pivot = a[i][i]
+        for j in range(i + 1, n):
+            m = -1 * a[j][i] / pivot
+            for k in range(0, n):
+                a[j][k] += m * a[i][k]
+            b[j] += m * b[i]
+
+    for p in range(n - 1, -1, -1):
+        result.append(b[p] / a[p][p])
+        for q in range(p - 1, -1, -1):
+            b[q] = b[q] - result[n - p - 1] * a[q][p]
+    return result
+
+# Example usage:
+# n_size = 3
+# a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
+# b_vector = np.array([10, 11, 12], dtype=float)
+
+# solution = custom_gauss_elimination_pivoting(a_matrix, b_vector, n_size)
+# print("Solution:", solution)
+
+
+#URL that points to Wikipedia or another similar explanation.
+#>>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#
\ No newline at end of file
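Note: pivoting in PATCH 01 selected the row with the largest absolute entry in the
pivot column, while custom_pivoting from PATCH 03 onward selects the smallest.
Textbook partial pivoting uses the largest-magnitude entry so that the elimination
multipliers stay bounded by 1. A minimal sketch of the conventional choice, assuming
a 2-D NumPy array `a` (the helper name is illustrative, not part of these patches):

import numpy as np

def largest_pivot_row(a: np.ndarray, i: int) -> int:
    # Row index >= i whose entry in column i has the largest magnitude;
    # dividing by the largest available pivot keeps |m| <= 1 below it.
    return i + int(np.argmax(np.abs(a[i:, i])))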
From d9db2977afb8311f8e4e351e66eb40daa439ab78 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 14 Oct 2023 17:21:57 +0000
Subject: [PATCH 06/92] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 linear_algebra/src/gaussianeliminationpivoting.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py
index 4a5609a2d9d4..242e3a1ec588 100644
--- a/linear_algebra/src/gaussianeliminationpivoting.py
+++ b/linear_algebra/src/gaussianeliminationpivoting.py
@@ -1,5 +1,6 @@
 import numpy as np
 
+
 def custom_pivoting(a, n, i):
     min_index = i
     for index in range(i + 1, n):
@@ -7,6 +8,7 @@ def custom_pivoting(a, n, i):
             min_index = index
     return min_index
 
+
 def custom_gauss_elimination_pivoting(a, b, n):
     result = []
     for i in range(n - 1):
@@ -26,6 +28,7 @@ def custom_gauss_elimination_pivoting(a, b, n):
             b[q] = b[q] - result[n - p - 1] * a[q][p]
     return result
 
+
 # Example usage:
 # n_size = 3
 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
@@ -35,5 +38,5 @@ def custom_gauss_elimination_pivoting(a, b, n):
 # solution = custom_gauss_elimination_pivoting(a_matrix, b_vector, n_size)
 # print("Solution:", solution)
 
-#URL that points to Wikipedia or another similar explanation.
-#>>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#
\ No newline at end of file
+# URL that points to Wikipedia or another similar explanation.
+# >>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#

From 135405a7791ef40b33494ec92cd7060430fdf287 Mon Sep 17 00:00:00 2001
From: Mohammad Esfandiyar
Date: Sat, 14 Oct 2023 21:05:51 +0330
Subject: [PATCH 07/92] Delete linear_algebra/src/gaussianeliminationpivoting.py

---
 .../src/gaussianeliminationpivoting.py        | 42 -------------------
 1 file changed, 42 deletions(-)
 delete mode 100644 linear_algebra/src/gaussianeliminationpivoting.py

diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py
deleted file mode 100644
index 242e3a1ec588..000000000000
--- a/linear_algebra/src/gaussianeliminationpivoting.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import numpy as np
-
-
-def custom_pivoting(a, n, i):
-    min_index = i
-    for index in range(i + 1, n):
-        if abs(a[index][i]) < abs(a[min_index][i]):
-            min_index = index
-    return min_index
-
-
-def custom_gauss_elimination_pivoting(a, b, n):
-    result = []
-    for i in range(n - 1):
-        new_index = custom_pivoting(a, n, i)
-        a[i], a[new_index] = a[new_index], a[i]
-        b[i], b[new_index] = b[new_index], b[i]
-        pivot = a[i][i]
-        for j in range(i + 1, n):
-            m = -1 * a[j][i] / pivot
-            for k in range(0, n):
-                a[j][k] += m * a[i][k]
-            b[j] += m * b[i]
-
-    for p in range(n - 1, -1, -1):
-        result.append(b[p] / a[p][p])
-        for q in range(p - 1, -1, -1):
-            b[q] = b[q] - result[n - p - 1] * a[q][p]
-    return result
-
-
-# Example usage:
-# n_size = 3
-# a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
-# b_vector = np.array([10, 11, 12], dtype=float)
-
-# solution = custom_gauss_elimination_pivoting(a_matrix, b_vector, n_size)
-# print("Solution:", solution)
-
-
-# URL that points to Wikipedia or another similar explanation.
-# >>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#

From 0eb31fca2dbea4ea27fc40b2214f6c6540e298ee Mon Sep 17 00:00:00 2001
From: Mohammad Esfandiyar
Date: Sat, 14 Oct 2023 21:06:48 +0330
Subject: [PATCH 08/92] Adding gaussianeliminationpivoting.py

Adding my python implementation of Gaussian Elimination pivoting as a
numerical linear algebra algorithm for the fourth time because the last
three times had conflict with the rules in PR and bots
---
 .../src/gaussianeliminationpivoting.py        | 75 +++++++++++++++++++
 1 file changed, 75 insertions(+)
 create mode 100644 linear_algebra/src/gaussianeliminationpivoting.py

diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py
new file mode 100644
index 000000000000..3031bcfcb33d
--- /dev/null
+++ b/linear_algebra/src/gaussianeliminationpivoting.py
@@ -0,0 +1,75 @@
+import numpy as np
+from typing import List
+
+def custom_pivoting(a: np.ndarray, n: int, i: int) -> int:
+    """
+    Selects the index of the minimum absolute value in the i-th column of a matrix.
+
+    Parameters:
+    - a (np.ndarray): The input matrix.
+    - n (int): The size of the matrix.
+    - i (int): The column index.
+
+    Returns:
+    - int: The index of the minimum absolute value in the i-th column.
+
+    Example:
+    >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
+    >>> custom_pivoting(a_matrix, 3, 1)
+    0
+    """
+    min_index = i
+    for index in range(i + 1, n):
+        if abs(a[index][i]) < abs(a[min_index][i]):
+            min_index = index
+    return min_index
+
+
+def custom_gauss_elimination_pivoting(a: List[List[float]], b: List[float], n: int) -> List[float]:
+    """
+    Solves a system of linear equations using Gaussian elimination with partial pivoting.
+
+    Parameters:
+    - a (List[List[float]]): The coefficient matrix.
+    - b (List[float]): The constant vector.
+    - n (int): The size of the system.
+
+    Returns:
+    - List[float]: The solution vector.
+
+    Example:
+    >>> a_matrix = [[2, 3, 4], [1, -2, 3], [3, 4, 5]]
+    >>> b_vector = [20, 9, 11]
+    >>> custom_gauss_elimination_pivoting(a_matrix, b_vector, 3)
+    [1.0, 2.0, 3.0]
+    """
+    result = []
+    for i in range(n - 1):
+        new_index = custom_pivoting(a, n, i)
+        a[i], a[new_index] = a[new_index], a[i]
+        b[i], b[new_index] = b[new_index], b[i]
+        pivot = a[i][i]
+        for j in range(i + 1, n):
+            m = -1 * a[j][i] / pivot
+            for k in range(0, n):
+                a[j][k] += m * a[i][k]
+            b[j] += m * b[i]
+
+    for p in range(n - 1, -1, -1):
+        result.append(b[p] / a[p][p])
+        for q in range(p - 1, -1, -1):
+            b[q] = b[q] - result[n - p - 1] * a[q][p]
+    return result
+
+
+# Example usage:
+# n_size = 3
+# a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
+# b_vector = np.array([10, 11, 12], dtype=float)
+
+# solution = custom_gauss_elimination_pivoting(a_matrix, b_vector, n_size)
+# print("Solution:", solution)
+
+
+#URL that points to Wikipedia or another similar explanation.
+#>>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#
\ No newline at end of file

From 56ceb6a3e07b89fbe698a5ba8c636b62565896e8 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 14 Oct 2023 17:37:40 +0000
Subject: [PATCH 09/92] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 linear_algebra/src/gaussianeliminationpivoting.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py
index 3031bcfcb33d..8040c8995228 100644
--- a/linear_algebra/src/gaussianeliminationpivoting.py
+++ b/linear_algebra/src/gaussianeliminationpivoting.py
@@ -1,6 +1,7 @@
 import numpy as np
 from typing import List
 
+
 def custom_pivoting(a: np.ndarray, n: int, i: int) -> int:
     """
     Selects the index of the minimum absolute value in the i-th column of a matrix.
@@ -25,7 +26,9 @@ def custom_pivoting(a: np.ndarray, n: int, i: int) -> int:
     return min_index
 
 
-def custom_gauss_elimination_pivoting(a: List[List[float]], b: List[float], n: int) -> List[float]:
+def custom_gauss_elimination_pivoting(
+    a: List[List[float]], b: List[float], n: int
+) -> List[float]:
     """
     Solves a system of linear equations using Gaussian elimination with partial pivoting.
 
@@ -71,5 +74,5 @@ def custom_gauss_elimination_pivoting(a: List[List[float]], b: List[float], n: i
 # print("Solution:", solution)
 
-#URL that points to Wikipedia or another similar explanation.
-#>>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#
\ No newline at end of file
+# URL that points to Wikipedia or another similar explanation.
+# >>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#

From f270415285d0f074c3705dc7bdc67edc3927aec9 Mon Sep 17 00:00:00 2001
From: Mohammad Esfandiyar
Date: Wed, 18 Oct 2023 23:25:52 +0330
Subject: [PATCH 10/92] Update gaussianeliminationpivoting.py

---
 .../src/gaussianeliminationpivoting.py        | 31 +++++++++----------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py
index 8040c8995228..b7945a4754f7 100644
--- a/linear_algebra/src/gaussianeliminationpivoting.py
+++ b/linear_algebra/src/gaussianeliminationpivoting.py
@@ -1,6 +1,4 @@
 import numpy as np
-from typing import List
-
 
 def custom_pivoting(a: np.ndarray, n: int, i: int) -> int:
     """
@@ -15,35 +13,32 @@ def custom_pivoting(a: np.ndarray, n: int, i: int) -> int:
     - int: The index of the minimum absolute value in the i-th column.
 
     Example:
-    >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
-    >>> custom_pivoting(a_matrix, 3, 1)
+    >> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
+    >> custom_pivoting(a_matrix, 3, 1)
     0
-    """
+    """ 
     min_index = i
     for index in range(i + 1, n):
         if abs(a[index][i]) < abs(a[min_index][i]):
             min_index = index
     return min_index
 
-
-def custom_gauss_elimination_pivoting(
-    a: List[List[float]], b: List[float], n: int
-) -> List[float]:
+def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
     """
     Solves a system of linear equations using Gaussian elimination with partial pivoting.
 
     Parameters:
-    - a (List[List[float]]): The coefficient matrix.
-    - b (List[float]): The constant vector.
+    - a (list): The coefficient matrix.
+    - b (list): The constant vector.
     - n (int): The size of the system.
 
     Returns:
-    - List[float]: The solution vector.
+    - list: The solution vector.
 
     Example:
-    >>> a_matrix = [[2, 3, 4], [1, -2, 3], [3, 4, 5]]
-    >>> b_vector = [20, 9, 11]
-    >>> custom_gauss_elimination_pivoting(a_matrix, b_vector, 3)
+    >>a_matrix = [[2, 3, 4], [1, -2, 3], [3, 4, 5]]
+    >> b_vector = [20, 9, 11]
+    >> custom_gauss_elimination_pivoting(a_matrix, b_vector, 3)
     [1.0, 2.0, 3.0]
     """
     result = []
@@ -65,6 +60,8 @@ def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
     return result
 
 
+
+
 # Example usage:
 # n_size = 3
 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
@@ -74,5 +71,5 @@ def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
 # print("Solution:", solution)
 
-# URL that points to Wikipedia or another similar explanation.
-# >>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#
+#URL that points to Wikipedia or another similar explanation. 
+#>>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#

From 3d1e8aa7183ebb5246f28120bd997a9287f4b290 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 18 Oct 2023 19:58:53 +0000
Subject: [PATCH 11/92] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 linear_algebra/src/gaussianeliminationpivoting.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py
index b7945a4754f7..a6956e3aa1ac 100644
--- a/linear_algebra/src/gaussianeliminationpivoting.py
+++ b/linear_algebra/src/gaussianeliminationpivoting.py
@@ -1,5 +1,6 @@
 import numpy as np
 
+
 def custom_pivoting(a: np.ndarray, n: int, i: int) -> int:
     """
     Selects the index of the minimum absolute value in the i-th column of a matrix.
@@ -16,13 +17,14 @@ def custom_pivoting(a: np.ndarray, n: int, i: int) -> int:
     >> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
     >> custom_pivoting(a_matrix, 3, 1)
     0
-    """ 
+    """
     min_index = i
     for index in range(i + 1, n):
         if abs(a[index][i]) < abs(a[min_index][i]):
             min_index = index
     return min_index
 
+
 def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
     """
     Solves a system of linear equations using Gaussian elimination with partial pivoting.
@@ -60,8 +62,6 @@ def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
     return result
 
 
-
-
 # Example usage:
 # n_size = 3
 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
@@ -71,5 +71,5 @@ def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
 # print("Solution:", solution)
 
-#URL that points to Wikipedia or another similar explanation. 
-#>>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#
+# URL that points to Wikipedia or another similar explanation.
+# >>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#

From 6a0b6dd261ad2eaacffb7d373b27387c5edafc70 Mon Sep 17 00:00:00 2001
From: Mohammad Esfandiyar
Date: Wed, 18 Oct 2023 23:34:06 +0330
Subject: [PATCH 12/92] Update gaussianeliminationpivoting.py

---
 .../src/gaussianeliminationpivoting.py        | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py
index a6956e3aa1ac..f51e5044bda5 100644
--- a/linear_algebra/src/gaussianeliminationpivoting.py
+++ b/linear_algebra/src/gaussianeliminationpivoting.py
@@ -1,9 +1,8 @@
 import numpy as np
-
-
 def custom_pivoting(a: np.ndarray, n: int, i: int) -> int:
     """
-    Selects the index of the minimum absolute value in the i-th column of a matrix.
+    Selects the index of the minimum absolute 
+    value in the i-th column of a matrix.
 
     Parameters:
     - a (np.ndarray): The input matrix.
@@ -24,10 +23,10 @@ def custom_pivoting(a: np.ndarray, n: int, i: int) -> int:
             min_index = index
     return min_index
 
-
 def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
     """
-    Solves a system of linear equations using Gaussian elimination with partial pivoting.
+    Solves a system of linear equations using 
+    Gaussian elimination with partial pivoting.
 
     Parameters:
     - a (list): The coefficient matrix.
@@ -51,7 +50,7 @@ def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
         pivot = a[i][i]
         for j in range(i + 1, n):
             m = -1 * a[j][i] / pivot
-            for k in range(0, n):
+            for k in range(n):
                 a[j][k] += m * a[i][k]
             b[j] += m * b[i]
 
@@ -62,6 +61,8 @@ def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
     return result
 
 
+
+
 # Example usage:
 # n_size = 3
 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
@@ -71,5 +72,5 @@ def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
 # print("Solution:", solution)
 
-# URL that points to Wikipedia or another similar explanation.
-# >>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#
+#URL that points to Wikipedia or another similar explanation. 
+#>>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#

From 8753484b4f7ffb84939bedeeb0797ad8e2959365 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 18 Oct 2023 20:04:40 +0000
Subject: [PATCH 13/92] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 linear_algebra/src/gaussianeliminationpivoting.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py
index f51e5044bda5..822f7c79d8c5 100644
--- a/linear_algebra/src/gaussianeliminationpivoting.py
+++ b/linear_algebra/src/gaussianeliminationpivoting.py
@@ -1,7 +1,9 @@
 import numpy as np
+
+
 def custom_pivoting(a: np.ndarray, n: int, i: int) -> int:
     """
-    Selects the index of the minimum absolute 
+    Selects the index of the minimum absolute
     value in the i-th column of a matrix.
 
     Parameters:
@@ -25,9 +27,10 @@ def custom_pivoting(a: np.ndarray, n: int, i: int) -> int:
             min_index = index
     return min_index
 
+
 def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
     """
-    Solves a system of linear equations using 
+    Solves a system of linear equations using
     Gaussian elimination with partial pivoting.
 
     Parameters:
@@ -61,8 +64,6 @@ def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
     return result
 
 
-
-
 # Example usage:
 # n_size = 3
 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
@@ -72,5 +73,5 @@ def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
 # print("Solution:", solution)
 
-#URL that points to Wikipedia or another similar explanation. 
-#>>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#
+# URL that points to Wikipedia or another similar explanation.
+# >>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<#
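As of PATCH 13 the module can be exercised as sketched below. Because the
back-substitution loop appends unknowns starting from the last row, the returned
list holds the solution in reverse order (last unknown first); this call assumes
the list-based signature of that version:

a = [[2.0, 3.0, 4.0], [1.0, -2.0, 3.0], [3.0, 4.0, 5.0]]
b = [20.0, 9.0, 11.0]
x_rev = custom_gauss_elimination_pivoting(a, b, 3)
x = x_rev[::-1]  # reorder to [x1, x2, x3]
# The exact solution of this system is x1 = -30.5, x2 = 5.0, x3 = 16.5,
# so the docstring's expected value [1.0, 2.0, 3.0] does not hold for it.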
From 0f62cf6157a7ace23015482e5df0bc114ac54800 Mon Sep 17 00:00:00 2001
From: Mohammad Esfandiyar
Date: Thu, 26 Oct 2023 23:10:39 +0330
Subject: [PATCH 14/92] Update gaussianeliminationpivoting.py

I changed a to matrix and coeff_matrix for better clarity
---
 .../src/gaussianeliminationpivoting.py        | 52 ++++++++-----------
 1 file changed, 23 insertions(+), 29 deletions(-)

diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py
index 822f7c79d8c5..181654239348 100644
--- a/linear_algebra/src/gaussianeliminationpivoting.py
+++ b/linear_algebra/src/gaussianeliminationpivoting.py
@@ -1,13 +1,9 @@
-import numpy as np
-
-
-def custom_pivoting(a: np.ndarray, n: int, i: int) -> int:
+def custom_pivoting(matrix: np.ndarray, n: int, i: int) -> int:
     """
-    Selects the index of the minimum absolute
-    value in the i-th column of a matrix.
+    Selects the index of the minimum absolute value in the i-th column of a matrix.
 
     Parameters:
-    - a (np.ndarray): The input matrix.
+    - matrix (np.ndarray): The input matrix.
     - n (int): The size of the matrix.
     - i (int): The column index.
 
@@ -15,52 +11,50 @@ def custom_pivoting(matrix: np.ndarray, n: int, i: int) -> int:
     - int: The index of the minimum absolute value in the i-th column.
 
     Example:
-    >> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
-    >> custom_pivoting(a_matrix, 3, 1)
+    >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
+    >>> custom_pivoting(a_matrix, 3, 1)
     0
-    """
+    """ 
     min_index = i
     for index in range(i + 1, n):
-        if abs(a[index][i]) < abs(a[min_index][i]):
+        if abs(matrix[index][i]) < abs(matrix[min_index][i]):
             min_index = index
     return min_index
 
-
-def custom_gauss_elimination_pivoting(a: list, b: list, n: int) -> list:
+def custom_gauss_elimination_pivoting(coeff_matrix: list, const_vector: list, n: int) -> list:
     """
-    Solves a system of linear equations using
-    Gaussian elimination with partial pivoting.
+    Solves a system of linear equations using Gaussian elimination with partial pivoting.
 
     Parameters:
-    - a (list): The coefficient matrix.
-    - b (list): The constant vector.
+    - coeff_matrix (list): The coefficient matrix.
+    - const_vector (list): The constant vector.
     - n (int): The size of the system.
 
     Returns:
     - list: The solution vector.
 
     Example:
     >>> a_matrix = [[2, 3, 4], [1, -2, 3], [3, 4, 5]]
     >>> b_vector = [20, 9, 11]
     >>> custom_gauss_elimination_pivoting(a_matrix, b_vector, 3)
     [1.0, 2.0, 3.0]
     """
     result = []
     for i in range(n - 1):
-        new_index = custom_pivoting(a, n, i)
-        a[i], a[new_index] = a[new_index], a[i]
-        b[i], b[new_index] = b[new_index], b[i]
-        pivot = a[i][i]
+        new_index = custom_pivoting(coeff_matrix, n, i)
+        coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], coeff_matrix[i]
+        const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i]
+        pivot = coeff_matrix[i][i]
         for j in range(i + 1, n):
-            m = -1 * a[j][i] / pivot
+            m = -1 * coeff_matrix[j][i] / pivot
             for k in range(n):
-                a[j][k] += m * a[i][k]
-            b[j] += m * b[i]
+                coeff_matrix[j][k] += m * coeff_matrix[i][k]
+            const_vector[j] += m * const_vector[i]
 
     for p in range(n - 1, -1, -1):
-        result.append(b[p] / a[p][p])
+        result.append(const_vector[p] / coeff_matrix[p][p])
         for q in range(p - 1, -1, -1):
-            b[q] = b[q] - result[n - p - 1] * a[q][p]
+            const_vector[q] = const_vector[q] - result[n - p - 1] * coeff_matrix[q][p]
     return result

From 579468b1001dcfdac64b753681f0a64dc968f729 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 26 Oct 2023 19:41:15 +0000
Subject: [PATCH 15/92] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 .../src/gaussianeliminationpivoting.py        | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py
index 181654239348..03f719e00cd5 100644
--- a/linear_algebra/src/gaussianeliminationpivoting.py
+++ b/linear_algebra/src/gaussianeliminationpivoting.py
@@ -14,14 +14,17 @@ def custom_pivoting(matrix: np.ndarray, n: int, i: int) -> int:
     >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
     >>> custom_pivoting(a_matrix, 3, 1)
     0
-    """ 
+    """
     min_index = i
     for index in range(i + 1, n):
         if abs(matrix[index][i]) < abs(matrix[min_index][i]):
             min_index = index
     return min_index
 
-def custom_gauss_elimination_pivoting(coeff_matrix: list, const_vector: list, n: int) -> list:
+
+def custom_gauss_elimination_pivoting(
+    coeff_matrix: list, const_vector: list, n: int
+) -> list:
     """
     Solves a system of linear equations using Gaussian elimination with partial pivoting.
 
     Parameters:
@@ -42,8 +45,14 @@ def custom_gauss_elimination_pivoting(coeff_matrix: list, const_vector: list, n:
     result = []
     for i in range(n - 1):
         new_index = custom_pivoting(coeff_matrix, n, i)
-        coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], coeff_matrix[i]
-        const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i]
+        coeff_matrix[i], coeff_matrix[new_index] = (
+            coeff_matrix[new_index],
+            coeff_matrix[i],
+        )
+        const_vector[i], const_vector[new_index] = (
+            const_vector[new_index],
+            const_vector[i],
+        )
         pivot = coeff_matrix[i][i]
         for j in range(i + 1, n):
             m = -1 * coeff_matrix[j][i] / pivot
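One caveat the docstrings gloss over: custom_pivoting advertises np.ndarray input,
but the tuple row swap used throughout (a[i], a[new_index] = a[new_index], a[i],
here on coeff_matrix) is only safe for lists of lists. On a 2-D NumPy array both
sides of the swap are views into the same buffer, so the second assignment reads an
already-overwritten row and the swap silently fails. A sketch of a swap that is safe
for NumPy arrays:

# Fancy indexing copies the selected rows before assigning them back,
# so both rows really are exchanged.
coeff_matrix[[i, new_index]] = coeff_matrix[[new_index, i]]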
@@ -42,8 +45,14 @@ def custom_gauss_elimination_pivoting(coeff_matrix: list, const_vector: list, n: result = [] for i in range(n - 1): new_index = custom_pivoting(coeff_matrix, n, i) - coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], coeff_matrix[i] - const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i] + coeff_matrix[i], coeff_matrix[new_index] = ( + coeff_matrix[new_index], + coeff_matrix[i], + ) + const_vector[i], const_vector[new_index] = ( + const_vector[new_index], + const_vector[i], + ) pivot = coeff_matrix[i][i] for j in range(i + 1, n): m = -1 * coeff_matrix[j][i] / pivot From ec2b578fe4b5325e6013a9bab8472080473eb725 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Thu, 26 Oct 2023 23:20:21 +0330 Subject: [PATCH 16/92] Update gaussianeliminationpivoting.py --- linear_algebra/src/gaussianeliminationpivoting.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py index 03f719e00cd5..1400bf42275c 100644 --- a/linear_algebra/src/gaussianeliminationpivoting.py +++ b/linear_algebra/src/gaussianeliminationpivoting.py @@ -1,6 +1,8 @@ +import numpy as np def custom_pivoting(matrix: np.ndarray, n: int, i: int) -> int: """ - Selects the index of the minimum absolute value in the i-th column of a matrix. + Selects the index of the minimum absolute + value in the i-th column of a matrix. Parameters: - matrix (np.ndarray): The input matrix. @@ -8,10 +10,12 @@ def custom_pivoting(matrix: np.ndarray, n: int, i: int) -> int: - i (int): The column index. Returns: - - int: The index of the minimum absolute value in the i-th column. + - int: The index of the minimum absolute value in the + i-th column. Example: - >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) + >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + dtype=float) >>> custom_pivoting(a_matrix, 3, 1) 0 """ @@ -26,7 +30,8 @@ def custom_gauss_elimination_pivoting( coeff_matrix: list, const_vector: list, n: int ) -> list: """ - Solves a system of linear equations using Gaussian elimination with partial pivoting. + Solves a system of linear equations + using Gaussian elimination with partial pivoting. Parameters: - coeff_matrix (list): The coefficient matrix. From 3c5344c96a79e4f1796d0d3335496c038804f460 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 19:53:42 +0000 Subject: [PATCH 17/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- linear_algebra/src/gaussianeliminationpivoting.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py index 1400bf42275c..b95842edc611 100644 --- a/linear_algebra/src/gaussianeliminationpivoting.py +++ b/linear_algebra/src/gaussianeliminationpivoting.py @@ -1,7 +1,9 @@ import numpy as np + + def custom_pivoting(matrix: np.ndarray, n: int, i: int) -> int: """ - Selects the index of the minimum absolute + Selects the index of the minimum absolute value in the i-th column of a matrix. Parameters: @@ -10,11 +12,11 @@ def custom_pivoting(matrix: np.ndarray, n: int, i: int) -> int: - i (int): The column index. Returns: - - int: The index of the minimum absolute value in the + - int: The index of the minimum absolute value in the i-th column. 
Example: - >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], + >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) >>> custom_pivoting(a_matrix, 3, 1) 0 @@ -30,7 +32,7 @@ def custom_gauss_elimination_pivoting( coeff_matrix: list, const_vector: list, n: int ) -> list: """ - Solves a system of linear equations + Solves a system of linear equations using Gaussian elimination with partial pivoting. Parameters: From 4a85130c361f29b53b85a994ab26b7a1a9848ac1 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Thu, 26 Oct 2023 23:42:19 +0330 Subject: [PATCH 18/92] Update gaussianeliminationpivoting.py --- .../src/gaussianeliminationpivoting.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py index b95842edc611..ac58465c60e3 100644 --- a/linear_algebra/src/gaussianeliminationpivoting.py +++ b/linear_algebra/src/gaussianeliminationpivoting.py @@ -1,6 +1,4 @@ import numpy as np - - def custom_pivoting(matrix: np.ndarray, n: int, i: int) -> int: """ Selects the index of the minimum absolute @@ -26,8 +24,6 @@ def custom_pivoting(matrix: np.ndarray, n: int, i: int) -> int: if abs(matrix[index][i]) < abs(matrix[min_index][i]): min_index = index return min_index - - def custom_gauss_elimination_pivoting( coeff_matrix: list, const_vector: list, n: int ) -> list: @@ -52,21 +48,16 @@ def custom_gauss_elimination_pivoting( result = [] for i in range(n - 1): new_index = custom_pivoting(coeff_matrix, n, i) - coeff_matrix[i], coeff_matrix[new_index] = ( - coeff_matrix[new_index], - coeff_matrix[i], - ) - const_vector[i], const_vector[new_index] = ( - const_vector[new_index], - const_vector[i], - ) + coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], + coeff_matrix[i] + const_vector[i], const_vector[new_index] = const_vector[new_index], + const_vector[i] pivot = coeff_matrix[i][i] for j in range(i + 1, n): m = -1 * coeff_matrix[j][i] / pivot for k in range(n): coeff_matrix[j][k] += m * coeff_matrix[i][k] const_vector[j] += m * const_vector[i] - for p in range(n - 1, -1, -1): result.append(const_vector[p] / coeff_matrix[p][p]) for q in range(p - 1, -1, -1): From 19f0edfe23d89cbac0f5e06ebc4c882c4ca03f43 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 20:13:25 +0000 Subject: [PATCH 19/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- linear_algebra/src/gaussianeliminationpivoting.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py index ac58465c60e3..2a0516e79a9b 100644 --- a/linear_algebra/src/gaussianeliminationpivoting.py +++ b/linear_algebra/src/gaussianeliminationpivoting.py @@ -1,4 +1,6 @@ import numpy as np + + def custom_pivoting(matrix: np.ndarray, n: int, i: int) -> int: """ Selects the index of the minimum absolute @@ -24,6 +26,8 @@ def custom_pivoting(matrix: np.ndarray, n: int, i: int) -> int: if abs(matrix[index][i]) < abs(matrix[min_index][i]): min_index = index return min_index + + def custom_gauss_elimination_pivoting( coeff_matrix: list, const_vector: list, n: int ) -> list: @@ -48,9 +52,9 @@ def custom_gauss_elimination_pivoting( result = [] for i in range(n - 1): new_index = custom_pivoting(coeff_matrix, n, i) - coeff_matrix[i], 
coeff_matrix[new_index] = coeff_matrix[new_index], + coeff_matrix[i], coeff_matrix[new_index] = (coeff_matrix[new_index],) coeff_matrix[i] - const_vector[i], const_vector[new_index] = const_vector[new_index], + const_vector[i], const_vector[new_index] = (const_vector[new_index],) const_vector[i] pivot = coeff_matrix[i][i] for j in range(i + 1, n): From b1bc7ff5fcd30ba4a4180b6f56421080eb1ee7d9 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Thu, 26 Oct 2023 23:49:14 +0330 Subject: [PATCH 20/92] Update gaussianeliminationpivoting.py --- .../src/gaussianeliminationpivoting.py | 59 +++++++++---------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py index 2a0516e79a9b..d3d7e0f3c637 100644 --- a/linear_algebra/src/gaussianeliminationpivoting.py +++ b/linear_algebra/src/gaussianeliminationpivoting.py @@ -1,44 +1,40 @@ import numpy as np - - -def custom_pivoting(matrix: np.ndarray, n: int, i: int) -> int: +def custom_pivoting(matrix: np.ndarray, num_rows: int, + column_index: int) -> int: """ - Selects the index of the minimum absolute - value in the i-th column of a matrix. + Selects the index of the minimum absolute + value in the specified column of a matrix. Parameters: - matrix (np.ndarray): The input matrix. - - n (int): The size of the matrix. - - i (int): The column index. + - num_rows (int): The number of rows in the matrix. + - column_index (int): The index of the column. Returns: - - int: The index of the minimum absolute value in the - i-th column. + - int: The index of the minimum absolute value in the + specified column. Example: - >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], - dtype=float) + >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) >>> custom_pivoting(a_matrix, 3, 1) 0 - """ - min_index = i - for index in range(i + 1, n): - if abs(matrix[index][i]) < abs(matrix[min_index][i]): + """ + min_index = column_index + for index in range(column_index + 1, num_rows): + if abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): min_index = index return min_index - - def custom_gauss_elimination_pivoting( - coeff_matrix: list, const_vector: list, n: int + coeff_matrix: list, const_vector: list, num_equations: int ) -> list: """ - Solves a system of linear equations + Solves a system of linear equations using Gaussian elimination with partial pivoting. Parameters: - coeff_matrix (list): The coefficient matrix. - const_vector (list): The constant vector. - - n (int): The size of the system. + - num_equations (int): The number of equations in the system. Returns: - list: The solution vector. 
@@ -50,25 +46,24 @@ def custom_gauss_elimination_pivoting( [1.0, 2.0, 3.0] """ result = [] - for i in range(n - 1): - new_index = custom_pivoting(coeff_matrix, n, i) - coeff_matrix[i], coeff_matrix[new_index] = (coeff_matrix[new_index],) - coeff_matrix[i] - const_vector[i], const_vector[new_index] = (const_vector[new_index],) - const_vector[i] + for i in range(num_equations - 1): + new_index = custom_pivoting(coeff_matrix, num_equations, i) + coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], coeff_matrix[i] + const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i] pivot = coeff_matrix[i][i] - for j in range(i + 1, n): + for j in range(i + 1, num_equations): m = -1 * coeff_matrix[j][i] / pivot - for k in range(n): + for k in range(num_equations): coeff_matrix[j][k] += m * coeff_matrix[i][k] const_vector[j] += m * const_vector[i] - for p in range(n - 1, -1, -1): - result.append(const_vector[p] / coeff_matrix[p][p]) - for q in range(p - 1, -1, -1): - const_vector[q] = const_vector[q] - result[n - p - 1] * coeff_matrix[q][p] + for row_index in range(num_equations - 1, -1, -1): + result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) + for q in range(row_index - 1, -1, -1): + const_vector[q] = const_vector[q] - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] return result + # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From f7900b9a538875972d834519dc6d6b3c0209007d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 20:19:49 +0000 Subject: [PATCH 21/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../src/gaussianeliminationpivoting.py | 31 +++++++++++++------ 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussianeliminationpivoting.py index d3d7e0f3c637..ba1574b2ef39 100644 --- a/linear_algebra/src/gaussianeliminationpivoting.py +++ b/linear_algebra/src/gaussianeliminationpivoting.py @@ -1,8 +1,9 @@ import numpy as np -def custom_pivoting(matrix: np.ndarray, num_rows: int, - column_index: int) -> int: + + +def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: """ - Selects the index of the minimum absolute + Selects the index of the minimum absolute value in the specified column of a matrix. Parameters: @@ -11,24 +12,26 @@ def custom_pivoting(matrix: np.ndarray, num_rows: int, - column_index (int): The index of the column. Returns: - - int: The index of the minimum absolute value in the + - int: The index of the minimum absolute value in the specified column. Example: >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) >>> custom_pivoting(a_matrix, 3, 1) 0 - """ + """ min_index = column_index for index in range(column_index + 1, num_rows): if abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): min_index = index return min_index + + def custom_gauss_elimination_pivoting( coeff_matrix: list, const_vector: list, num_equations: int ) -> list: """ - Solves a system of linear equations + Solves a system of linear equations using Gaussian elimination with partial pivoting. 
Parameters: @@ -48,8 +51,14 @@ def custom_gauss_elimination_pivoting( result = [] for i in range(num_equations - 1): new_index = custom_pivoting(coeff_matrix, num_equations, i) - coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], coeff_matrix[i] - const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i] + coeff_matrix[i], coeff_matrix[new_index] = ( + coeff_matrix[new_index], + coeff_matrix[i], + ) + const_vector[i], const_vector[new_index] = ( + const_vector[new_index], + const_vector[i], + ) pivot = coeff_matrix[i][i] for j in range(i + 1, num_equations): m = -1 * coeff_matrix[j][i] / pivot @@ -59,11 +68,13 @@ def custom_gauss_elimination_pivoting( for row_index in range(num_equations - 1, -1, -1): result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): - const_vector[q] = const_vector[q] - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] + const_vector[q] = ( + const_vector[q] + - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] + ) return result - # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From d519383e253415c62c07f3a94ea6d5e1122b04fb Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Thu, 26 Oct 2023 23:57:11 +0330 Subject: [PATCH 22/92] Update and rename gaussianeliminationpivoting.py to gaussian_elimination_pivoting.py renamed the file --- ...liminationpivoting.py => gaussian_elimination_pivoting.py} | 4 ---- 1 file changed, 4 deletions(-) rename linear_algebra/src/{gaussianeliminationpivoting.py => gaussian_elimination_pivoting.py} (99%) diff --git a/linear_algebra/src/gaussianeliminationpivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py similarity index 99% rename from linear_algebra/src/gaussianeliminationpivoting.py rename to linear_algebra/src/gaussian_elimination_pivoting.py index ba1574b2ef39..c3efa3a9048a 100644 --- a/linear_algebra/src/gaussianeliminationpivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,6 +1,4 @@ import numpy as np - - def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: """ Selects the index of the minimum absolute @@ -25,8 +23,6 @@ def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int if abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): min_index = index return min_index - - def custom_gauss_elimination_pivoting( coeff_matrix: list, const_vector: list, num_equations: int ) -> list: From aac96ddfb6192a59a4ad527c0837d664635234ed Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 20:27:47 +0000 Subject: [PATCH 23/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- linear_algebra/src/gaussian_elimination_pivoting.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index c3efa3a9048a..ba1574b2ef39 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,4 +1,6 @@ import numpy as np + + def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: """ Selects the index of the minimum absolute @@ -23,6 +25,8 @@ def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int if 
abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): min_index = index return min_index + + def custom_gauss_elimination_pivoting( coeff_matrix: list, const_vector: list, num_equations: int ) -> list: From eafd037c9e0d67ee48f20809e70edb5a4f3eac8e Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 00:08:26 +0330 Subject: [PATCH 24/92] Update gaussian_elimination_pivoting.py --- linear_algebra/src/gaussian_elimination_pivoting.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index ba1574b2ef39..9ce0a0294f99 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,6 +1,4 @@ import numpy as np - - def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: """ Selects the index of the minimum absolute @@ -25,11 +23,7 @@ def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int if abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): min_index = index return min_index - - -def custom_gauss_elimination_pivoting( - coeff_matrix: list, const_vector: list, num_equations: int -) -> list: +def custom_gauss_elimination_pivoting(coeff_matrix: list, const_vector: list, num_equations: int) -> list: """ Solves a system of linear equations using Gaussian elimination with partial pivoting. From 769126f0ab483d2dc1ea2e20d966808256abef90 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 20:39:09 +0000 Subject: [PATCH 25/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- linear_algebra/src/gaussian_elimination_pivoting.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 9ce0a0294f99..ba1574b2ef39 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,4 +1,6 @@ import numpy as np + + def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: """ Selects the index of the minimum absolute @@ -23,7 +25,11 @@ def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int if abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): min_index = index return min_index -def custom_gauss_elimination_pivoting(coeff_matrix: list, const_vector: list, num_equations: int) -> list: + + +def custom_gauss_elimination_pivoting( + coeff_matrix: list, const_vector: list, num_equations: int +) -> list: """ Solves a system of linear equations using Gaussian elimination with partial pivoting. 
From 6f00d688c31db0f9964a946db8636217f7b4b00e Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 00:11:39 +0330 Subject: [PATCH 26/92] Update gaussian_elimination_pivoting.py From 45aefe8f7a5d6ccefa4aea334d55d6941b84017d Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 00:36:22 +0330 Subject: [PATCH 27/92] Update gaussian_elimination_pivoting.py --- .../src/gaussian_elimination_pivoting.py | 47 +++++++------------ 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index ba1574b2ef39..c4ef48c6ebb3 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,9 +1,9 @@ import numpy as np - -def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: +def custom_pivoting(matrix: np.ndarray, + num_rows: int, column_index: int) -> int: """ - Selects the index of the minimum absolute + Selects the index of the minimum absolute value in the specified column of a matrix. Parameters: @@ -12,27 +12,23 @@ def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int - column_index (int): The index of the column. Returns: - - int: The index of the minimum absolute value in the - specified column. + - int: The index of the minimum absolute value in the specified column. Example: >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) >>> custom_pivoting(a_matrix, 3, 1) 0 - """ + """ min_index = column_index for index in range(column_index + 1, num_rows): if abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): min_index = index return min_index - -def custom_gauss_elimination_pivoting( - coeff_matrix: list, const_vector: list, num_equations: int -) -> list: +def custom_gauss_elimination_pivoting(matrix: list, + const_vector: list, num_equations: int) -> list: """ - Solves a system of linear equations - using Gaussian elimination with partial pivoting. + Solves a system of linear equations using Gaussian elimination with partial pivoting. Parameters: - coeff_matrix (list): The coefficient matrix. 
@@ -50,31 +46,22 @@ def custom_gauss_elimination_pivoting( """ result = [] for i in range(num_equations - 1): - new_index = custom_pivoting(coeff_matrix, num_equations, i) - coeff_matrix[i], coeff_matrix[new_index] = ( - coeff_matrix[new_index], - coeff_matrix[i], - ) - const_vector[i], const_vector[new_index] = ( - const_vector[new_index], - const_vector[i], - ) - pivot = coeff_matrix[i][i] + new_index = custom_pivoting(matrix, num_equations, i) + matrix[i], matrix[new_index] = matrix[new_index], matrix[i] + const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i] + pivot = matrix[i][i] for j in range(i + 1, num_equations): - m = -1 * coeff_matrix[j][i] / pivot + m = -1 * matrix[j][i] / pivot for k in range(num_equations): - coeff_matrix[j][k] += m * coeff_matrix[i][k] + matrix[j][k] += m * matrix[i][k] const_vector[j] += m * const_vector[i] for row_index in range(num_equations - 1, -1, -1): - result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) + result.append(const_vector[row_index] / matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): - const_vector[q] = ( - const_vector[q] - - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] - ) + const_vector[q] = const_vector[q] - result[num_equations - + row_index - 1] * matrix[q][row_index] return result - # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From 3865376fc5446690a166aa67d51c31d7e5a94ddb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 21:07:16 +0000 Subject: [PATCH 28/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../src/gaussian_elimination_pivoting.py | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index c4ef48c6ebb3..6a0cb4ace128 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,9 +1,9 @@ import numpy as np -def custom_pivoting(matrix: np.ndarray, - num_rows: int, column_index: int) -> int: + +def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: """ - Selects the index of the minimum absolute + Selects the index of the minimum absolute value in the specified column of a matrix. Parameters: @@ -18,15 +18,17 @@ def custom_pivoting(matrix: np.ndarray, >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) >>> custom_pivoting(a_matrix, 3, 1) 0 - """ + """ min_index = column_index for index in range(column_index + 1, num_rows): if abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): min_index = index return min_index -def custom_gauss_elimination_pivoting(matrix: list, - const_vector: list, num_equations: int) -> list: + +def custom_gauss_elimination_pivoting( + matrix: list, const_vector: list, num_equations: int +) -> list: """ Solves a system of linear equations using Gaussian elimination with partial pivoting. 
@@ -48,7 +50,10 @@ def custom_gauss_elimination_pivoting(matrix: list, for i in range(num_equations - 1): new_index = custom_pivoting(matrix, num_equations, i) matrix[i], matrix[new_index] = matrix[new_index], matrix[i] - const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i] + const_vector[i], const_vector[new_index] = ( + const_vector[new_index], + const_vector[i], + ) pivot = matrix[i][i] for j in range(i + 1, num_equations): m = -1 * matrix[j][i] / pivot @@ -58,10 +63,13 @@ def custom_gauss_elimination_pivoting(matrix: list, for row_index in range(num_equations - 1, -1, -1): result.append(const_vector[row_index] / matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): - const_vector[q] = const_vector[q] - result[num_equations - - row_index - 1] * matrix[q][row_index] + const_vector[q] = ( + const_vector[q] + - result[num_equations - row_index - 1] * matrix[q][row_index] + ) return result + # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From 33b85e6ea9bf1ed12855bbf0915df06125b53357 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 00:40:25 +0330 Subject: [PATCH 29/92] Update gaussian_elimination_pivoting.py --- linear_algebra/src/gaussian_elimination_pivoting.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 6a0cb4ace128..72e789bb4e61 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -30,7 +30,8 @@ def custom_gauss_elimination_pivoting( matrix: list, const_vector: list, num_equations: int ) -> list: """ - Solves a system of linear equations using Gaussian elimination with partial pivoting. + Solves a system of linear equations using Gaussian + elimination with partial pivoting. Parameters: - coeff_matrix (list): The coefficient matrix. From d713c704c0708a60fe87ac56d4cdee831d43073a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 21:12:04 +0000 Subject: [PATCH 30/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- linear_algebra/src/gaussian_elimination_pivoting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 72e789bb4e61..6b371b8fa6e4 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -30,7 +30,7 @@ def custom_gauss_elimination_pivoting( matrix: list, const_vector: list, num_equations: int ) -> list: """ - Solves a system of linear equations using Gaussian + Solves a system of linear equations using Gaussian elimination with partial pivoting. 
Parameters: From cf01b7e82a966c665de581180b15702636654673 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 00:43:15 +0330 Subject: [PATCH 31/92] Update gaussian_elimination_pivoting.py --- linear_algebra/src/gaussian_elimination_pivoting.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 6b371b8fa6e4..992c922a4cc7 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -31,16 +31,13 @@ def custom_gauss_elimination_pivoting( ) -> list: """ Solves a system of linear equations using Gaussian - elimination with partial pivoting. - +elimination with partial pivoting. Parameters: - coeff_matrix (list): The coefficient matrix. - const_vector (list): The constant vector. - num_equations (int): The number of equations in the system. - Returns: - list: The solution vector. - Example: >>> a_matrix = [[2, 3, 4], [1, -2, 3], [3, 4, 5]] >>> b_vector = [20, 9, 11] From 0b19e0ebc74b8558230f5a836175ea06ddd881b5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 21:13:51 +0000 Subject: [PATCH 32/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../src/gaussian_elimination_pivoting.py | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 992c922a4cc7..3c361fea595b 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -30,19 +30,19 @@ def custom_gauss_elimination_pivoting( matrix: list, const_vector: list, num_equations: int ) -> list: """ - Solves a system of linear equations using Gaussian -elimination with partial pivoting. - Parameters: - - coeff_matrix (list): The coefficient matrix. - - const_vector (list): The constant vector. - - num_equations (int): The number of equations in the system. - Returns: - - list: The solution vector. - Example: - >>> a_matrix = [[2, 3, 4], [1, -2, 3], [3, 4, 5]] - >>> b_vector = [20, 9, 11] - >>> custom_gauss_elimination_pivoting(a_matrix, b_vector, 3) - [1.0, 2.0, 3.0] + Solves a system of linear equations using Gaussian + elimination with partial pivoting. + Parameters: + - coeff_matrix (list): The coefficient matrix. + - const_vector (list): The constant vector. + - num_equations (int): The number of equations in the system. + Returns: + - list: The solution vector. 
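
Much of the churn in patches 29-32 (and again further down) is mechanical: the pre-commit hooks enforce a maximum line length (Black's default of 88 columns, which this repository's configuration appears to use) and two blank lines around top-level definitions (pycodestyle E302/E305), so hand-rewrapped lines keep snapping back to shapes like this stub:

    def solver_stub() -> None:
        """
        Solves a system of linear equations using Gaussian
        elimination with partial pivoting.
        """


    def next_stub() -> None:
        """Two blank lines separate every pair of top-level definitions."""
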
+ Example: + >>> a_matrix = [[2, 3, 4], [1, -2, 3], [3, 4, 5]] + >>> b_vector = [20, 9, 11] + >>> custom_gauss_elimination_pivoting(a_matrix, b_vector, 3) + [1.0, 2.0, 3.0] """ result = [] for i in range(num_equations - 1): From b9e172ff35856724fd4f88ee9d5cb4731b608f4f Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 00:50:41 +0330 Subject: [PATCH 33/92] Update gaussian_elimination_pivoting.py --- linear_algebra/src/gaussian_elimination_pivoting.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 3c361fea595b..f30a06235047 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,6 +1,5 @@ import numpy as np - def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: """ Selects the index of the minimum absolute @@ -25,7 +24,6 @@ def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int min_index = index return min_index - def custom_gauss_elimination_pivoting( matrix: list, const_vector: list, num_equations: int ) -> list: From c588c9da91cd0964721fbc46697f7dd060654817 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 21:21:17 +0000 Subject: [PATCH 34/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- linear_algebra/src/gaussian_elimination_pivoting.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index f30a06235047..3c361fea595b 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,5 +1,6 @@ import numpy as np + def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: """ Selects the index of the minimum absolute @@ -24,6 +25,7 @@ def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int min_index = index return min_index + def custom_gauss_elimination_pivoting( matrix: list, const_vector: list, num_equations: int ) -> list: From 3966efcdfb6d815db21e5f40434a59116b9ab443 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 00:56:53 +0330 Subject: [PATCH 35/92] Update gaussian_elimination_pivoting.py --- .../src/gaussian_elimination_pivoting.py | 68 +++++++++---------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 3c361fea595b..b8a392430f45 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,9 +1,9 @@ import numpy as np - -def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: +def custom_pivoting(matrix: np.ndarray, num_rows: int, + column_index: int) -> int: """ - Selects the index of the minimum absolute + Selects the index of the minimum absolute value in the specified column of a matrix. 
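
One substantive issue hiding in this back-and-forth: the doctest that patches 31-32 shuffle around expects [1.0, 2.0, 3.0], but that vector does not satisfy the stated system; the second equation gives 1*1 - 2*2 + 3*3 = 6, not 9. Cross-checking independently (assuming NumPy is installed):

    import numpy as np

    a = np.array([[2, 3, 4], [1, -2, 3], [3, 4, 5]], dtype=float)
    b = np.array([20, 9, 11], dtype=float)

    print(a @ np.array([1.0, 2.0, 3.0]))  # [20.  6. 26.]  -- not b
    print(np.linalg.solve(a, b))          # [-30.5   5.   16.5]
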
Parameters: @@ -18,56 +18,56 @@ def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) >>> custom_pivoting(a_matrix, 3, 1) 0 - """ + """ min_index = column_index for index in range(column_index + 1, num_rows): if abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): min_index = index return min_index - -def custom_gauss_elimination_pivoting( - matrix: list, const_vector: list, num_equations: int -) -> list: +def custom_gauss_elimination_pivoting(coeff_matrix: list, + const_vector: list, num_equations: int) -> list: """ - Solves a system of linear equations using Gaussian + Solves a system of linear equations using Gaussian elimination with partial pivoting. - Parameters: - - coeff_matrix (list): The coefficient matrix. - - const_vector (list): The constant vector. - - num_equations (int): The number of equations in the system. - Returns: - - list: The solution vector. - Example: - >>> a_matrix = [[2, 3, 4], [1, -2, 3], [3, 4, 5]] - >>> b_vector = [20, 9, 11] - >>> custom_gauss_elimination_pivoting(a_matrix, b_vector, 3) - [1.0, 2.0, 3.0] + + Parameters: + - coeff_matrix (list): The coefficient matrix. + - const_vector (list): The constant vector. + - num_equations (int): The number of equations in the system. + + Returns: + - list: The solution vector. + + Example: + >>> a_matrix = [[2, 3, 4], [1, -2, 3], [3, 4, 5]] + >>> b_vector = [20, 9, 11] + >>> custom_gauss_elimination_pivoting(a_matrix, b_vector, 3) + [1.0, 2.0, 3.0] """ result = [] for i in range(num_equations - 1): - new_index = custom_pivoting(matrix, num_equations, i) - matrix[i], matrix[new_index] = matrix[new_index], matrix[i] - const_vector[i], const_vector[new_index] = ( - const_vector[new_index], - const_vector[i], - ) - pivot = matrix[i][i] + new_index = custom_pivoting(coeff_matrix, num_equations, i) + coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], + coeff_matrix[i] + const_vector[i], const_vector[new_index] = const_vector[new_index], + const_vector[i] + pivot = coeff_matrix[i][i] for j in range(i + 1, num_equations): - m = -1 * matrix[j][i] / pivot + m = -1 * coeff_matrix[j][i] / pivot for k in range(num_equations): - matrix[j][k] += m * matrix[i][k] + coeff_matrix[j][k] += m * coeff_matrix[i][k] const_vector[j] += m * const_vector[i] + for row_index in range(num_equations - 1, -1, -1): - result.append(const_vector[row_index] / matrix[row_index][row_index]) + result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): - const_vector[q] = ( - const_vector[q] - - result[num_equations - row_index - 1] * matrix[q][row_index] - ) + const_vector[q] = const_vector[q] - result[num_equations - row_index - 1] * + coeff_matrix[q][row_index] return result + # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From 07a810906069b8c67c89784bffc83085422ac261 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 21:27:21 +0000 Subject: [PATCH 36/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../src/gaussian_elimination_pivoting.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index b8a392430f45..652a95250dc9 100644 --- 
a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,9 +1,9 @@ import numpy as np -def custom_pivoting(matrix: np.ndarray, num_rows: int, +def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: """ - Selects the index of the minimum absolute + Selects the index of the minimum absolute value in the specified column of a matrix. Parameters: @@ -18,17 +18,17 @@ def custom_pivoting(matrix: np.ndarray, num_rows: int, >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) >>> custom_pivoting(a_matrix, 3, 1) 0 - """ + """ min_index = column_index for index in range(column_index + 1, num_rows): if abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): min_index = index return min_index -def custom_gauss_elimination_pivoting(coeff_matrix: list, +def custom_gauss_elimination_pivoting(coeff_matrix: list, const_vector: list, num_equations: int) -> list: """ - Solves a system of linear equations using Gaussian + Solves a system of linear equations using Gaussian elimination with partial pivoting. Parameters: @@ -48,9 +48,9 @@ def custom_gauss_elimination_pivoting(coeff_matrix: list, result = [] for i in range(num_equations - 1): new_index = custom_pivoting(coeff_matrix, num_equations, i) - coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], + coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], coeff_matrix[i] - const_vector[i], const_vector[new_index] = const_vector[new_index], + const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i] pivot = coeff_matrix[i][i] for j in range(i + 1, num_equations): @@ -62,7 +62,7 @@ def custom_gauss_elimination_pivoting(coeff_matrix: list, for row_index in range(num_equations - 1, -1, -1): result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): - const_vector[q] = const_vector[q] - result[num_equations - row_index - 1] * + const_vector[q] = const_vector[q] - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] return result From 7d9033686f1d10e4b74e30d7365ce308d888a783 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 00:58:31 +0330 Subject: [PATCH 37/92] Update gaussian_elimination_pivoting.py --- linear_algebra/src/gaussian_elimination_pivoting.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 652a95250dc9..9a2c7dddd1cd 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -62,8 +62,7 @@ def custom_gauss_elimination_pivoting(coeff_matrix: list, for row_index in range(num_equations - 1, -1, -1): result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): - const_vector[q] = const_vector[q] - result[num_equations - row_index - 1] * - coeff_matrix[q][row_index] + const_vector[q] = const_vector[q] - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] return result From 2492d6001c398ec172d1e332f03ae2ba6c9cd0be Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 21:29:07 +0000 Subject: [PATCH 38/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../src/gaussian_elimination_pivoting.py | 20 
+++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 9a2c7dddd1cd..e2f993d835ff 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,7 +1,7 @@ import numpy as np -def custom_pivoting(matrix: np.ndarray, num_rows: int, - column_index: int) -> int: + +def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: """ Selects the index of the minimum absolute value in the specified column of a matrix. @@ -25,8 +25,10 @@ def custom_pivoting(matrix: np.ndarray, num_rows: int, min_index = index return min_index -def custom_gauss_elimination_pivoting(coeff_matrix: list, - const_vector: list, num_equations: int) -> list: + +def custom_gauss_elimination_pivoting( + coeff_matrix: list, const_vector: list, num_equations: int +) -> list: """ Solves a system of linear equations using Gaussian elimination with partial pivoting. @@ -48,9 +50,9 @@ def custom_gauss_elimination_pivoting(coeff_matrix: list, result = [] for i in range(num_equations - 1): new_index = custom_pivoting(coeff_matrix, num_equations, i) - coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], + coeff_matrix[i], coeff_matrix[new_index] = (coeff_matrix[new_index],) coeff_matrix[i] - const_vector[i], const_vector[new_index] = const_vector[new_index], + const_vector[i], const_vector[new_index] = (const_vector[new_index],) const_vector[i] pivot = coeff_matrix[i][i] for j in range(i + 1, num_equations): @@ -62,11 +64,13 @@ def custom_gauss_elimination_pivoting(coeff_matrix: list, for row_index in range(num_equations - 1, -1, -1): result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): - const_vector[q] = const_vector[q] - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] + const_vector[q] = ( + const_vector[q] + - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] + ) return result - # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From 757c23ed1418774d3cdcf31bef8910e2e4a34c14 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 01:01:16 +0330 Subject: [PATCH 39/92] Update gaussian_elimination_pivoting.py --- linear_algebra/src/gaussian_elimination_pivoting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index e2f993d835ff..d430d3c22814 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -64,7 +64,7 @@ def custom_gauss_elimination_pivoting( for row_index in range(num_equations - 1, -1, -1): result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): - const_vector[q] = ( + const_vector[q] -= ( const_vector[q] - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] ) From 301daa0148b77f78af66a8c0728c8d1e09deb23c Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 01:09:12 +0330 Subject: [PATCH 40/92] Update gaussian_elimination_pivoting.py --- .../src/gaussian_elimination_pivoting.py | 27 +++++++------------ 1 file changed, 10 insertions(+), 17 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py 
b/linear_algebra/src/gaussian_elimination_pivoting.py index d430d3c22814..bd99c326f6c4 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,9 +1,8 @@ import numpy as np - def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: """ - Selects the index of the minimum absolute + Selects the index of the minimum absolute value in the specified column of a matrix. Parameters: @@ -18,20 +17,18 @@ def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) >>> custom_pivoting(a_matrix, 3, 1) 0 - """ + """ min_index = column_index for index in range(column_index + 1, num_rows): if abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): min_index = index return min_index - -def custom_gauss_elimination_pivoting( - coeff_matrix: list, const_vector: list, num_equations: int -) -> list: +def custom_gauss_elimination_pivoting(coeff_matrix: list, + const_vector: list, num_equations: int) -> list: """ - Solves a system of linear equations using Gaussian - elimination with partial pivoting. + Solves a system of linear equations using + Gaussian elimination with partial pivoting. Parameters: - coeff_matrix (list): The coefficient matrix. @@ -50,10 +47,8 @@ def custom_gauss_elimination_pivoting( result = [] for i in range(num_equations - 1): new_index = custom_pivoting(coeff_matrix, num_equations, i) - coeff_matrix[i], coeff_matrix[new_index] = (coeff_matrix[new_index],) - coeff_matrix[i] - const_vector[i], const_vector[new_index] = (const_vector[new_index],) - const_vector[i] + coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], coeff_matrix[i] + const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i] pivot = coeff_matrix[i][i] for j in range(i + 1, num_equations): m = -1 * coeff_matrix[j][i] / pivot @@ -64,13 +59,11 @@ def custom_gauss_elimination_pivoting( for row_index in range(num_equations - 1, -1, -1): result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): - const_vector[q] -= ( - const_vector[q] - - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] - ) + const_vector[q] = const_vector[q] - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] return result + # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From 8c19a51566b5906e4f3a6593ff69d86ac7cd631e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 21:39:48 +0000 Subject: [PATCH 41/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../src/gaussian_elimination_pivoting.py | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index bd99c326f6c4..9b1a7986d82c 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,8 +1,9 @@ import numpy as np + def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: """ - Selects the index of the minimum absolute + Selects the index of the minimum absolute value in the specified column of a matrix. 
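
The swap assignments that patches 35-40 keep rewriting are worth a close look. Once the statement was split after the comma (patch 35), the right-hand side became a one-element tuple, and the auto-formatter then made that explicit as (coeff_matrix[new_index],) in patch 38; either way the code no longer swaps, it fails at runtime. A miniature reproduction and the safe multi-line form:

    row_i, row_j = [1.0, 2.0], [3.0, 4.0]

    try:
        row_i, row_j = (row_j,)  # what the split / auto-fixed version amounts to
    except ValueError as exc:
        print(exc)  # not enough values to unpack (expected 2, got 1)

    # Safe way to wrap a long swap: parenthesize the whole right-hand tuple.
    row_i, row_j = (
        row_j,
        row_i,
    )
    print(row_i, row_j)  # [3.0, 4.0] [1.0, 2.0]
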
Parameters: @@ -17,17 +18,19 @@ def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) >>> custom_pivoting(a_matrix, 3, 1) 0 - """ + """ min_index = column_index for index in range(column_index + 1, num_rows): if abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): min_index = index return min_index -def custom_gauss_elimination_pivoting(coeff_matrix: list, - const_vector: list, num_equations: int) -> list: + +def custom_gauss_elimination_pivoting( + coeff_matrix: list, const_vector: list, num_equations: int +) -> list: """ - Solves a system of linear equations using + Solves a system of linear equations using Gaussian elimination with partial pivoting. Parameters: @@ -47,8 +50,14 @@ def custom_gauss_elimination_pivoting(coeff_matrix: list, result = [] for i in range(num_equations - 1): new_index = custom_pivoting(coeff_matrix, num_equations, i) - coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], coeff_matrix[i] - const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i] + coeff_matrix[i], coeff_matrix[new_index] = ( + coeff_matrix[new_index], + coeff_matrix[i], + ) + const_vector[i], const_vector[new_index] = ( + const_vector[new_index], + const_vector[i], + ) pivot = coeff_matrix[i][i] for j in range(i + 1, num_equations): m = -1 * coeff_matrix[j][i] / pivot @@ -59,11 +68,13 @@ def custom_gauss_elimination_pivoting(coeff_matrix: list, for row_index in range(num_equations - 1, -1, -1): result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): - const_vector[q] = const_vector[q] - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] + const_vector[q] = ( + const_vector[q] + - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] + ) return result - # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From 5d7dc3266de763ce974b0d0633a49f67a40672a6 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 01:16:21 +0330 Subject: [PATCH 42/92] Update gaussian_elimination_pivoting.py --- linear_algebra/src/gaussian_elimination_pivoting.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 9b1a7986d82c..f997b0c5ed95 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -68,10 +68,7 @@ def custom_gauss_elimination_pivoting( for row_index in range(num_equations - 1, -1, -1): result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): - const_vector[q] = ( - const_vector[q] - - result[num_equations - row_index - 1] * coeff_matrix[q][row_index] - ) + const_vector[q] -= result[num_equations - 1 - row_index] * coeff_matrix[q][row_index] return result From fe4352bbf42593e7ddf8881b6e80ba34064c9666 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 01:19:22 +0330 Subject: [PATCH 43/92] Update gaussian_elimination_pivoting.py --- linear_algebra/src/gaussian_elimination_pivoting.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index f997b0c5ed95..18910a688bab 100644 --- 
a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -68,7 +68,9 @@ def custom_gauss_elimination_pivoting( for row_index in range(num_equations - 1, -1, -1): result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): - const_vector[q] -= result[num_equations - 1 - row_index] * coeff_matrix[q][row_index] + const_vector[q] -= result[num_equations - 1 - row_index + ] * coeff_matrix[q][row_index] + return result From a16c48e7b28cc69431d4a40b1fae17a03fc4f417 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 21:50:05 +0000 Subject: [PATCH 44/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- linear_algebra/src/gaussian_elimination_pivoting.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 18910a688bab..7f152a43de61 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -68,9 +68,10 @@ def custom_gauss_elimination_pivoting( for row_index in range(num_equations - 1, -1, -1): result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): - const_vector[q] -= result[num_equations - 1 - row_index - ] * coeff_matrix[q][row_index] - + const_vector[q] -= ( + result[num_equations - 1 - row_index] * coeff_matrix[q][row_index] + ) + return result From 58ad12e55e5273077b82b9c8f9936eb9bac6ab48 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 01:20:47 +0330 Subject: [PATCH 45/92] Update gaussian_elimination_pivoting.py --- linear_algebra/src/gaussian_elimination_pivoting.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 7f152a43de61..0f79191fd5eb 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -69,9 +69,7 @@ def custom_gauss_elimination_pivoting( result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): const_vector[q] -= ( - result[num_equations - 1 - row_index] * coeff_matrix[q][row_index] - ) - + result[num_equations - 1 - row_index] * coeff_matrix[q][row_index]) return result From ded4ad012b398147b979a9e68b4ffd05656f379d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 21:51:38 +0000 Subject: [PATCH 46/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- linear_algebra/src/gaussian_elimination_pivoting.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 0f79191fd5eb..ab795ca045d7 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -69,7 +69,8 @@ def custom_gauss_elimination_pivoting( result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) for q in range(row_index - 1, -1, -1): const_vector[q] -= ( - result[num_equations - 1 - row_index] * 
coeff_matrix[q][row_index])
+                result[num_equations - 1 - row_index] * coeff_matrix[q][row_index]
+            )

     return result

From 6772aeb084021f0dc794bf5739d04e6d80a9d44a Mon Sep 17 00:00:00 2001
From: Mohammad Esfandiyar
Date: Fri, 27 Oct 2023 01:27:25 +0330
Subject: [PATCH 47/92] Update gaussian_elimination_pivoting.py

---
 .../src/gaussian_elimination_pivoting.py | 42 +++++++++----------
 1 file changed, 19 insertions(+), 23 deletions(-)

diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py
index ab795ca045d7..c1f389fa5fa1 100644
--- a/linear_algebra/src/gaussian_elimination_pivoting.py
+++ b/linear_algebra/src/gaussian_elimination_pivoting.py
@@ -1,9 +1,9 @@
 import numpy as np
-
-def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int:
+def custom_pivoting(matrix: np.ndarray,
+                    num_rows: int, column_index: int) -> int:
     """
-    Selects the index of the minimum absolute
+    Selects the index of the minimum absolute
     value in the specified column of a matrix.
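
Patches 42-46 above converge on const_vector[q] -= result[num_equations - 1 - row_index] * coeff_matrix[q][row_index], which also quietly undoes a bug from patch 39: there, = was switched to -= while the right-hand side still began with const_vector[q] -, and x -= (x - term) simplifies to x = term, discarding the running value. In miniature:

    b_q, term = 10.0, 3.0

    b_q -= (b_q - term)  # the patch-39 form: b_q collapses to term
    print(b_q)           # 3.0

    b_q = 10.0
    b_q -= term          # the intended update
    print(b_q)           # 7.0
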
Parameters: @@ -48,16 +47,12 @@ def custom_gauss_elimination_pivoting( [1.0, 2.0, 3.0] """ result = [] + + # Forward elimination for i in range(num_equations - 1): new_index = custom_pivoting(coeff_matrix, num_equations, i) - coeff_matrix[i], coeff_matrix[new_index] = ( - coeff_matrix[new_index], - coeff_matrix[i], - ) - const_vector[i], const_vector[new_index] = ( - const_vector[new_index], - const_vector[i], - ) + coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], coeff_matrix[i] + const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i] pivot = coeff_matrix[i][i] for j in range(i + 1, num_equations): m = -1 * coeff_matrix[j][i] / pivot @@ -65,15 +60,16 @@ def custom_gauss_elimination_pivoting( coeff_matrix[j][k] += m * coeff_matrix[i][k] const_vector[j] += m * const_vector[i] + # Backward substitution for row_index in range(num_equations - 1, -1, -1): - result.append(const_vector[row_index] / coeff_matrix[row_index][row_index]) - for q in range(row_index - 1, -1, -1): - const_vector[q] -= ( - result[num_equations - 1 - row_index] * coeff_matrix[q][row_index] - ) + temp_sum = sum(coeff_matrix[row_index][col] * result[col] for col in range(row_index + 1, num_equations)) + result.append((const_vector[row_index] - temp_sum) / coeff_matrix[row_index][row_index]) + + result.reverse() return result + # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From aeab7909307eca51b114bb3186901dad9fdd3758 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 21:59:20 +0000 Subject: [PATCH 48/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../src/gaussian_elimination_pivoting.py | 40 ++++++++++++------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index c1f389fa5fa1..2da3c51e11fa 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,9 +1,9 @@ import numpy as np -def custom_pivoting(matrix: np.ndarray, - num_rows: int, column_index: int) -> int: + +def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: """ - Selects the index of the minimum absolute + Selects the index of the minimum absolute value in the specified column of a matrix. Parameters: @@ -12,24 +12,26 @@ def custom_pivoting(matrix: np.ndarray, - column_index (int): The index of the column. Returns: - - int: The index of the minimum absolute value + - int: The index of the minimum absolute value in the specified column. Example: >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) >>> custom_pivoting(a_matrix, 3, 1) 0 - """ + """ min_index = column_index for index in range(column_index + 1, num_rows): if abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): min_index = index return min_index -def custom_gauss_elimination_pivoting(coeff_matrix: list, - const_vector: list, num_equations: int) -> list: + +def custom_gauss_elimination_pivoting( + coeff_matrix: list, const_vector: list, num_equations: int +) -> list: """ - Solves a system of linear equations using + Solves a system of linear equations using Gaussian elimination with partial pivoting. 
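
The temp_sum loop introduced in patch 47 is the textbook back-substitution recurrence x_i = (b_i - sum_{j>i} a_ij * x_j) / a_ii, evaluated from the last row upward. Pulled out as a standalone sketch (a hypothetical helper; assumes an upper-triangular matrix with nonzero diagonal):

    def back_substitute(upper: list, rhs: list) -> list:
        """Solve U x = b for upper-triangular U, from the last row up."""
        n = len(rhs)
        x = [0.0] * n
        for i in range(n - 1, -1, -1):
            partial = sum(upper[i][j] * x[j] for j in range(i + 1, n))
            x[i] = (rhs[i] - partial) / upper[i][i]
        return x

    print(back_substitute([[2.0, 1.0], [0.0, 1.0]], [5.0, 1.0]))  # [2.0, 1.0]
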
Parameters: @@ -47,12 +49,18 @@ def custom_gauss_elimination_pivoting(coeff_matrix: list, [1.0, 2.0, 3.0] """ result = [] - + # Forward elimination for i in range(num_equations - 1): new_index = custom_pivoting(coeff_matrix, num_equations, i) - coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], coeff_matrix[i] - const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i] + coeff_matrix[i], coeff_matrix[new_index] = ( + coeff_matrix[new_index], + coeff_matrix[i], + ) + const_vector[i], const_vector[new_index] = ( + const_vector[new_index], + const_vector[i], + ) pivot = coeff_matrix[i][i] for j in range(i + 1, num_equations): m = -1 * coeff_matrix[j][i] / pivot @@ -62,14 +70,18 @@ def custom_gauss_elimination_pivoting(coeff_matrix: list, # Backward substitution for row_index in range(num_equations - 1, -1, -1): - temp_sum = sum(coeff_matrix[row_index][col] * result[col] for col in range(row_index + 1, num_equations)) - result.append((const_vector[row_index] - temp_sum) / coeff_matrix[row_index][row_index]) + temp_sum = sum( + coeff_matrix[row_index][col] * result[col] + for col in range(row_index + 1, num_equations) + ) + result.append( + (const_vector[row_index] - temp_sum) / coeff_matrix[row_index][row_index] + ) result.reverse() return result - # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From f4296b06b0d9c0432412252a5800e10b1cc4ac6a Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 01:34:09 +0330 Subject: [PATCH 49/92] Update gaussian_elimination_pivoting.py --- .../src/gaussian_elimination_pivoting.py | 23 +++++-------------- 1 file changed, 6 insertions(+), 17 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 2da3c51e11fa..880c65dd927c 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -48,19 +48,13 @@ def custom_gauss_elimination_pivoting( >>> custom_gauss_elimination_pivoting(a_matrix, b_vector, 3) [1.0, 2.0, 3.0] """ - result = [] - + result: List[float] = [] + # Forward elimination for i in range(num_equations - 1): new_index = custom_pivoting(coeff_matrix, num_equations, i) - coeff_matrix[i], coeff_matrix[new_index] = ( - coeff_matrix[new_index], - coeff_matrix[i], - ) - const_vector[i], const_vector[new_index] = ( - const_vector[new_index], - const_vector[i], - ) + coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], coeff_matrix[i] + const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i] pivot = coeff_matrix[i][i] for j in range(i + 1, num_equations): m = -1 * coeff_matrix[j][i] / pivot @@ -70,13 +64,8 @@ def custom_gauss_elimination_pivoting( # Backward substitution for row_index in range(num_equations - 1, -1, -1): - temp_sum = sum( - coeff_matrix[row_index][col] * result[col] - for col in range(row_index + 1, num_equations) - ) - result.append( - (const_vector[row_index] - temp_sum) / coeff_matrix[row_index][row_index] - ) + temp_sum = sum(coeff_matrix[row_index][col] * result[col] for col in range(row_index + 1, num_equations)) + result.append((const_vector[row_index] - temp_sum) / coeff_matrix[row_index][row_index]) result.reverse() return result From 88af17d8dc26900ebaa2b167bc0db03d344645bb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 22:04:46 +0000 Subject: [PATCH 
50/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../src/gaussian_elimination_pivoting.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 880c65dd927c..653e31b30fc4 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -49,12 +49,18 @@ def custom_gauss_elimination_pivoting( [1.0, 2.0, 3.0] """ result: List[float] = [] - + # Forward elimination for i in range(num_equations - 1): new_index = custom_pivoting(coeff_matrix, num_equations, i) - coeff_matrix[i], coeff_matrix[new_index] = coeff_matrix[new_index], coeff_matrix[i] - const_vector[i], const_vector[new_index] = const_vector[new_index], const_vector[i] + coeff_matrix[i], coeff_matrix[new_index] = ( + coeff_matrix[new_index], + coeff_matrix[i], + ) + const_vector[i], const_vector[new_index] = ( + const_vector[new_index], + const_vector[i], + ) pivot = coeff_matrix[i][i] for j in range(i + 1, num_equations): m = -1 * coeff_matrix[j][i] / pivot @@ -64,8 +70,13 @@ def custom_gauss_elimination_pivoting( # Backward substitution for row_index in range(num_equations - 1, -1, -1): - temp_sum = sum(coeff_matrix[row_index][col] * result[col] for col in range(row_index + 1, num_equations)) - result.append((const_vector[row_index] - temp_sum) / coeff_matrix[row_index][row_index]) + temp_sum = sum( + coeff_matrix[row_index][col] * result[col] + for col in range(row_index + 1, num_equations) + ) + result.append( + (const_vector[row_index] - temp_sum) / coeff_matrix[row_index][row_index] + ) result.reverse() return result From 94acda96b61fc1385c1e80212c0921c9775b983e Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 01:44:48 +0330 Subject: [PATCH 51/92] Update gaussian_elimination_pivoting.py --- .../src/gaussian_elimination_pivoting.py | 156 +++++++++--------- 1 file changed, 75 insertions(+), 81 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 653e31b30fc4..bde231c832a4 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,85 +1,79 @@ import numpy as np - - -def custom_pivoting(matrix: np.ndarray, num_rows: int, column_index: int) -> int: - """ - Selects the index of the minimum absolute - value in the specified column of a matrix. - - Parameters: - - matrix (np.ndarray): The input matrix. - - num_rows (int): The number of rows in the matrix. - - column_index (int): The index of the column. - - Returns: - - int: The index of the minimum absolute value - in the specified column. - - Example: - >>> a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) - >>> custom_pivoting(a_matrix, 3, 1) - 0 - """ - min_index = column_index - for index in range(column_index + 1, num_rows): - if abs(matrix[index][column_index]) < abs(matrix[min_index][column_index]): - min_index = index - return min_index - - -def custom_gauss_elimination_pivoting( - coeff_matrix: list, const_vector: list, num_equations: int -) -> list: - """ - Solves a system of linear equations using - Gaussian elimination with partial pivoting. - - Parameters: - - coeff_matrix (list): The coefficient matrix. - - const_vector (list): The constant vector. - - num_equations (int): The number of equations in the system. 
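
A small latent issue in patches 49-50: result: List[float] = [] is used with no "from typing import List" anywhere in the file. Local-variable annotations are not evaluated at runtime (PEP 526), so nothing crashes, but linters flag List as an undefined name (F821). Either conventional fix works:

    from typing import List  # option 1: import the alias the annotation uses

    result: List[float] = []
    result2: list[float] = []  # option 2: builtin generic, Python 3.9+
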
- - Returns: - - list: The solution vector. - - Example: - >>> a_matrix = [[2, 3, 4], [1, -2, 3], [3, 4, 5]] - >>> b_vector = [20, 9, 11] - >>> custom_gauss_elimination_pivoting(a_matrix, b_vector, 3) - [1.0, 2.0, 3.0] - """ - result: List[float] = [] - - # Forward elimination - for i in range(num_equations - 1): - new_index = custom_pivoting(coeff_matrix, num_equations, i) - coeff_matrix[i], coeff_matrix[new_index] = ( - coeff_matrix[new_index], - coeff_matrix[i], - ) - const_vector[i], const_vector[new_index] = ( - const_vector[new_index], - const_vector[i], - ) - pivot = coeff_matrix[i][i] - for j in range(i + 1, num_equations): - m = -1 * coeff_matrix[j][i] / pivot - for k in range(num_equations): - coeff_matrix[j][k] += m * coeff_matrix[i][k] - const_vector[j] += m * const_vector[i] - - # Backward substitution - for row_index in range(num_equations - 1, -1, -1): - temp_sum = sum( - coeff_matrix[row_index][col] * result[col] - for col in range(row_index + 1, num_equations) - ) - result.append( - (const_vector[row_index] - temp_sum) / coeff_matrix[row_index][row_index] - ) - - result.reverse() - return result +import sys +import time + +matrixAB = np.loadtxt('matrix.txt') +B = np.copy(matrixAB[:, matrixAB.shape[1] - 1]) + + +def foo(matrix): + start = time.process_time() + AB = np.copy(matrix) + numOfRows = AB.shape[0] + numOfColumns = AB.shape[1] - 1 + xLst = [] + + """"Lead element search""" + print("Matrix before leading coefficient search: ") + print(AB) + print(" ") + + """Upper triangular matrix""" + + for columnNum in range(numOfRows): + for i in range(columnNum, numOfColumns): + if abs(AB[i][columnNum]) > abs(AB[columnNum][columnNum]): + AB[[columnNum, i]] = AB[[i, columnNum]] + if AB[columnNum, columnNum] == 0.0: + sys.exit("Matrix is not correct") + else: + pass + if columnNum != 0: + for i in range(columnNum, numOfRows): + AB[i, :] = AB[i, :] - AB[i, columnNum - 1] / AB[columnNum - 1, columnNum - 1] * AB[columnNum - 1, :] + + print("Upper triangular matrix: ") + print(AB.round(3)) + print(" ") + + """Find x vector""" + columnNum = numOfRows + while columnNum != 0: + columnNum -= 1 + lineOfX = AB[columnNum, numOfRows] + if columnNum + 1 != numOfRows: + for y in range(1, numOfRows - columnNum): + lineOfX += -AB[columnNum, numOfRows - y] * xLst[y - 1] + x = lineOfX / AB[columnNum, columnNum] + xLst.append(x) + + stop = time.process_time() + xLst.reverse() + print("x vector: ") + print(xLst) + print(" ") + print("Start time: ", start, "End time: ", stop) + print("Elapsed time during the whole function in seconds:", stop - start) + + return np.asarray(xLst) + + +vectorOfXAlpha = foo(matrixAB) + +"""Cond(A)""" +modifiedB = np.copy(B) +modifiedB[np.argmax(abs(B))] = B[np.argmax(abs(B))] / 100 * 101 + +matrixAB[:, matrixAB.shape[1] - 1] = modifiedB +print() +print("Cond(A) check: ") +vectorOfXBeta = foo(matrixAB) + +deltaB = modifiedB - B +deltaX = vectorOfXAlpha - vectorOfXBeta +print(" ") +condA = abs(np.sum(deltaX) / np.sum(vectorOfXAlpha)) * (np.sum(B) / np.sum(deltaB)) +print("Cond(A) =< {:03f}".format(condA)) # Example usage: From 619cc003fd1f846e85ae577b303e633edbd400f3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 22:15:24 +0000 Subject: [PATCH 52/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- linear_algebra/src/gaussian_elimination_pivoting.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git 
a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index bde231c832a4..4eb1364db069 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -2,7 +2,7 @@ import sys import time -matrixAB = np.loadtxt('matrix.txt') +matrixAB = np.loadtxt("matrix.txt") B = np.copy(matrixAB[:, matrixAB.shape[1] - 1]) @@ -30,7 +30,12 @@ def foo(matrix): pass if columnNum != 0: for i in range(columnNum, numOfRows): - AB[i, :] = AB[i, :] - AB[i, columnNum - 1] / AB[columnNum - 1, columnNum - 1] * AB[columnNum - 1, :] + AB[i, :] = ( + AB[i, :] + - AB[i, columnNum - 1] + / AB[columnNum - 1, columnNum - 1] + * AB[columnNum - 1, :] + ) print("Upper triangular matrix: ") print(AB.round(3)) From a7267330519cd869d01b029283832458473d2b5b Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 01:51:44 +0330 Subject: [PATCH 53/92] Update gaussian_elimination_pivoting.py --- .../src/gaussian_elimination_pivoting.py | 84 +++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 4eb1364db069..94e361cd81dd 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,84 +1,84 @@ -import numpy as np import sys import time +import numpy as np -matrixAB = np.loadtxt("matrix.txt") -B = np.copy(matrixAB[:, matrixAB.shape[1] - 1]) +matrixab = np.loadtxt("matrix.txt") +B = np.copy(matrixab[:, matrixab.shape[1] - 1]) def foo(matrix): start = time.process_time() - AB = np.copy(matrix) - numOfRows = AB.shape[0] - numOfColumns = AB.shape[1] - 1 - xLst = [] + ab = np.copy(matrix) + numofrows = ab.shape[0] + numofcolumns = ab.shape[1] - 1 + xlst = [] """"Lead element search""" print("Matrix before leading coefficient search: ") - print(AB) + print(ab) print(" ") """Upper triangular matrix""" - for columnNum in range(numOfRows): - for i in range(columnNum, numOfColumns): - if abs(AB[i][columnNum]) > abs(AB[columnNum][columnNum]): - AB[[columnNum, i]] = AB[[i, columnNum]] - if AB[columnNum, columnNum] == 0.0: + for columnnum in range(numofrows): + for i in range(columnnum, numofcolumns): + if abs(ab[i][columnnum]) > abs(ab[columnnum][columnnum]): + ab[[columnnum, i]] = ab[[i, columnnum]] + if ab[columnnum, columnnum] == 0.0: sys.exit("Matrix is not correct") else: pass - if columnNum != 0: - for i in range(columnNum, numOfRows): - AB[i, :] = ( - AB[i, :] - - AB[i, columnNum - 1] - / AB[columnNum - 1, columnNum - 1] - * AB[columnNum - 1, :] + if columnnum != 0: + for i in range(columnnum, numofrows): + ab[i, :] = ( + ab[i, :] + - ab[i, columnnum - 1] + / ab[columnnum - 1, columnnum - 1] + * ab[columnnum - 1, :] ) print("Upper triangular matrix: ") - print(AB.round(3)) + print(ab.round(3)) print(" ") """Find x vector""" - columnNum = numOfRows - while columnNum != 0: - columnNum -= 1 - lineOfX = AB[columnNum, numOfRows] - if columnNum + 1 != numOfRows: - for y in range(1, numOfRows - columnNum): - lineOfX += -AB[columnNum, numOfRows - y] * xLst[y - 1] - x = lineOfX / AB[columnNum, columnNum] - xLst.append(x) + columnnum = numofrows + while columnnum != 0: + columnnum -= 1 + lineofx = ab[columnnum, numofrows] + if columnnum + 1 != numofrows: + for y in range(1, numofrows - columnnum): + lineofx += -ab[columnnum, numofrows - y] * xlst[y - 1] + x = lineofx / ab[columnnum, columnnum] + xlst.append(x) 
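
Stepping back from the renames: the script that patches 51-53 are massaging reads a whitespace-separated augmented matrix [A | b] from matrix.txt, with the right-hand side as the last column, and later estimates the condition number by re-solving after perturbing the largest entry of b by 1%. A sketch of the input convention and a cross-check (the file contents here are a made-up example):

    import numpy as np

    # matrix.txt would hold, e.g. (last column is b):
    #     2  1   5
    #     4  3  11
    matrixab = np.array([[2.0, 1.0, 5.0],
                         [4.0, 3.0, 11.0]])  # stand-in for np.loadtxt("matrix.txt")
    a, b = matrixab[:, :-1], matrixab[:, -1]

    # The printed quantity |sum(dx)/sum(x)| * (sum(b)/sum(db)) mirrors the
    # norm ratio (||dx||/||x||) / (||db||/||b||), which is at most cond(A);
    # it is a lower-bound style estimate rather than an upper bound, so the
    # "Cond(A) =<" label reads backwards for the norm version of the ratio.
    print(np.linalg.cond(a))  # exact 2-norm condition number, ~14.93 here
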
stop = time.process_time() - xLst.reverse() + xlst.reverse() print("x vector: ") - print(xLst) + print(xlst) print(" ") print("Start time: ", start, "End time: ", stop) print("Elapsed time during the whole function in seconds:", stop - start) - return np.asarray(xLst) + return np.asarray(xlst) -vectorOfXAlpha = foo(matrixAB) +vectorofxalpha = foo(matrixab) """Cond(A)""" -modifiedB = np.copy(B) -modifiedB[np.argmax(abs(B))] = B[np.argmax(abs(B))] / 100 * 101 +modifiedb = np.copy(B) +modifiedb[np.argmax(abs(B))] = B[np.argmax(abs(B))] / 100 * 101 -matrixAB[:, matrixAB.shape[1] - 1] = modifiedB +matrixab[:, matrixab.shape[1] - 1] = modifiedb print() print("Cond(A) check: ") -vectorOfXBeta = foo(matrixAB) +vectorofxbeta = foo(matrixab) -deltaB = modifiedB - B -deltaX = vectorOfXAlpha - vectorOfXBeta +deltab = modifiedb - B +deltax = vectorofxalpha - vectorofxbeta print(" ") -condA = abs(np.sum(deltaX) / np.sum(vectorOfXAlpha)) * (np.sum(B) / np.sum(deltaB)) -print("Cond(A) =< {:03f}".format(condA)) +conda = abs(np.sum(deltax) / np.sum(vectorofxalpha)) * (np.sum(B) / np.sum(deltab)) +print("Cond(A) =< {:03f}".f(conda)) # Example usage: From 5e6b9faa4f32e5eff82046730992e8d7c8e8efc9 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 01:54:24 +0330 Subject: [PATCH 54/92] Update gaussian_elimination_pivoting.py --- linear_algebra/src/gaussian_elimination_pivoting.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 94e361cd81dd..993b94acbddf 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,6 +1,7 @@ -import sys -import time import numpy as np +import time +import sys + matrixab = np.loadtxt("matrix.txt") B = np.copy(matrixab[:, matrixab.shape[1] - 1]) From 0a529e85e1db52c2d67276e98eeb3910397c0917 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 02:02:07 +0330 Subject: [PATCH 55/92] Update gaussian_elimination_pivoting.py --- linear_algebra/src/gaussian_elimination_pivoting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index 993b94acbddf..c2df4d5d7019 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -1,7 +1,7 @@ -import numpy as np -import time import sys +import time +import numpy as np matrixab = np.loadtxt("matrix.txt") B = np.copy(matrixab[:, matrixab.shape[1] - 1]) From 7e66a8e41b6d76d7bc89202ce85aa5bad9908af0 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 02:08:36 +0330 Subject: [PATCH 56/92] Update gaussian_elimination_pivoting.py --- .../src/gaussian_elimination_pivoting.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py index c2df4d5d7019..5c6c2a40e75f 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting.py @@ -14,13 +14,12 @@ def foo(matrix): numofcolumns = ab.shape[1] - 1 xlst = [] - """"Lead element search""" + """Lead element search""" print("Matrix before leading coefficient search: ") print(ab) print(" ") """Upper triangular matrix""" - for columnnum in range(numofrows): for i in 
range(columnnum, numofcolumns): if abs(ab[i][columnnum]) > abs(ab[columnnum][columnnum]): @@ -31,9 +30,8 @@ def foo(matrix): pass if columnnum != 0: for i in range(columnnum, numofrows): - ab[i, :] = ( - ab[i, :] - - ab[i, columnnum - 1] + ab[i, :] -= ( + ab[i, columnnum - 1] / ab[columnnum - 1, columnnum - 1] * ab[columnnum - 1, :] ) @@ -58,8 +56,8 @@ def foo(matrix): print("x vector: ") print(xlst) print(" ") - print("Start time: ", start, "End time: ", stop) - print("Elapsed time during the whole function in seconds:", stop - start) + print(f"Start time: {start}, End time: {stop}") + print(f"Elapsed time during the whole function in seconds: {stop - start}") return np.asarray(xlst) @@ -79,7 +77,7 @@ def foo(matrix): deltax = vectorofxalpha - vectorofxbeta print(" ") conda = abs(np.sum(deltax) / np.sum(vectorofxalpha)) * (np.sum(B) / np.sum(deltab)) -print("Cond(A) =< {:03f}".f(conda)) +print(f"Cond(A) =< {conda:0.6f}") # Example usage: From 28152871c7eec7a1a9e4c29cc85f9d11949a03e4 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 02:13:50 +0330 Subject: [PATCH 57/92] Delete linear_algebra/src/gaussian_elimination_pivoting.py --- .../src/gaussian_elimination_pivoting.py | 93 ------------------- 1 file changed, 93 deletions(-) delete mode 100644 linear_algebra/src/gaussian_elimination_pivoting.py diff --git a/linear_algebra/src/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting.py deleted file mode 100644 index 5c6c2a40e75f..000000000000 --- a/linear_algebra/src/gaussian_elimination_pivoting.py +++ /dev/null @@ -1,93 +0,0 @@ -import sys -import time - -import numpy as np - -matrixab = np.loadtxt("matrix.txt") -B = np.copy(matrixab[:, matrixab.shape[1] - 1]) - - -def foo(matrix): - start = time.process_time() - ab = np.copy(matrix) - numofrows = ab.shape[0] - numofcolumns = ab.shape[1] - 1 - xlst = [] - - """Lead element search""" - print("Matrix before leading coefficient search: ") - print(ab) - print(" ") - - """Upper triangular matrix""" - for columnnum in range(numofrows): - for i in range(columnnum, numofcolumns): - if abs(ab[i][columnnum]) > abs(ab[columnnum][columnnum]): - ab[[columnnum, i]] = ab[[i, columnnum]] - if ab[columnnum, columnnum] == 0.0: - sys.exit("Matrix is not correct") - else: - pass - if columnnum != 0: - for i in range(columnnum, numofrows): - ab[i, :] -= ( - ab[i, columnnum - 1] - / ab[columnnum - 1, columnnum - 1] - * ab[columnnum - 1, :] - ) - - print("Upper triangular matrix: ") - print(ab.round(3)) - print(" ") - - """Find x vector""" - columnnum = numofrows - while columnnum != 0: - columnnum -= 1 - lineofx = ab[columnnum, numofrows] - if columnnum + 1 != numofrows: - for y in range(1, numofrows - columnnum): - lineofx += -ab[columnnum, numofrows - y] * xlst[y - 1] - x = lineofx / ab[columnnum, columnnum] - xlst.append(x) - - stop = time.process_time() - xlst.reverse() - print("x vector: ") - print(xlst) - print(" ") - print(f"Start time: {start}, End time: {stop}") - print(f"Elapsed time during the whole function in seconds: {stop - start}") - - return np.asarray(xlst) - - -vectorofxalpha = foo(matrixab) - -"""Cond(A)""" -modifiedb = np.copy(B) -modifiedb[np.argmax(abs(B))] = B[np.argmax(abs(B))] / 100 * 101 - -matrixab[:, matrixab.shape[1] - 1] = modifiedb -print() -print("Cond(A) check: ") -vectorofxbeta = foo(matrixab) - -deltab = modifiedb - B -deltax = vectorofxalpha - vectorofxbeta -print(" ") -conda = abs(np.sum(deltax) / np.sum(vectorofxalpha)) * (np.sum(B) / np.sum(deltab)) -print(f"Cond(A) 
=< {conda:0.6f}") - - -# Example usage: -# n_size = 3 -# a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) -# b_vector = np.array([10, 11, 12], dtype=float) - -# solution = custom_gauss_elimination_pivoting(a_matrix, b_vector, n_size) -# print("Solution:", solution) - - -# URL that points to Wikipedia or another similar explanation. -# >>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<# From 6d648ec9140e25562718cd4d402053f8837d1490 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 02:14:32 +0330 Subject: [PATCH 58/92] Add files via upload --- .../gaussian_elimination_pivoting.py | 93 + .../src/gaussian_elimination_pivoting/text.py | 2161 +++++++++++++++++ 2 files changed, 2254 insertions(+) create mode 100644 linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py create mode 100644 linear_algebra/src/gaussian_elimination_pivoting/text.py diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py new file mode 100644 index 000000000000..5c6c2a40e75f --- /dev/null +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -0,0 +1,93 @@ +import sys +import time + +import numpy as np + +matrixab = np.loadtxt("matrix.txt") +B = np.copy(matrixab[:, matrixab.shape[1] - 1]) + + +def foo(matrix): + start = time.process_time() + ab = np.copy(matrix) + numofrows = ab.shape[0] + numofcolumns = ab.shape[1] - 1 + xlst = [] + + """Lead element search""" + print("Matrix before leading coefficient search: ") + print(ab) + print(" ") + + """Upper triangular matrix""" + for columnnum in range(numofrows): + for i in range(columnnum, numofcolumns): + if abs(ab[i][columnnum]) > abs(ab[columnnum][columnnum]): + ab[[columnnum, i]] = ab[[i, columnnum]] + if ab[columnnum, columnnum] == 0.0: + sys.exit("Matrix is not correct") + else: + pass + if columnnum != 0: + for i in range(columnnum, numofrows): + ab[i, :] -= ( + ab[i, columnnum - 1] + / ab[columnnum - 1, columnnum - 1] + * ab[columnnum - 1, :] + ) + + print("Upper triangular matrix: ") + print(ab.round(3)) + print(" ") + + """Find x vector""" + columnnum = numofrows + while columnnum != 0: + columnnum -= 1 + lineofx = ab[columnnum, numofrows] + if columnnum + 1 != numofrows: + for y in range(1, numofrows - columnnum): + lineofx += -ab[columnnum, numofrows - y] * xlst[y - 1] + x = lineofx / ab[columnnum, columnnum] + xlst.append(x) + + stop = time.process_time() + xlst.reverse() + print("x vector: ") + print(xlst) + print(" ") + print(f"Start time: {start}, End time: {stop}") + print(f"Elapsed time during the whole function in seconds: {stop - start}") + + return np.asarray(xlst) + + +vectorofxalpha = foo(matrixab) + +"""Cond(A)""" +modifiedb = np.copy(B) +modifiedb[np.argmax(abs(B))] = B[np.argmax(abs(B))] / 100 * 101 + +matrixab[:, matrixab.shape[1] - 1] = modifiedb +print() +print("Cond(A) check: ") +vectorofxbeta = foo(matrixab) + +deltab = modifiedb - B +deltax = vectorofxalpha - vectorofxbeta +print(" ") +conda = abs(np.sum(deltax) / np.sum(vectorofxalpha)) * (np.sum(B) / np.sum(deltab)) +print(f"Cond(A) =< {conda:0.6f}") + + +# Example usage: +# n_size = 3 +# a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) +# b_vector = np.array([10, 11, 12], dtype=float) + +# solution = custom_gauss_elimination_pivoting(a_matrix, b_vector, n_size) +# print("Solution:", solution) + + +# URL that 
points to Wikipedia or another similar explanation. +# >>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<# diff --git a/linear_algebra/src/gaussian_elimination_pivoting/text.py b/linear_algebra/src/gaussian_elimination_pivoting/text.py new file mode 100644 index 000000000000..0160bfeaa539 --- /dev/null +++ b/linear_algebra/src/gaussian_elimination_pivoting/text.py @@ -0,0 +1,2161 @@ +# Authors: Olivier Grisel +# Mathieu Blondel +# Lars Buitinck +# Robert Layton +# Jochen Wersdörfer +# Roman Sinayev +# +# License: BSD 3 clause +""" +The :mod:`sklearn.feature_extraction.text` submodule gathers utilities to +build feature vectors from text documents. +""" + +import array +from collections import defaultdict +from collections.abc import Mapping +from functools import partial +from numbers import Integral, Real +from operator import itemgetter +import re +import unicodedata +import warnings + +import numpy as np +import scipy.sparse as sp + +from ..base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin +from ..preprocessing import normalize +from ._hash import FeatureHasher +from ._stop_words import ENGLISH_STOP_WORDS +from ..utils.validation import check_is_fitted, check_array, FLOAT_DTYPES +from ..utils import _IS_32BIT +from ..exceptions import NotFittedError +from ..utils._param_validation import StrOptions, Interval, HasMethods + + +__all__ = [ + "HashingVectorizer", + "CountVectorizer", + "ENGLISH_STOP_WORDS", + "TfidfTransformer", + "TfidfVectorizer", + "strip_accents_ascii", + "strip_accents_unicode", + "strip_tags", +] + + +def _preprocess(doc, accent_function=None, lower=False): + """Chain together an optional series of text preprocessing steps to + apply to a document. + + Parameters + ---------- + doc: str + The string to preprocess + accent_function: callable, default=None + Function for handling accented characters. Common strategies include + normalizing and removing. + lower: bool, default=False + Whether to use str.lower to lowercase all of the text + + Returns + ------- + doc: str + preprocessed string + """ + if lower: + doc = doc.lower() + if accent_function is not None: + doc = accent_function(doc) + return doc + + +def _analyze( + doc, + analyzer=None, + tokenizer=None, + ngrams=None, + preprocessor=None, + decoder=None, + stop_words=None, +): + """Chain together an optional series of text processing steps to go from + a single document to ngrams, with or without tokenizing or preprocessing. + + If analyzer is used, only the decoder argument is used, as the analyzer is + intended to replace the preprocessor, tokenizer, and ngrams steps. + + Parameters + ---------- + analyzer: callable, default=None + tokenizer: callable, default=None + ngrams: callable, default=None + preprocessor: callable, default=None + decoder: callable, default=None + stop_words: list, default=None + + Returns + ------- + ngrams: list + A sequence of tokens, possibly with pairs, triples, etc. + """ + + if decoder is not None: + doc = decoder(doc) + if analyzer is not None: + doc = analyzer(doc) + else: + if preprocessor is not None: + doc = preprocessor(doc) + if tokenizer is not None: + doc = tokenizer(doc) + if ngrams is not None: + if stop_words is not None: + doc = ngrams(doc, stop_words) + else: + doc = ngrams(doc) + return doc + + +def strip_accents_unicode(s): + """Transform accentuated unicode symbols into their simple counterpart. 
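[Editor's note] The NFKD-based stripping implemented just below can be summarized with a minimal standard-library sketch; the name `strip_accents_demo` is illustrative and not part of this module:

import unicodedata

def strip_accents_demo(s: str) -> str:
    # Decompose characters (e.g. "é" -> "e" + combining acute accent),
    # then drop the combining marks.
    normalized = unicodedata.normalize("NFKD", s)
    return "".join(c for c in normalized if not unicodedata.combining(c))

print(strip_accents_demo("café naïve"))  # cafe naive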
+ + Warning: the python-level loop and join operations make this + implementation 20 times slower than the strip_accents_ascii basic + normalization. + + Parameters + ---------- + s : str + The string to strip. + + Returns + ------- + s : str + The stripped string. + + See Also + -------- + strip_accents_ascii : Remove accentuated char for any unicode symbol that + has a direct ASCII equivalent. + """ + try: + # If `s` is ASCII-compatible, then it does not contain any accented + # characters and we can avoid an expensive list comprehension + s.encode("ASCII", errors="strict") + return s + except UnicodeEncodeError: + normalized = unicodedata.normalize("NFKD", s) + return "".join([c for c in normalized if not unicodedata.combining(c)]) + + +def strip_accents_ascii(s): + """Transform accentuated unicode symbols into ascii or nothing. + + Warning: this solution is only suited for languages that have a direct + transliteration to ASCII symbols. + + Parameters + ---------- + s : str + The string to strip. + + Returns + ------- + s : str + The stripped string. + + See Also + -------- + strip_accents_unicode : Remove accentuated char for any unicode symbol. + """ + nkfd_form = unicodedata.normalize("NFKD", s) + return nkfd_form.encode("ASCII", "ignore").decode("ASCII") + + +def strip_tags(s): + """Basic regexp based HTML / XML tag stripper function. + + For serious HTML/XML preprocessing you should rather use an external + library such as lxml or BeautifulSoup. + + Parameters + ---------- + s : str + The string to strip. + + Returns + ------- + s : str + The stripped string. + """ + return re.compile(r"<([^>]+)>", flags=re.UNICODE).sub(" ", s) + + +def _check_stop_list(stop): + if stop == "english": + return ENGLISH_STOP_WORDS + elif isinstance(stop, str): + raise ValueError("not a built-in stop list: %s" % stop) + elif stop is None: + return None + else: # assume it's a collection + return frozenset(stop) + + +class _VectorizerMixin: + """Provides common code for text vectorizers (tokenization logic).""" + + _white_spaces = re.compile(r"\s\s+") + + def decode(self, doc): + """Decode the input into a string of unicode symbols. + + The decoding strategy depends on the vectorizer parameters. + + Parameters + ---------- + doc : bytes or str + The string to decode. + + Returns + ------- + doc: str + A string of unicode symbols. + """ + if self.input == "filename": + with open(doc, "rb") as fh: + doc = fh.read() + + elif self.input == "file": + doc = doc.read() + + if isinstance(doc, bytes): + doc = doc.decode(self.encoding, self.decode_error) + + if doc is np.nan: + raise ValueError( + "np.nan is an invalid document, expected byte or unicode string." 
+ ) + + return doc + + def _word_ngrams(self, tokens, stop_words=None): + """Turn tokens into a sequence of n-grams after stop words filtering""" + # handle stop words + if stop_words is not None: + tokens = [w for w in tokens if w not in stop_words] + + # handle token n-grams + min_n, max_n = self.ngram_range + if max_n != 1: + original_tokens = tokens + if min_n == 1: + # no need to do any slicing for unigrams + # just iterate through the original tokens + tokens = list(original_tokens) + min_n += 1 + else: + tokens = [] + + n_original_tokens = len(original_tokens) + + # bind method outside of loop to reduce overhead + tokens_append = tokens.append + space_join = " ".join + + for n in range(min_n, min(max_n + 1, n_original_tokens + 1)): + for i in range(n_original_tokens - n + 1): + tokens_append(space_join(original_tokens[i : i + n])) + + return tokens + + def _char_ngrams(self, text_document): + """Tokenize text_document into a sequence of character n-grams""" + # normalize white spaces + text_document = self._white_spaces.sub(" ", text_document) + + text_len = len(text_document) + min_n, max_n = self.ngram_range + if min_n == 1: + # no need to do any slicing for unigrams + # iterate through the string + ngrams = list(text_document) + min_n += 1 + else: + ngrams = [] + + # bind method outside of loop to reduce overhead + ngrams_append = ngrams.append + + for n in range(min_n, min(max_n + 1, text_len + 1)): + for i in range(text_len - n + 1): + ngrams_append(text_document[i : i + n]) + return ngrams + + def _char_wb_ngrams(self, text_document): + """Whitespace sensitive char-n-gram tokenization. + + Tokenize text_document into a sequence of character n-grams + operating only inside word boundaries. n-grams at the edges + of words are padded with space.""" + # normalize white spaces + text_document = self._white_spaces.sub(" ", text_document) + + min_n, max_n = self.ngram_range + ngrams = [] + + # bind method outside of loop to reduce overhead + ngrams_append = ngrams.append + + for w in text_document.split(): + w = " " + w + " " + w_len = len(w) + for n in range(min_n, max_n + 1): + offset = 0 + ngrams_append(w[offset : offset + n]) + while offset + n < w_len: + offset += 1 + ngrams_append(w[offset : offset + n]) + if offset == 0: # count a short word (w_len < n) only once + break + return ngrams + + def build_preprocessor(self): + """Return a function to preprocess the text before tokenization. + + Returns + ------- + preprocessor: callable + A function to preprocess the text before tokenization. + """ + if self.preprocessor is not None: + return self.preprocessor + + # accent stripping + if not self.strip_accents: + strip_accents = None + elif callable(self.strip_accents): + strip_accents = self.strip_accents + elif self.strip_accents == "ascii": + strip_accents = strip_accents_ascii + elif self.strip_accents == "unicode": + strip_accents = strip_accents_unicode + else: + raise ValueError( + 'Invalid value for "strip_accents": %s' % self.strip_accents + ) + + return partial(_preprocess, accent_function=strip_accents, lower=self.lowercase) + + def build_tokenizer(self): + """Return a function that splits a string into a sequence of tokens. + + Returns + ------- + tokenizer: callable + A function to split a string into a sequence of tokens. + """ + if self.tokenizer is not None: + return self.tokenizer + token_pattern = re.compile(self.token_pattern) + + if token_pattern.groups > 1: + raise ValueError( + "More than 1 capturing group in token pattern. 
Only a single " + "group should be captured." + ) + + return token_pattern.findall + + def get_stop_words(self): + """Build or fetch the effective stop words list. + + Returns + ------- + stop_words: list or None + A list of stop words. + """ + return _check_stop_list(self.stop_words) + + def _check_stop_words_consistency(self, stop_words, preprocess, tokenize): + """Check if stop words are consistent + + Returns + ------- + is_consistent : True if stop words are consistent with the preprocessor + and tokenizer, False if they are not, None if the check + was previously performed, "error" if it could not be + performed (e.g. because of the use of a custom + preprocessor / tokenizer) + """ + if id(self.stop_words) == getattr(self, "_stop_words_id", None): + # Stop words are were previously validated + return None + + # NB: stop_words is validated, unlike self.stop_words + try: + inconsistent = set() + for w in stop_words or (): + tokens = list(tokenize(preprocess(w))) + for token in tokens: + if token not in stop_words: + inconsistent.add(token) + self._stop_words_id = id(self.stop_words) + + if inconsistent: + warnings.warn( + "Your stop_words may be inconsistent with " + "your preprocessing. Tokenizing the stop " + "words generated tokens %r not in " + "stop_words." + % sorted(inconsistent) + ) + return not inconsistent + except Exception: + # Failed to check stop words consistency (e.g. because a custom + # preprocessor or tokenizer was used) + self._stop_words_id = id(self.stop_words) + return "error" + + def build_analyzer(self): + """Return a callable to process input data. + + The callable handles preprocessing, tokenization, and n-grams generation. + + Returns + ------- + analyzer: callable + A function to handle preprocessing, tokenization + and n-grams generation. + """ + + if callable(self.analyzer): + return partial(_analyze, analyzer=self.analyzer, decoder=self.decode) + + preprocess = self.build_preprocessor() + + if self.analyzer == "char": + return partial( + _analyze, + ngrams=self._char_ngrams, + preprocessor=preprocess, + decoder=self.decode, + ) + + elif self.analyzer == "char_wb": + + return partial( + _analyze, + ngrams=self._char_wb_ngrams, + preprocessor=preprocess, + decoder=self.decode, + ) + + elif self.analyzer == "word": + stop_words = self.get_stop_words() + tokenize = self.build_tokenizer() + self._check_stop_words_consistency(stop_words, preprocess, tokenize) + return partial( + _analyze, + ngrams=self._word_ngrams, + tokenizer=tokenize, + preprocessor=preprocess, + decoder=self.decode, + stop_words=stop_words, + ) + + else: + raise ValueError( + "%s is not a valid tokenization scheme/analyzer" % self.analyzer + ) + + def _validate_vocabulary(self): + vocabulary = self.vocabulary + if vocabulary is not None: + if isinstance(vocabulary, set): + vocabulary = sorted(vocabulary) + if not isinstance(vocabulary, Mapping): + vocab = {} + for i, t in enumerate(vocabulary): + if vocab.setdefault(t, i) != i: + msg = "Duplicate term in vocabulary: %r" % t + raise ValueError(msg) + vocabulary = vocab + else: + indices = set(vocabulary.values()) + if len(indices) != len(vocabulary): + raise ValueError("Vocabulary contains repeated indices.") + for i in range(len(vocabulary)): + if i not in indices: + msg = "Vocabulary of size %d doesn't contain index %d." 
% ( + len(vocabulary), + i, + ) + raise ValueError(msg) + if not vocabulary: + raise ValueError("empty vocabulary passed to fit") + self.fixed_vocabulary_ = True + self.vocabulary_ = dict(vocabulary) + else: + self.fixed_vocabulary_ = False + + def _check_vocabulary(self): + """Check if vocabulary is empty or missing (not fitted)""" + if not hasattr(self, "vocabulary_"): + self._validate_vocabulary() + if not self.fixed_vocabulary_: + raise NotFittedError("Vocabulary not fitted or provided") + + if len(self.vocabulary_) == 0: + raise ValueError("Vocabulary is empty") + + def _validate_ngram_range(self): + """Check validity of ngram_range parameter""" + min_n, max_m = self.ngram_range + if min_n > max_m: + raise ValueError( + "Invalid value for ngram_range=%s " + "lower boundary larger than the upper boundary." + % str(self.ngram_range) + ) + + def _warn_for_unused_params(self): + + if self.tokenizer is not None and self.token_pattern is not None: + warnings.warn( + "The parameter 'token_pattern' will not be used" + " since 'tokenizer' is not None'" + ) + + if self.preprocessor is not None and callable(self.analyzer): + warnings.warn( + "The parameter 'preprocessor' will not be used" + " since 'analyzer' is callable'" + ) + + if ( + self.ngram_range != (1, 1) + and self.ngram_range is not None + and callable(self.analyzer) + ): + warnings.warn( + "The parameter 'ngram_range' will not be used" + " since 'analyzer' is callable'" + ) + if self.analyzer != "word" or callable(self.analyzer): + if self.stop_words is not None: + warnings.warn( + "The parameter 'stop_words' will not be used" + " since 'analyzer' != 'word'" + ) + if ( + self.token_pattern is not None + and self.token_pattern != r"(?u)\b\w\w+\b" + ): + warnings.warn( + "The parameter 'token_pattern' will not be used" + " since 'analyzer' != 'word'" + ) + if self.tokenizer is not None: + warnings.warn( + "The parameter 'tokenizer' will not be used" + " since 'analyzer' != 'word'" + ) + + +class HashingVectorizer( + TransformerMixin, _VectorizerMixin, BaseEstimator, auto_wrap_output_keys=None +): + r"""Convert a collection of text documents to a matrix of token occurrences. + + It turns a collection of text documents into a scipy.sparse matrix holding + token occurrence counts (or binary occurrence information), possibly + normalized as token frequencies if norm='l1' or projected on the euclidean + unit sphere if norm='l2'. + + This text vectorizer implementation uses the hashing trick to find the + token string name to feature integer index mapping. + + This strategy has several advantages: + + - it is very low memory scalable to large datasets as there is no need to + store a vocabulary dictionary in memory. + + - it is fast to pickle and un-pickle as it holds no state besides the + constructor parameters. + + - it can be used in a streaming (partial fit) or parallel pipeline as there + is no state computed during fit. + + There are also a couple of cons (vs using a CountVectorizer with an + in-memory vocabulary): + + - there is no way to compute the inverse transform (from feature indices to + string feature names) which can be a problem when trying to introspect + which features are most important to a model. + + - there can be collisions: distinct tokens can be mapped to the same + feature index. However in practice this is rarely an issue if n_features + is large enough (e.g. 2 ** 18 for text classification problems). + + - no IDF weighting as this would render the transformer stateful. 
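[Editor's note] The trade-offs listed above all follow from replacing a learned vocabulary lookup with a hash function. A toy sketch of that idea follows; the real implementation uses signed 32-bit MurmurHash3 via FeatureHasher, and Python's per-process-salted `hash()` here is only a stand-in:

import numpy as np

def hashed_counts(tokens, n_features=16):
    # Map each token straight to a column with a hash instead of a
    # vocabulary; distinct tokens may collide on the same column.
    x = np.zeros(n_features)
    for tok in tokens:
        h = hash(tok)                       # stand-in, not MurmurHash3
        sign = 1.0 if h % 2 == 0 else -1.0  # alternate_sign analogue
        x[abs(h) % n_features] += sign
    return x

print(hashed_counts("this is the first document".split()))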
+ + The hash function employed is the signed 32-bit version of Murmurhash3. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + input : {'filename', 'file', 'content'}, default='content' + - If `'filename'`, the sequence passed as an argument to fit is + expected to be a list of filenames that need reading to fetch + the raw content to analyze. + + - If `'file'`, the sequence items must have a 'read' method (file-like + object) that is called to fetch the bytes in memory. + + - If `'content'`, the input is expected to be a sequence of items that + can be of type string or byte. + + encoding : str, default='utf-8' + If bytes or files are given to analyze, this encoding is used to + decode. + + decode_error : {'strict', 'ignore', 'replace'}, default='strict' + Instruction on what to do if a byte sequence is given to analyze that + contains characters not of the given `encoding`. By default, it is + 'strict', meaning that a UnicodeDecodeError will be raised. Other + values are 'ignore' and 'replace'. + + strip_accents : {'ascii', 'unicode'} or callable, default=None + Remove accents and perform other character normalization + during the preprocessing step. + 'ascii' is a fast method that only works on characters that have + a direct ASCII mapping. + 'unicode' is a slightly slower method that works on any character. + None (default) does nothing. + + Both 'ascii' and 'unicode' use NFKD normalization from + :func:`unicodedata.normalize`. + + lowercase : bool, default=True + Convert all characters to lowercase before tokenizing. + + preprocessor : callable, default=None + Override the preprocessing (string transformation) stage while + preserving the tokenizing and n-grams generation steps. + Only applies if ``analyzer`` is not callable. + + tokenizer : callable, default=None + Override the string tokenization step while preserving the + preprocessing and n-grams generation steps. + Only applies if ``analyzer == 'word'``. + + stop_words : {'english'}, list, default=None + If 'english', a built-in stop word list for English is used. + There are several known issues with 'english' and you should + consider an alternative (see :ref:`stop_words`). + + If a list, that list is assumed to contain stop words, all of which + will be removed from the resulting tokens. + Only applies if ``analyzer == 'word'``. + + token_pattern : str or None, default=r"(?u)\\b\\w\\w+\\b" + Regular expression denoting what constitutes a "token", only used + if ``analyzer == 'word'``. The default regexp selects tokens of 2 + or more alphanumeric characters (punctuation is completely ignored + and always treated as a token separator). + + If there is a capturing group in token_pattern then the + captured group content, not the entire match, becomes the token. + At most one capturing group is permitted. + + ngram_range : tuple (min_n, max_n), default=(1, 1) + The lower and upper boundary of the range of n-values for different + n-grams to be extracted. All values of n such that min_n <= n <= max_n + will be used. For example an ``ngram_range`` of ``(1, 1)`` means only + unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means + only bigrams. + Only applies if ``analyzer`` is not callable. + + analyzer : {'word', 'char', 'char_wb'} or callable, default='word' + Whether the feature should be made of word or character n-grams. + Option 'char_wb' creates character n-grams only from text inside + word boundaries; n-grams at the edges of words are padded with space. 
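[Editor's note] The edge padding described for 'char_wb' is easiest to see with a small re-implementation of the bigram case, mirroring the `_char_wb_ngrams` helper shown earlier in this file (illustrative only):

def char_wb_bigrams(word):
    # Pad with spaces and slide a width-2 window, so n-grams at the
    # word edges carry the padding space.
    w = " " + word + " "
    return [w[i:i + 2] for i in range(len(w) - 1)]

print(char_wb_bigrams("word"))  # [' w', 'wo', 'or', 'rd', 'd ']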
+ + If a callable is passed it is used to extract the sequence of features + out of the raw, unprocessed input. + + .. versionchanged:: 0.21 + Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data + is first read from the file and then passed to the given callable + analyzer. + + n_features : int, default=(2 ** 20) + The number of features (columns) in the output matrices. Small numbers + of features are likely to cause hash collisions, but large numbers + will cause larger coefficient dimensions in linear learners. + + binary : bool, default=False + If True, all non zero counts are set to 1. This is useful for discrete + probabilistic models that model binary events rather than integer + counts. + + norm : {'l1', 'l2'}, default='l2' + Norm used to normalize term vectors. None for no normalization. + + alternate_sign : bool, default=True + When True, an alternating sign is added to the features as to + approximately conserve the inner product in the hashed space even for + small n_features. This approach is similar to sparse random projection. + + .. versionadded:: 0.19 + + dtype : type, default=np.float64 + Type of the matrix returned by fit_transform() or transform(). + + See Also + -------- + CountVectorizer : Convert a collection of text documents to a matrix of + token counts. + TfidfVectorizer : Convert a collection of raw documents to a matrix of + TF-IDF features. + + Notes + ----- + This estimator is :term:`stateless` and does not need to be fitted. + However, we recommend to call :meth:`fit_transform` instead of + :meth:`transform`, as parameter validation is only performed in + :meth:`fit`. + + Examples + -------- + >>> from sklearn.feature_extraction.text import HashingVectorizer + >>> corpus = [ + ... 'This is the first document.', + ... 'This document is the second document.', + ... 'And this is the third one.', + ... 'Is this the first document?', + ... 
] + >>> vectorizer = HashingVectorizer(n_features=2**4) + >>> X = vectorizer.fit_transform(corpus) + >>> print(X.shape) + (4, 16) + """ + + _parameter_constraints: dict = { + "input": [StrOptions({"filename", "file", "content"})], + "encoding": [str], + "decode_error": [StrOptions({"strict", "ignore", "replace"})], + "strip_accents": [StrOptions({"ascii", "unicode"}), None, callable], + "lowercase": ["boolean"], + "preprocessor": [callable, None], + "tokenizer": [callable, None], + "stop_words": [StrOptions({"english"}), list, None], + "token_pattern": [str, None], + "ngram_range": [tuple], + "analyzer": [StrOptions({"word", "char", "char_wb"}), callable], + "n_features": [Interval(Integral, 1, np.iinfo(np.int32).max, closed="left")], + "binary": ["boolean"], + "norm": [StrOptions({"l1", "l2"}), None], + "alternate_sign": ["boolean"], + "dtype": "no_validation", # delegate to numpy + } + + def __init__( + self, + *, + input="content", + encoding="utf-8", + decode_error="strict", + strip_accents=None, + lowercase=True, + preprocessor=None, + tokenizer=None, + stop_words=None, + token_pattern=r"(?u)\b\w\w+\b", + ngram_range=(1, 1), + analyzer="word", + n_features=(2**20), + binary=False, + norm="l2", + alternate_sign=True, + dtype=np.float64, + ): + self.input = input + self.encoding = encoding + self.decode_error = decode_error + self.strip_accents = strip_accents + self.preprocessor = preprocessor + self.tokenizer = tokenizer + self.analyzer = analyzer + self.lowercase = lowercase + self.token_pattern = token_pattern + self.stop_words = stop_words + self.n_features = n_features + self.ngram_range = ngram_range + self.binary = binary + self.norm = norm + self.alternate_sign = alternate_sign + self.dtype = dtype + + def partial_fit(self, X, y=None): + """Only validates estimator's parameters. + + This method allows to: (i) validate the estimator's parameters and + (ii) be consistent with the scikit-learn transformer API. + + Parameters + ---------- + X : ndarray of shape [n_samples, n_features] + Training data. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + HashingVectorizer instance. + """ + # TODO: only validate during the first call + self._validate_params() + return self + + def fit(self, X, y=None): + """Only validates estimator's parameters. + + This method allows to: (i) validate the estimator's parameters and + (ii) be consistent with the scikit-learn transformer API. + + Parameters + ---------- + X : ndarray of shape [n_samples, n_features] + Training data. + + y : Ignored + Not used, present for API consistency by convention. + + Returns + ------- + self : object + HashingVectorizer instance. + """ + self._validate_params() + + # triggers a parameter validation + if isinstance(X, str): + raise ValueError( + "Iterable over raw text documents expected, string object received." + ) + + self._warn_for_unused_params() + self._validate_ngram_range() + + self._get_hasher().fit(X, y=y) + return self + + def transform(self, X): + """Transform a sequence of documents to a document-term matrix. + + Parameters + ---------- + X : iterable over raw text documents, length = n_samples + Samples. Each sample must be a text document (either bytes or + unicode strings, file name or file object depending on the + constructor argument) which will be tokenized and hashed. + + Returns + ------- + X : sparse matrix of shape (n_samples, n_features) + Document-term matrix. 
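[Editor's note] Because the transformer holds no fitted state, `transform` can be applied batch by batch; a usage sketch, assuming scikit-learn is installed:

from sklearn.feature_extraction.text import HashingVectorizer

vectorizer = HashingVectorizer(n_features=2**8, norm=None)
batches = [
    ["first batch of documents", "another document"],
    ["a later batch, transformed independently"],
]
# No fit is needed: the token-to-column mapping is a fixed hash
# function, so every batch lands in the same 256-column space.
for batch in batches:
    print(vectorizer.transform(batch).shape)  # (2, 256) then (1, 256)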
+ """ + if isinstance(X, str): + raise ValueError( + "Iterable over raw text documents expected, string object received." + ) + + self._validate_ngram_range() + + analyzer = self.build_analyzer() + X = self._get_hasher().transform(analyzer(doc) for doc in X) + if self.binary: + X.data.fill(1) + if self.norm is not None: + X = normalize(X, norm=self.norm, copy=False) + return X + + def fit_transform(self, X, y=None): + """Transform a sequence of documents to a document-term matrix. + + Parameters + ---------- + X : iterable over raw text documents, length = n_samples + Samples. Each sample must be a text document (either bytes or + unicode strings, file name or file object depending on the + constructor argument) which will be tokenized and hashed. + y : any + Ignored. This parameter exists only for compatibility with + sklearn.pipeline.Pipeline. + + Returns + ------- + X : sparse matrix of shape (n_samples, n_features) + Document-term matrix. + """ + return self.fit(X, y).transform(X) + + def _get_hasher(self): + return FeatureHasher( + n_features=self.n_features, + input_type="string", + dtype=self.dtype, + alternate_sign=self.alternate_sign, + ) + + def _more_tags(self): + return {"X_types": ["string"]} + + +def _document_frequency(X): + """Count the number of non-zero values for each feature in sparse X.""" + if sp.isspmatrix_csr(X): + return np.bincount(X.indices, minlength=X.shape[1]) + else: + return np.diff(X.indptr) + + +class CountVectorizer(_VectorizerMixin, BaseEstimator): + r"""Convert a collection of text documents to a matrix of token counts. + + This implementation produces a sparse representation of the counts using + scipy.sparse.csr_matrix. + + If you do not provide an a-priori dictionary and you do not use an analyzer + that does some kind of feature selection then the number of features will + be equal to the vocabulary size found by analyzing the data. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + input : {'filename', 'file', 'content'}, default='content' + - If `'filename'`, the sequence passed as an argument to fit is + expected to be a list of filenames that need reading to fetch + the raw content to analyze. + + - If `'file'`, the sequence items must have a 'read' method (file-like + object) that is called to fetch the bytes in memory. + + - If `'content'`, the input is expected to be a sequence of items that + can be of type string or byte. + + encoding : str, default='utf-8' + If bytes or files are given to analyze, this encoding is used to + decode. + + decode_error : {'strict', 'ignore', 'replace'}, default='strict' + Instruction on what to do if a byte sequence is given to analyze that + contains characters not of the given `encoding`. By default, it is + 'strict', meaning that a UnicodeDecodeError will be raised. Other + values are 'ignore' and 'replace'. + + strip_accents : {'ascii', 'unicode'} or callable, default=None + Remove accents and perform other character normalization + during the preprocessing step. + 'ascii' is a fast method that only works on characters that have + a direct ASCII mapping. + 'unicode' is a slightly slower method that works on any characters. + None (default) does nothing. + + Both 'ascii' and 'unicode' use NFKD normalization from + :func:`unicodedata.normalize`. + + lowercase : bool, default=True + Convert all characters to lowercase before tokenizing. 
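[Editor's note] A side note on `_document_frequency` above: for CSR input it counts, per column, how many rows store a non-zero, using one `bincount` over the column indices:

import numpy as np
import scipy.sparse as sp

X = sp.csr_matrix(np.array([[0, 1, 2],
                            [0, 3, 0],
                            [4, 0, 5]]))
# X.indices holds the column of every stored non-zero, so counting
# occurrences of each column gives the document frequencies.
print(np.bincount(X.indices, minlength=X.shape[1]))  # [1 2 2]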
+ + preprocessor : callable, default=None + Override the preprocessing (strip_accents and lowercase) stage while + preserving the tokenizing and n-grams generation steps. + Only applies if ``analyzer`` is not callable. + + tokenizer : callable, default=None + Override the string tokenization step while preserving the + preprocessing and n-grams generation steps. + Only applies if ``analyzer == 'word'``. + + stop_words : {'english'}, list, default=None + If 'english', a built-in stop word list for English is used. + There are several known issues with 'english' and you should + consider an alternative (see :ref:`stop_words`). + + If a list, that list is assumed to contain stop words, all of which + will be removed from the resulting tokens. + Only applies if ``analyzer == 'word'``. + + If None, no stop words will be used. In this case, setting `max_df` + to a higher value, such as in the range (0.7, 1.0), can automatically detect + and filter stop words based on intra corpus document frequency of terms. + + token_pattern : str or None, default=r"(?u)\\b\\w\\w+\\b" + Regular expression denoting what constitutes a "token", only used + if ``analyzer == 'word'``. The default regexp select tokens of 2 + or more alphanumeric characters (punctuation is completely ignored + and always treated as a token separator). + + If there is a capturing group in token_pattern then the + captured group content, not the entire match, becomes the token. + At most one capturing group is permitted. + + ngram_range : tuple (min_n, max_n), default=(1, 1) + The lower and upper boundary of the range of n-values for different + word n-grams or char n-grams to be extracted. All values of n such + such that min_n <= n <= max_n will be used. For example an + ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means + unigrams and bigrams, and ``(2, 2)`` means only bigrams. + Only applies if ``analyzer`` is not callable. + + analyzer : {'word', 'char', 'char_wb'} or callable, default='word' + Whether the feature should be made of word n-gram or character + n-grams. + Option 'char_wb' creates character n-grams only from text inside + word boundaries; n-grams at the edges of words are padded with space. + + If a callable is passed it is used to extract the sequence of features + out of the raw, unprocessed input. + + .. versionchanged:: 0.21 + + Since v0.21, if ``input`` is ``filename`` or ``file``, the data is + first read from the file and then passed to the given callable + analyzer. + + max_df : float in range [0.0, 1.0] or int, default=1.0 + When building the vocabulary ignore terms that have a document + frequency strictly higher than the given threshold (corpus-specific + stop words). + If float, the parameter represents a proportion of documents, integer + absolute counts. + This parameter is ignored if vocabulary is not None. + + min_df : float in range [0.0, 1.0] or int, default=1 + When building the vocabulary ignore terms that have a document + frequency strictly lower than the given threshold. This value is also + called cut-off in the literature. + If float, the parameter represents a proportion of documents, integer + absolute counts. + This parameter is ignored if vocabulary is not None. + + max_features : int, default=None + If not None, build a vocabulary that only consider the top + `max_features` ordered by term frequency across the corpus. + Otherwise, all features are used. + + This parameter is ignored if vocabulary is not None. 
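[Editor's note] A short usage sketch of the `min_df` pruning and the resulting `stop_words_` attribute documented above, assuming scikit-learn is installed:

from sklearn.feature_extraction.text import CountVectorizer

corpus = ["apple banana apple", "banana cherry", "banana durian"]
# min_df=2 prunes terms appearing in fewer than two documents.
vectorizer = CountVectorizer(min_df=2)
vectorizer.fit_transform(corpus)
print(vectorizer.get_feature_names_out())  # ['banana']
print(sorted(vectorizer.stop_words_))      # ['apple', 'cherry', 'durian']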
+ + vocabulary : Mapping or iterable, default=None + Either a Mapping (e.g., a dict) where keys are terms and values are + indices in the feature matrix, or an iterable over terms. If not + given, a vocabulary is determined from the input documents. Indices + in the mapping should not be repeated and should not have any gap + between 0 and the largest index. + + binary : bool, default=False + If True, all non zero counts are set to 1. This is useful for discrete + probabilistic models that model binary events rather than integer + counts. + + dtype : dtype, default=np.int64 + Type of the matrix returned by fit_transform() or transform(). + + Attributes + ---------- + vocabulary_ : dict + A mapping of terms to feature indices. + + fixed_vocabulary_ : bool + True if a fixed vocabulary of term to indices mapping + is provided by the user. + + stop_words_ : set + Terms that were ignored because they either: + + - occurred in too many documents (`max_df`) + - occurred in too few documents (`min_df`) + - were cut off by feature selection (`max_features`). + + This is only available if no vocabulary was given. + + See Also + -------- + HashingVectorizer : Convert a collection of text documents to a + matrix of token counts. + + TfidfVectorizer : Convert a collection of raw documents to a matrix + of TF-IDF features. + + Notes + ----- + The ``stop_words_`` attribute can get large and increase the model size + when pickling. This attribute is provided only for introspection and can + be safely removed using delattr or set to None before pickling. + + Examples + -------- + >>> from sklearn.feature_extraction.text import CountVectorizer + >>> corpus = [ + ... 'This is the first document.', + ... 'This document is the second document.', + ... 'And this is the third one.', + ... 'Is this the first document?', + ... ] + >>> vectorizer = CountVectorizer() + >>> X = vectorizer.fit_transform(corpus) + >>> vectorizer.get_feature_names_out() + array(['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', + 'this'], ...) + >>> print(X.toarray()) + [[0 1 1 1 0 0 1 0 1] + [0 2 0 1 0 1 1 0 1] + [1 0 0 1 1 0 1 1 1] + [0 1 1 1 0 0 1 0 1]] + >>> vectorizer2 = CountVectorizer(analyzer='word', ngram_range=(2, 2)) + >>> X2 = vectorizer2.fit_transform(corpus) + >>> vectorizer2.get_feature_names_out() + array(['and this', 'document is', 'first document', 'is the', 'is this', + 'second document', 'the first', 'the second', 'the third', 'third one', + 'this document', 'this is', 'this the'], ...) 
+ >>> print(X2.toarray()) + [[0 0 1 1 0 0 1 0 0 0 0 1 0] + [0 1 0 1 0 1 0 1 0 0 1 0 0] + [1 0 0 1 0 0 0 0 1 1 0 1 0] + [0 0 1 0 1 0 1 0 0 0 0 0 1]] + """ + + _parameter_constraints: dict = { + "input": [StrOptions({"filename", "file", "content"})], + "encoding": [str], + "decode_error": [StrOptions({"strict", "ignore", "replace"})], + "strip_accents": [StrOptions({"ascii", "unicode"}), None, callable], + "lowercase": ["boolean"], + "preprocessor": [callable, None], + "tokenizer": [callable, None], + "stop_words": [StrOptions({"english"}), list, None], + "token_pattern": [str, None], + "ngram_range": [tuple], + "analyzer": [StrOptions({"word", "char", "char_wb"}), callable], + "max_df": [ + Interval(Real, 0, 1, closed="both"), + Interval(Integral, 1, None, closed="left"), + ], + "min_df": [ + Interval(Real, 0, 1, closed="both"), + Interval(Integral, 1, None, closed="left"), + ], + "max_features": [Interval(Integral, 1, None, closed="left"), None], + "vocabulary": [Mapping, HasMethods("__iter__"), None], + "binary": ["boolean"], + "dtype": "no_validation", # delegate to numpy + } + + def __init__( + self, + *, + input="content", + encoding="utf-8", + decode_error="strict", + strip_accents=None, + lowercase=True, + preprocessor=None, + tokenizer=None, + stop_words=None, + token_pattern=r"(?u)\b\w\w+\b", + ngram_range=(1, 1), + analyzer="word", + max_df=1.0, + min_df=1, + max_features=None, + vocabulary=None, + binary=False, + dtype=np.int64, + ): + self.input = input + self.encoding = encoding + self.decode_error = decode_error + self.strip_accents = strip_accents + self.preprocessor = preprocessor + self.tokenizer = tokenizer + self.analyzer = analyzer + self.lowercase = lowercase + self.token_pattern = token_pattern + self.stop_words = stop_words + self.max_df = max_df + self.min_df = min_df + self.max_features = max_features + self.ngram_range = ngram_range + self.vocabulary = vocabulary + self.binary = binary + self.dtype = dtype + + def _sort_features(self, X, vocabulary): + """Sort features by name + + Returns a reordered matrix and modifies the vocabulary in place + """ + sorted_features = sorted(vocabulary.items()) + map_index = np.empty(len(sorted_features), dtype=X.indices.dtype) + for new_val, (term, old_val) in enumerate(sorted_features): + vocabulary[term] = new_val + map_index[old_val] = new_val + + X.indices = map_index.take(X.indices, mode="clip") + return X + + def _limit_features(self, X, vocabulary, high=None, low=None, limit=None): + """Remove too rare or too common features. + + Prune features that are non zero in more samples than high or less + documents than low, modifying the vocabulary, and restricting it to + at most the limit most frequent. + + This does not prune samples with zero features. 
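[Editor's note] The feature pruning implemented below reduces to a boolean mask over document frequencies plus an index remapping; a self-contained sketch of that core step, with made-up numbers:

import numpy as np

dfs = np.array([1, 5, 3, 9, 2])      # document frequency per feature
high, low = 8, 2                     # max_df / min_df expressed as counts
mask = (dfs <= high) & (dfs >= low)  # [False True True False True]
new_indices = np.cumsum(mask) - 1    # old column -> new column
print(new_indices[mask])             # [0 1 2]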
+ """ + if high is None and low is None and limit is None: + return X, set() + + # Calculate a mask based on document frequencies + dfs = _document_frequency(X) + mask = np.ones(len(dfs), dtype=bool) + if high is not None: + mask &= dfs <= high + if low is not None: + mask &= dfs >= low + if limit is not None and mask.sum() > limit: + tfs = np.asarray(X.sum(axis=0)).ravel() + mask_inds = (-tfs[mask]).argsort()[:limit] + new_mask = np.zeros(len(dfs), dtype=bool) + new_mask[np.where(mask)[0][mask_inds]] = True + mask = new_mask + + new_indices = np.cumsum(mask) - 1 # maps old indices to new + removed_terms = set() + for term, old_index in list(vocabulary.items()): + if mask[old_index]: + vocabulary[term] = new_indices[old_index] + else: + del vocabulary[term] + removed_terms.add(term) + kept_indices = np.where(mask)[0] + if len(kept_indices) == 0: + raise ValueError( + "After pruning, no terms remain. Try a lower min_df or a higher max_df." + ) + return X[:, kept_indices], removed_terms + + def _count_vocab(self, raw_documents, fixed_vocab): + """Create sparse feature matrix, and vocabulary where fixed_vocab=False""" + if fixed_vocab: + vocabulary = self.vocabulary_ + else: + # Add a new value when a new vocabulary item is seen + vocabulary = defaultdict() + vocabulary.default_factory = vocabulary.__len__ + + analyze = self.build_analyzer() + j_indices = [] + indptr = [] + + values = _make_int_array() + indptr.append(0) + for doc in raw_documents: + feature_counter = {} + for feature in analyze(doc): + try: + feature_idx = vocabulary[feature] + if feature_idx not in feature_counter: + feature_counter[feature_idx] = 1 + else: + feature_counter[feature_idx] += 1 + except KeyError: + # Ignore out-of-vocabulary items for fixed_vocab=True + continue + + j_indices.extend(feature_counter.keys()) + values.extend(feature_counter.values()) + indptr.append(len(j_indices)) + + if not fixed_vocab: + # disable defaultdict behaviour + vocabulary = dict(vocabulary) + if not vocabulary: + raise ValueError( + "empty vocabulary; perhaps the documents only contain stop words" + ) + + if indptr[-1] > np.iinfo(np.int32).max: # = 2**31 - 1 + if _IS_32BIT: + raise ValueError( + ( + "sparse CSR array has {} non-zero " + "elements and requires 64 bit indexing, " + "which is unsupported with 32 bit Python." + ).format(indptr[-1]) + ) + indices_dtype = np.int64 + + else: + indices_dtype = np.int32 + j_indices = np.asarray(j_indices, dtype=indices_dtype) + indptr = np.asarray(indptr, dtype=indices_dtype) + values = np.frombuffer(values, dtype=np.intc) + + X = sp.csr_matrix( + (values, j_indices, indptr), + shape=(len(indptr) - 1, len(vocabulary)), + dtype=self.dtype, + ) + X.sort_indices() + return vocabulary, X + + def fit(self, raw_documents, y=None): + """Learn a vocabulary dictionary of all tokens in the raw documents. + + Parameters + ---------- + raw_documents : iterable + An iterable which generates either str, unicode or file objects. + + y : None + This parameter is ignored. + + Returns + ------- + self : object + Fitted vectorizer. + """ + self.fit_transform(raw_documents) + return self + + def fit_transform(self, raw_documents, y=None): + """Learn the vocabulary dictionary and return document-term matrix. + + This is equivalent to fit followed by transform, but more efficiently + implemented. + + Parameters + ---------- + raw_documents : iterable + An iterable which generates either str, unicode or file objects. + + y : None + This parameter is ignored. 
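[Editor's note] The on-the-fly vocabulary in `_count_vocab` above relies on a compact `defaultdict` idiom: the default factory is the dict's own `__len__`, so the first lookup of a term assigns it the next free index. A tiny demo:

from collections import defaultdict

vocabulary = defaultdict()
vocabulary.default_factory = vocabulary.__len__  # next index = current size

for token in ["this", "is", "this", "fine"]:
    vocabulary[token]  # first lookup inserts token -> len(vocabulary)

print(dict(vocabulary))  # {'this': 0, 'is': 1, 'fine': 2}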
+ + Returns + ------- + X : array of shape (n_samples, n_features) + Document-term matrix. + """ + # We intentionally don't call the transform method to make + # fit_transform overridable without unwanted side effects in + # TfidfVectorizer. + if isinstance(raw_documents, str): + raise ValueError( + "Iterable over raw text documents expected, string object received." + ) + + self._validate_params() + self._validate_ngram_range() + self._warn_for_unused_params() + self._validate_vocabulary() + max_df = self.max_df + min_df = self.min_df + max_features = self.max_features + + if self.fixed_vocabulary_ and self.lowercase: + for term in self.vocabulary: + if any(map(str.isupper, term)): + warnings.warn( + "Upper case characters found in" + " vocabulary while 'lowercase'" + " is True. These entries will not" + " be matched with any documents" + ) + break + + vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary_) + + if self.binary: + X.data.fill(1) + + if not self.fixed_vocabulary_: + n_doc = X.shape[0] + max_doc_count = max_df if isinstance(max_df, Integral) else max_df * n_doc + min_doc_count = min_df if isinstance(min_df, Integral) else min_df * n_doc + if max_doc_count < min_doc_count: + raise ValueError("max_df corresponds to < documents than min_df") + if max_features is not None: + X = self._sort_features(X, vocabulary) + X, self.stop_words_ = self._limit_features( + X, vocabulary, max_doc_count, min_doc_count, max_features + ) + if max_features is None: + X = self._sort_features(X, vocabulary) + self.vocabulary_ = vocabulary + + return X + + def transform(self, raw_documents): + """Transform documents to document-term matrix. + + Extract token counts out of raw text documents using the vocabulary + fitted with fit or the one provided to the constructor. + + Parameters + ---------- + raw_documents : iterable + An iterable which generates either str, unicode or file objects. + + Returns + ------- + X : sparse matrix of shape (n_samples, n_features) + Document-term matrix. + """ + if isinstance(raw_documents, str): + raise ValueError( + "Iterable over raw text documents expected, string object received." + ) + self._check_vocabulary() + + # use the same matrix-building strategy as fit_transform + _, X = self._count_vocab(raw_documents, fixed_vocab=True) + if self.binary: + X.data.fill(1) + return X + + def inverse_transform(self, X): + """Return terms per document with nonzero entries in X. + + Parameters + ---------- + X : {array-like, sparse matrix} of shape (n_samples, n_features) + Document-term matrix. + + Returns + ------- + X_inv : list of arrays of shape (n_samples,) + List of arrays of terms. + """ + self._check_vocabulary() + # We need CSR format for fast row manipulations. + X = check_array(X, accept_sparse="csr") + n_samples = X.shape[0] + + terms = np.array(list(self.vocabulary_.keys())) + indices = np.array(list(self.vocabulary_.values())) + inverse_vocabulary = terms[np.argsort(indices)] + + if sp.issparse(X): + return [ + inverse_vocabulary[X[i, :].nonzero()[1]].ravel() + for i in range(n_samples) + ] + else: + return [ + inverse_vocabulary[np.flatnonzero(X[i, :])].ravel() + for i in range(n_samples) + ] + + def get_feature_names_out(self, input_features=None): + """Get output feature names for transformation. + + Parameters + ---------- + input_features : array-like of str or None, default=None + Not used, present here for API consistency by convention. + + Returns + ------- + feature_names_out : ndarray of str objects + Transformed feature names. 
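[Editor's note] A usage sketch of the `inverse_transform` method documented above; note that word order and multiplicity within a document are not preserved:

from sklearn.feature_extraction.text import CountVectorizer

corpus = ["the cat sat", "the dog ran"]
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(corpus)
for terms in vectorizer.inverse_transform(X):
    print(sorted(terms))
# ['cat', 'sat', 'the']
# ['dog', 'ran', 'the']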
+ """ + self._check_vocabulary() + return np.asarray( + [t for t, i in sorted(self.vocabulary_.items(), key=itemgetter(1))], + dtype=object, + ) + + def _more_tags(self): + return {"X_types": ["string"]} + + +def _make_int_array(): + """Construct an array.array of a type suitable for scipy.sparse indices.""" + return array.array(str("i")) + + +class TfidfTransformer( + OneToOneFeatureMixin, TransformerMixin, BaseEstimator, auto_wrap_output_keys=None +): + """Transform a count matrix to a normalized tf or tf-idf representation. + + Tf means term-frequency while tf-idf means term-frequency times inverse + document-frequency. This is a common term weighting scheme in information + retrieval, that has also found good use in document classification. + + The goal of using tf-idf instead of the raw frequencies of occurrence of a + token in a given document is to scale down the impact of tokens that occur + very frequently in a given corpus and that are hence empirically less + informative than features that occur in a small fraction of the training + corpus. + + The formula that is used to compute the tf-idf for a term t of a document d + in a document set is tf-idf(t, d) = tf(t, d) * idf(t), and the idf is + computed as idf(t) = log [ n / df(t) ] + 1 (if ``smooth_idf=False``), where + n is the total number of documents in the document set and df(t) is the + document frequency of t; the document frequency is the number of documents + in the document set that contain the term t. The effect of adding "1" to + the idf in the equation above is that terms with zero idf, i.e., terms + that occur in all documents in a training set, will not be entirely + ignored. + (Note that the idf formula above differs from the standard textbook + notation that defines the idf as + idf(t) = log [ n / (df(t) + 1) ]). + + If ``smooth_idf=True`` (the default), the constant "1" is added to the + numerator and denominator of the idf as if an extra document was seen + containing every term in the collection exactly once, which prevents + zero divisions: idf(t) = log [ (1 + n) / (1 + df(t)) ] + 1. + + Furthermore, the formulas used to compute tf and idf depend + on parameter settings that correspond to the SMART notation used in IR + as follows: + + Tf is "n" (natural) by default, "l" (logarithmic) when + ``sublinear_tf=True``. + Idf is "t" when use_idf is given, "n" (none) otherwise. + Normalization is "c" (cosine) when ``norm='l2'``, "n" (none) + when ``norm=None``. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + norm : {'l1', 'l2'} or None, default='l2' + Each output row will have unit norm, either: + + - 'l2': Sum of squares of vector elements is 1. The cosine + similarity between two vectors is their dot product when l2 norm has + been applied. + - 'l1': Sum of absolute values of vector elements is 1. + See :func:`preprocessing.normalize`. + - None: No normalization. + + use_idf : bool, default=True + Enable inverse-document-frequency reweighting. If False, idf(t) = 1. + + smooth_idf : bool, default=True + Smooth idf weights by adding one to document frequencies, as if an + extra document was seen containing every term in the collection + exactly once. Prevents zero divisions. + + sublinear_tf : bool, default=False + Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf). + + Attributes + ---------- + idf_ : array of shape (n_features) + The inverse document frequency (IDF) vector; only defined + if ``use_idf`` is True. + + .. 
versionadded:: 0.20 + + n_features_in_ : int + Number of features seen during :term:`fit`. + + .. versionadded:: 1.0 + + feature_names_in_ : ndarray of shape (`n_features_in_`,) + Names of features seen during :term:`fit`. Defined only when `X` + has feature names that are all strings. + + .. versionadded:: 1.0 + + See Also + -------- + CountVectorizer : Transforms text into a sparse matrix of n-gram counts. + + TfidfVectorizer : Convert a collection of raw documents to a matrix of + TF-IDF features. + + HashingVectorizer : Convert a collection of text documents to a matrix + of token occurrences. + + References + ---------- + .. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern + Information Retrieval. Addison Wesley, pp. 68-74. + + .. [MRS2008] C.D. Manning, P. Raghavan and H. Schütze (2008). + Introduction to Information Retrieval. Cambridge University + Press, pp. 118-120. + + Examples + -------- + >>> from sklearn.feature_extraction.text import TfidfTransformer + >>> from sklearn.feature_extraction.text import CountVectorizer + >>> from sklearn.pipeline import Pipeline + >>> corpus = ['this is the first document', + ... 'this document is the second document', + ... 'and this is the third one', + ... 'is this the first document'] + >>> vocabulary = ['this', 'document', 'first', 'is', 'second', 'the', + ... 'and', 'one'] + >>> pipe = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)), + ... ('tfid', TfidfTransformer())]).fit(corpus) + >>> pipe['count'].transform(corpus).toarray() + array([[1, 1, 1, 1, 0, 1, 0, 0], + [1, 2, 0, 1, 1, 1, 0, 0], + [1, 0, 0, 1, 0, 1, 1, 1], + [1, 1, 1, 1, 0, 1, 0, 0]]) + >>> pipe['tfid'].idf_ + array([1. , 1.22314355, 1.51082562, 1. , 1.91629073, + 1. , 1.91629073, 1.91629073]) + >>> pipe.transform(corpus).shape + (4, 8) + """ + + _parameter_constraints: dict = { + "norm": [StrOptions({"l1", "l2"}), None], + "use_idf": ["boolean"], + "smooth_idf": ["boolean"], + "sublinear_tf": ["boolean"], + } + + def __init__(self, *, norm="l2", use_idf=True, smooth_idf=True, sublinear_tf=False): + self.norm = norm + self.use_idf = use_idf + self.smooth_idf = smooth_idf + self.sublinear_tf = sublinear_tf + + def fit(self, X, y=None): + """Learn the idf vector (global term weights). + + Parameters + ---------- + X : sparse matrix of shape n_samples, n_features) + A matrix of term/token counts. + + y : None + This parameter is not needed to compute tf-idf. + + Returns + ------- + self : object + Fitted transformer. + """ + self._validate_params() + + # large sparse data is not supported for 32bit platforms because + # _document_frequency uses np.bincount which works on arrays of + # dtype NPY_INTP which is int32 for 32bit platforms. See #20923 + X = self._validate_data( + X, accept_sparse=("csr", "csc"), accept_large_sparse=not _IS_32BIT + ) + if not sp.issparse(X): + X = sp.csr_matrix(X) + dtype = X.dtype if X.dtype in FLOAT_DTYPES else np.float64 + + if self.use_idf: + n_samples, n_features = X.shape + df = _document_frequency(X) + df = df.astype(dtype, copy=False) + + # perform idf smoothing if required + df += int(self.smooth_idf) + n_samples += int(self.smooth_idf) + + # log+1 instead of log makes sure terms with zero idf don't get + # suppressed entirely. + idf = np.log(n_samples / df) + 1 + self._idf_diag = sp.diags( + idf, + offsets=0, + shape=(n_features, n_features), + format="csr", + dtype=dtype, + ) + + return self + + def transform(self, X, copy=True): + """Transform a count matrix to a tf or tf-idf representation. 
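[Editor's note] The smoothed formula quoted above, idf(t) = log[(1 + n) / (1 + df(t))] + 1, can be checked directly against a fitted transformer, assuming scikit-learn is installed:

import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer

corpus = ["this is the first document",
          "this document is the second document",
          "and this is the third one",
          "is this the first document"]
counts = CountVectorizer().fit_transform(corpus)
tfidf = TfidfTransformer(smooth_idf=True).fit(counts)

n = counts.shape[0]
df = np.bincount(counts.indices, minlength=counts.shape[1])
print(np.allclose(np.log((1 + n) / (1 + df)) + 1, tfidf.idf_))  # True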
+ + Parameters + ---------- + X : sparse matrix of (n_samples, n_features) + A matrix of term/token counts. + + copy : bool, default=True + Whether to copy X and operate on the copy or perform in-place + operations. + + Returns + ------- + vectors : sparse matrix of shape (n_samples, n_features) + Tf-idf-weighted document-term matrix. + """ + X = self._validate_data( + X, accept_sparse="csr", dtype=FLOAT_DTYPES, copy=copy, reset=False + ) + if not sp.issparse(X): + X = sp.csr_matrix(X, dtype=np.float64) + + if self.sublinear_tf: + np.log(X.data, X.data) + X.data += 1 + + if self.use_idf: + # idf_ being a property, the automatic attributes detection + # does not work as usual and we need to specify the attribute + # name: + check_is_fitted(self, attributes=["idf_"], msg="idf vector is not fitted") + + # *= doesn't work + X = X * self._idf_diag + + if self.norm is not None: + X = normalize(X, norm=self.norm, copy=False) + + return X + + @property + def idf_(self): + """Inverse document frequency vector, only defined if `use_idf=True`. + + Returns + ------- + ndarray of shape (n_features,) + """ + # if _idf_diag is not set, this will raise an attribute error, + # which means hasattr(self, "idf_") is False + return np.ravel(self._idf_diag.sum(axis=0)) + + @idf_.setter + def idf_(self, value): + value = np.asarray(value, dtype=np.float64) + n_features = value.shape[0] + self._idf_diag = sp.spdiags( + value, diags=0, m=n_features, n=n_features, format="csr" + ) + + def _more_tags(self): + return {"X_types": ["2darray", "sparse"]} + + +class TfidfVectorizer(CountVectorizer): + r"""Convert a collection of raw documents to a matrix of TF-IDF features. + + Equivalent to :class:`CountVectorizer` followed by + :class:`TfidfTransformer`. + + Read more in the :ref:`User Guide `. + + Parameters + ---------- + input : {'filename', 'file', 'content'}, default='content' + - If `'filename'`, the sequence passed as an argument to fit is + expected to be a list of filenames that need reading to fetch + the raw content to analyze. + + - If `'file'`, the sequence items must have a 'read' method (file-like + object) that is called to fetch the bytes in memory. + + - If `'content'`, the input is expected to be a sequence of items that + can be of type string or byte. + + encoding : str, default='utf-8' + If bytes or files are given to analyze, this encoding is used to + decode. + + decode_error : {'strict', 'ignore', 'replace'}, default='strict' + Instruction on what to do if a byte sequence is given to analyze that + contains characters not of the given `encoding`. By default, it is + 'strict', meaning that a UnicodeDecodeError will be raised. Other + values are 'ignore' and 'replace'. + + strip_accents : {'ascii', 'unicode'} or callable, default=None + Remove accents and perform other character normalization + during the preprocessing step. + 'ascii' is a fast method that only works on characters that have + a direct ASCII mapping. + 'unicode' is a slightly slower method that works on any characters. + None (default) does nothing. + + Both 'ascii' and 'unicode' use NFKD normalization from + :func:`unicodedata.normalize`. + + lowercase : bool, default=True + Convert all characters to lowercase before tokenizing. + + preprocessor : callable, default=None + Override the preprocessing (string transformation) stage while + preserving the tokenizing and n-grams generation steps. + Only applies if ``analyzer`` is not callable. 
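[Editor's note] The sublinear-tf step in `transform` above logs only the stored non-zeros in place, leaving implicit zeros untouched; a minimal sketch of the same trick on a toy matrix:

import numpy as np
import scipy.sparse as sp

X = sp.csr_matrix(np.array([[1.0, 10.0, 0.0],
                            [100.0, 0.0, 1.0]]))
np.log(X.data, X.data)  # log the stored non-zeros in place
X.data += 1             # tf -> 1 + log(tf)
print(X.toarray().round(2))
# [[1.   3.3  0.  ]
#  [5.61 0.   1.  ]]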
+ + tokenizer : callable, default=None + Override the string tokenization step while preserving the + preprocessing and n-grams generation steps. + Only applies if ``analyzer == 'word'``. + + analyzer : {'word', 'char', 'char_wb'} or callable, default='word' + Whether the feature should be made of word or character n-grams. + Option 'char_wb' creates character n-grams only from text inside + word boundaries; n-grams at the edges of words are padded with space. + + If a callable is passed it is used to extract the sequence of features + out of the raw, unprocessed input. + + .. versionchanged:: 0.21 + Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data + is first read from the file and then passed to the given callable + analyzer. + + stop_words : {'english'}, list, default=None + If a string, it is passed to _check_stop_list and the appropriate stop + list is returned. 'english' is currently the only supported string + value. + There are several known issues with 'english' and you should + consider an alternative (see :ref:`stop_words`). + + If a list, that list is assumed to contain stop words, all of which + will be removed from the resulting tokens. + Only applies if ``analyzer == 'word'``. + + If None, no stop words will be used. In this case, setting `max_df` + to a higher value, such as in the range (0.7, 1.0), can automatically detect + and filter stop words based on intra corpus document frequency of terms. + + token_pattern : str, default=r"(?u)\\b\\w\\w+\\b" + Regular expression denoting what constitutes a "token", only used + if ``analyzer == 'word'``. The default regexp selects tokens of 2 + or more alphanumeric characters (punctuation is completely ignored + and always treated as a token separator). + + If there is a capturing group in token_pattern then the + captured group content, not the entire match, becomes the token. + At most one capturing group is permitted. + + ngram_range : tuple (min_n, max_n), default=(1, 1) + The lower and upper boundary of the range of n-values for different + n-grams to be extracted. All values of n such that min_n <= n <= max_n + will be used. For example an ``ngram_range`` of ``(1, 1)`` means only + unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means + only bigrams. + Only applies if ``analyzer`` is not callable. + + max_df : float or int, default=1.0 + When building the vocabulary ignore terms that have a document + frequency strictly higher than the given threshold (corpus-specific + stop words). + If float in range [0.0, 1.0], the parameter represents a proportion of + documents, integer absolute counts. + This parameter is ignored if vocabulary is not None. + + min_df : float or int, default=1 + When building the vocabulary ignore terms that have a document + frequency strictly lower than the given threshold. This value is also + called cut-off in the literature. + If float in range of [0.0, 1.0], the parameter represents a proportion + of documents, integer absolute counts. + This parameter is ignored if vocabulary is not None. + + max_features : int, default=None + If not None, build a vocabulary that only consider the top + `max_features` ordered by term frequency across the corpus. + Otherwise, all features are used. + + This parameter is ignored if vocabulary is not None. + + vocabulary : Mapping or iterable, default=None + Either a Mapping (e.g., a dict) where keys are terms and values are + indices in the feature matrix, or an iterable over terms. 
If not + given, a vocabulary is determined from the input documents. + + binary : bool, default=False + If True, all non-zero term counts are set to 1. This does not mean + outputs will have only 0/1 values, only that the tf term in tf-idf + is binary. (Set idf and normalization to False to get 0/1 outputs). + + dtype : dtype, default=float64 + Type of the matrix returned by fit_transform() or transform(). + + norm : {'l1', 'l2'} or None, default='l2' + Each output row will have unit norm, either: + + - 'l2': Sum of squares of vector elements is 1. The cosine + similarity between two vectors is their dot product when l2 norm has + been applied. + - 'l1': Sum of absolute values of vector elements is 1. + See :func:`preprocessing.normalize`. + - None: No normalization. + + use_idf : bool, default=True + Enable inverse-document-frequency reweighting. If False, idf(t) = 1. + + smooth_idf : bool, default=True + Smooth idf weights by adding one to document frequencies, as if an + extra document was seen containing every term in the collection + exactly once. Prevents zero divisions. + + sublinear_tf : bool, default=False + Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf). + + Attributes + ---------- + vocabulary_ : dict + A mapping of terms to feature indices. + + fixed_vocabulary_ : bool + True if a fixed vocabulary of term to indices mapping + is provided by the user. + + idf_ : array of shape (n_features,) + The inverse document frequency (IDF) vector; only defined + if ``use_idf`` is True. + + stop_words_ : set + Terms that were ignored because they either: + + - occurred in too many documents (`max_df`) + - occurred in too few documents (`min_df`) + - were cut off by feature selection (`max_features`). + + This is only available if no vocabulary was given. + + See Also + -------- + CountVectorizer : Transforms text into a sparse matrix of n-gram counts. + + TfidfTransformer : Performs the TF-IDF transformation from a provided + matrix of counts. + + Notes + ----- + The ``stop_words_`` attribute can get large and increase the model size + when pickling. This attribute is provided only for introspection and can + be safely removed using delattr or set to None before pickling. + + Examples + -------- + >>> from sklearn.feature_extraction.text import TfidfVectorizer + >>> corpus = [ + ... 'This is the first document.', + ... 'This document is the second document.', + ... 'And this is the third one.', + ... 'Is this the first document?', + ... ] + >>> vectorizer = TfidfVectorizer() + >>> X = vectorizer.fit_transform(corpus) + >>> vectorizer.get_feature_names_out() + array(['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', + 'this'], ...) 
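+
+    The matrix is sparse: each document stores one tf-idf weight per distinct
+    term, which for this corpus gives 21 stored values (a quick sanity check):
+
+    >>> X.nnz
+    21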
+
+    >>> print(X.shape)
+    (4, 9)
+    """
+
+    _parameter_constraints: dict = {**CountVectorizer._parameter_constraints}
+    _parameter_constraints.update(
+        {
+            "norm": [StrOptions({"l1", "l2"}), None],
+            "use_idf": ["boolean"],
+            "smooth_idf": ["boolean"],
+            "sublinear_tf": ["boolean"],
+        }
+    )
+
+    def __init__(
+        self,
+        *,
+        input="content",
+        encoding="utf-8",
+        decode_error="strict",
+        strip_accents=None,
+        lowercase=True,
+        preprocessor=None,
+        tokenizer=None,
+        analyzer="word",
+        stop_words=None,
+        token_pattern=r"(?u)\b\w\w+\b",
+        ngram_range=(1, 1),
+        max_df=1.0,
+        min_df=1,
+        max_features=None,
+        vocabulary=None,
+        binary=False,
+        dtype=np.float64,
+        norm="l2",
+        use_idf=True,
+        smooth_idf=True,
+        sublinear_tf=False,
+    ):
+
+        super().__init__(
+            input=input,
+            encoding=encoding,
+            decode_error=decode_error,
+            strip_accents=strip_accents,
+            lowercase=lowercase,
+            preprocessor=preprocessor,
+            tokenizer=tokenizer,
+            analyzer=analyzer,
+            stop_words=stop_words,
+            token_pattern=token_pattern,
+            ngram_range=ngram_range,
+            max_df=max_df,
+            min_df=min_df,
+            max_features=max_features,
+            vocabulary=vocabulary,
+            binary=binary,
+            dtype=dtype,
+        )
+        self.norm = norm
+        self.use_idf = use_idf
+        self.smooth_idf = smooth_idf
+        self.sublinear_tf = sublinear_tf
+
+    # Broadcast the TF-IDF parameters to the underlying transformer instance
+    # for easy grid search and repr
+
+    @property
+    def idf_(self):
+        """Inverse document frequency vector, only defined if `use_idf=True`.
+
+        Returns
+        -------
+        ndarray of shape (n_features,)
+        """
+        if not hasattr(self, "_tfidf"):
+            raise NotFittedError(
+                f"{self.__class__.__name__} is not fitted yet. Call 'fit' with "
+                "appropriate arguments before using this attribute."
+            )
+        return self._tfidf.idf_
+
+    @idf_.setter
+    def idf_(self, value):
+        if not self.use_idf:
+            raise ValueError("`idf_` cannot be set when `use_idf=False`.")
+        if not hasattr(self, "_tfidf"):
+            # We should support transferring `idf_` from another `TfidfTransformer`
+            # and therefore, we need to create the transformer instance if it does
+            # not exist yet.
+            self._tfidf = TfidfTransformer(
+                norm=self.norm,
+                use_idf=self.use_idf,
+                smooth_idf=self.smooth_idf,
+                sublinear_tf=self.sublinear_tf,
+            )
+        self._validate_vocabulary()
+        if hasattr(self, "vocabulary_"):
+            if len(self.vocabulary_) != len(value):
+                raise ValueError(
+                    "idf length = %d must be equal to vocabulary size = %d"
+                    % (len(value), len(self.vocabulary_))
+                )
+        self._tfidf.idf_ = value
+
+    def _check_params(self):
+        if self.dtype not in FLOAT_DTYPES:
+            warnings.warn(
+                "Only {} 'dtype' should be used. {} 'dtype' will "
+                "be converted to np.float64.".format(FLOAT_DTYPES, self.dtype),
+                UserWarning,
+            )
+
+    def fit(self, raw_documents, y=None):
+        """Learn vocabulary and idf from training set.
+
+        Parameters
+        ----------
+        raw_documents : iterable
+            An iterable which generates either str, unicode or file objects.
+
+        y : None
+            This parameter is not needed to compute tf-idf.
+
+        Returns
+        -------
+        self : object
+            Fitted vectorizer.
+        """
+        self._validate_params()
+        self._check_params()
+        self._warn_for_unused_params()
+        self._tfidf = TfidfTransformer(
+            norm=self.norm,
+            use_idf=self.use_idf,
+            smooth_idf=self.smooth_idf,
+            sublinear_tf=self.sublinear_tf,
+        )
+        X = super().fit_transform(raw_documents)
+        self._tfidf.fit(X)
+        return self
+
+    def fit_transform(self, raw_documents, y=None):
+        """Learn vocabulary and idf, return document-term matrix.
+
+        This is equivalent to fit followed by transform, but more efficiently
+        implemented.
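+        For example, ``TfidfVectorizer().fit_transform(corpus)`` analyzes each
+        document once, whereas calling ``fit(corpus)`` and then
+        ``transform(corpus)`` on the same vectorizer would tokenize the whole
+        corpus a second time.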
+ + Parameters + ---------- + raw_documents : iterable + An iterable which generates either str, unicode or file objects. + + y : None + This parameter is ignored. + + Returns + ------- + X : sparse matrix of (n_samples, n_features) + Tf-idf-weighted document-term matrix. + """ + self._check_params() + self._tfidf = TfidfTransformer( + norm=self.norm, + use_idf=self.use_idf, + smooth_idf=self.smooth_idf, + sublinear_tf=self.sublinear_tf, + ) + X = super().fit_transform(raw_documents) + self._tfidf.fit(X) + # X is already a transformed view of raw_documents so + # we set copy to False + return self._tfidf.transform(X, copy=False) + + def transform(self, raw_documents): + """Transform documents to document-term matrix. + + Uses the vocabulary and document frequencies (df) learned by fit (or + fit_transform). + + Parameters + ---------- + raw_documents : iterable + An iterable which generates either str, unicode or file objects. + + Returns + ------- + X : sparse matrix of (n_samples, n_features) + Tf-idf-weighted document-term matrix. + """ + check_is_fitted(self, msg="The TF-IDF vectorizer is not fitted") + + X = super().transform(raw_documents) + return self._tfidf.transform(X, copy=False) + + def _more_tags(self): + return {"X_types": ["string"], "_skip_test": True} From 9f6326e4e40f24545bbaab070b9870349596a407 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 22:45:31 +0000 Subject: [PATCH 59/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../src/gaussian_elimination_pivoting/text.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/text.py b/linear_algebra/src/gaussian_elimination_pivoting/text.py index 0160bfeaa539..766ed785f616 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/text.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/text.py @@ -410,8 +410,7 @@ def _check_stop_words_consistency(self, stop_words, preprocess, tokenize): "Your stop_words may be inconsistent with " "your preprocessing. Tokenizing the stop " "words generated tokens %r not in " - "stop_words." - % sorted(inconsistent) + "stop_words." % sorted(inconsistent) ) return not inconsistent except Exception: @@ -446,7 +445,6 @@ def build_analyzer(self): ) elif self.analyzer == "char_wb": - return partial( _analyze, ngrams=self._char_wb_ngrams, @@ -473,8 +471,7 @@ def build_analyzer(self): ) def _validate_vocabulary(self): - vocabulary = self.vocabulary - if vocabulary is not None: + if (vocabulary := self.vocabulary) is not None: if isinstance(vocabulary, set): vocabulary = sorted(vocabulary) if not isinstance(vocabulary, Mapping): @@ -518,12 +515,10 @@ def _validate_ngram_range(self): if min_n > max_m: raise ValueError( "Invalid value for ngram_range=%s " - "lower boundary larger than the upper boundary." - % str(self.ngram_range) + "lower boundary larger than the upper boundary." 
% str(self.ngram_range) ) def _warn_for_unused_params(self): - if self.tokenizer is not None and self.token_pattern is not None: warnings.warn( "The parameter 'token_pattern' will not be used" @@ -2001,7 +1996,6 @@ def __init__( smooth_idf=True, sublinear_tf=False, ): - super().__init__( input=input, encoding=encoding, From 07f5e9b8527cbd85f072aaf4ffdbc2736d110f19 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 02:16:40 +0330 Subject: [PATCH 60/92] Update gaussian_elimination_pivoting.py From 3b2ae9b7ec0ccb4550242014e29f65bfde050704 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 02:18:35 +0330 Subject: [PATCH 61/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 5c6c2a40e75f..0bc47bd623fe 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -80,6 +80,7 @@ def foo(matrix): print(f"Cond(A) =< {conda:0.6f}") + # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From a5667349d5fb792761b211c30c8d2c3f04a04386 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 22:50:23 +0000 Subject: [PATCH 62/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../gaussian_elimination_pivoting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 0bc47bd623fe..5c6c2a40e75f 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -80,7 +80,6 @@ def foo(matrix): print(f"Cond(A) =< {conda:0.6f}") - # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From 348c630fb10687c40d59e5ba5de6c2ad7b562518 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 02:20:51 +0330 Subject: [PATCH 63/92] Delete linear_algebra/src/gaussian_elimination_pivoting/text.py --- .../src/gaussian_elimination_pivoting/text.py | 2155 ----------------- 1 file changed, 2155 deletions(-) delete mode 100644 linear_algebra/src/gaussian_elimination_pivoting/text.py diff --git a/linear_algebra/src/gaussian_elimination_pivoting/text.py b/linear_algebra/src/gaussian_elimination_pivoting/text.py deleted file mode 100644 index 766ed785f616..000000000000 --- a/linear_algebra/src/gaussian_elimination_pivoting/text.py +++ /dev/null @@ -1,2155 +0,0 @@ -# Authors: Olivier Grisel -# Mathieu Blondel -# Lars Buitinck -# Robert Layton -# Jochen Wersdörfer -# Roman Sinayev -# -# License: BSD 3 clause -""" -The :mod:`sklearn.feature_extraction.text` submodule gathers utilities to -build feature vectors from text documents. 
-""" - -import array -from collections import defaultdict -from collections.abc import Mapping -from functools import partial -from numbers import Integral, Real -from operator import itemgetter -import re -import unicodedata -import warnings - -import numpy as np -import scipy.sparse as sp - -from ..base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin -from ..preprocessing import normalize -from ._hash import FeatureHasher -from ._stop_words import ENGLISH_STOP_WORDS -from ..utils.validation import check_is_fitted, check_array, FLOAT_DTYPES -from ..utils import _IS_32BIT -from ..exceptions import NotFittedError -from ..utils._param_validation import StrOptions, Interval, HasMethods - - -__all__ = [ - "HashingVectorizer", - "CountVectorizer", - "ENGLISH_STOP_WORDS", - "TfidfTransformer", - "TfidfVectorizer", - "strip_accents_ascii", - "strip_accents_unicode", - "strip_tags", -] - - -def _preprocess(doc, accent_function=None, lower=False): - """Chain together an optional series of text preprocessing steps to - apply to a document. - - Parameters - ---------- - doc: str - The string to preprocess - accent_function: callable, default=None - Function for handling accented characters. Common strategies include - normalizing and removing. - lower: bool, default=False - Whether to use str.lower to lowercase all of the text - - Returns - ------- - doc: str - preprocessed string - """ - if lower: - doc = doc.lower() - if accent_function is not None: - doc = accent_function(doc) - return doc - - -def _analyze( - doc, - analyzer=None, - tokenizer=None, - ngrams=None, - preprocessor=None, - decoder=None, - stop_words=None, -): - """Chain together an optional series of text processing steps to go from - a single document to ngrams, with or without tokenizing or preprocessing. - - If analyzer is used, only the decoder argument is used, as the analyzer is - intended to replace the preprocessor, tokenizer, and ngrams steps. - - Parameters - ---------- - analyzer: callable, default=None - tokenizer: callable, default=None - ngrams: callable, default=None - preprocessor: callable, default=None - decoder: callable, default=None - stop_words: list, default=None - - Returns - ------- - ngrams: list - A sequence of tokens, possibly with pairs, triples, etc. - """ - - if decoder is not None: - doc = decoder(doc) - if analyzer is not None: - doc = analyzer(doc) - else: - if preprocessor is not None: - doc = preprocessor(doc) - if tokenizer is not None: - doc = tokenizer(doc) - if ngrams is not None: - if stop_words is not None: - doc = ngrams(doc, stop_words) - else: - doc = ngrams(doc) - return doc - - -def strip_accents_unicode(s): - """Transform accentuated unicode symbols into their simple counterpart. - - Warning: the python-level loop and join operations make this - implementation 20 times slower than the strip_accents_ascii basic - normalization. - - Parameters - ---------- - s : str - The string to strip. - - Returns - ------- - s : str - The stripped string. - - See Also - -------- - strip_accents_ascii : Remove accentuated char for any unicode symbol that - has a direct ASCII equivalent. 
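
    The intended behavior, sketched as a doctest runnable against upstream
    scikit-learn (from which this module appears to be copied verbatim):

        >>> from sklearn.feature_extraction.text import strip_accents_unicode
        >>> strip_accents_unicode('naïve café')
        'naive cafe'
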
- """ - try: - # If `s` is ASCII-compatible, then it does not contain any accented - # characters and we can avoid an expensive list comprehension - s.encode("ASCII", errors="strict") - return s - except UnicodeEncodeError: - normalized = unicodedata.normalize("NFKD", s) - return "".join([c for c in normalized if not unicodedata.combining(c)]) - - -def strip_accents_ascii(s): - """Transform accentuated unicode symbols into ascii or nothing. - - Warning: this solution is only suited for languages that have a direct - transliteration to ASCII symbols. - - Parameters - ---------- - s : str - The string to strip. - - Returns - ------- - s : str - The stripped string. - - See Also - -------- - strip_accents_unicode : Remove accentuated char for any unicode symbol. - """ - nkfd_form = unicodedata.normalize("NFKD", s) - return nkfd_form.encode("ASCII", "ignore").decode("ASCII") - - -def strip_tags(s): - """Basic regexp based HTML / XML tag stripper function. - - For serious HTML/XML preprocessing you should rather use an external - library such as lxml or BeautifulSoup. - - Parameters - ---------- - s : str - The string to strip. - - Returns - ------- - s : str - The stripped string. - """ - return re.compile(r"<([^>]+)>", flags=re.UNICODE).sub(" ", s) - - -def _check_stop_list(stop): - if stop == "english": - return ENGLISH_STOP_WORDS - elif isinstance(stop, str): - raise ValueError("not a built-in stop list: %s" % stop) - elif stop is None: - return None - else: # assume it's a collection - return frozenset(stop) - - -class _VectorizerMixin: - """Provides common code for text vectorizers (tokenization logic).""" - - _white_spaces = re.compile(r"\s\s+") - - def decode(self, doc): - """Decode the input into a string of unicode symbols. - - The decoding strategy depends on the vectorizer parameters. - - Parameters - ---------- - doc : bytes or str - The string to decode. - - Returns - ------- - doc: str - A string of unicode symbols. - """ - if self.input == "filename": - with open(doc, "rb") as fh: - doc = fh.read() - - elif self.input == "file": - doc = doc.read() - - if isinstance(doc, bytes): - doc = doc.decode(self.encoding, self.decode_error) - - if doc is np.nan: - raise ValueError( - "np.nan is an invalid document, expected byte or unicode string." 
- ) - - return doc - - def _word_ngrams(self, tokens, stop_words=None): - """Turn tokens into a sequence of n-grams after stop words filtering""" - # handle stop words - if stop_words is not None: - tokens = [w for w in tokens if w not in stop_words] - - # handle token n-grams - min_n, max_n = self.ngram_range - if max_n != 1: - original_tokens = tokens - if min_n == 1: - # no need to do any slicing for unigrams - # just iterate through the original tokens - tokens = list(original_tokens) - min_n += 1 - else: - tokens = [] - - n_original_tokens = len(original_tokens) - - # bind method outside of loop to reduce overhead - tokens_append = tokens.append - space_join = " ".join - - for n in range(min_n, min(max_n + 1, n_original_tokens + 1)): - for i in range(n_original_tokens - n + 1): - tokens_append(space_join(original_tokens[i : i + n])) - - return tokens - - def _char_ngrams(self, text_document): - """Tokenize text_document into a sequence of character n-grams""" - # normalize white spaces - text_document = self._white_spaces.sub(" ", text_document) - - text_len = len(text_document) - min_n, max_n = self.ngram_range - if min_n == 1: - # no need to do any slicing for unigrams - # iterate through the string - ngrams = list(text_document) - min_n += 1 - else: - ngrams = [] - - # bind method outside of loop to reduce overhead - ngrams_append = ngrams.append - - for n in range(min_n, min(max_n + 1, text_len + 1)): - for i in range(text_len - n + 1): - ngrams_append(text_document[i : i + n]) - return ngrams - - def _char_wb_ngrams(self, text_document): - """Whitespace sensitive char-n-gram tokenization. - - Tokenize text_document into a sequence of character n-grams - operating only inside word boundaries. n-grams at the edges - of words are padded with space.""" - # normalize white spaces - text_document = self._white_spaces.sub(" ", text_document) - - min_n, max_n = self.ngram_range - ngrams = [] - - # bind method outside of loop to reduce overhead - ngrams_append = ngrams.append - - for w in text_document.split(): - w = " " + w + " " - w_len = len(w) - for n in range(min_n, max_n + 1): - offset = 0 - ngrams_append(w[offset : offset + n]) - while offset + n < w_len: - offset += 1 - ngrams_append(w[offset : offset + n]) - if offset == 0: # count a short word (w_len < n) only once - break - return ngrams - - def build_preprocessor(self): - """Return a function to preprocess the text before tokenization. - - Returns - ------- - preprocessor: callable - A function to preprocess the text before tokenization. - """ - if self.preprocessor is not None: - return self.preprocessor - - # accent stripping - if not self.strip_accents: - strip_accents = None - elif callable(self.strip_accents): - strip_accents = self.strip_accents - elif self.strip_accents == "ascii": - strip_accents = strip_accents_ascii - elif self.strip_accents == "unicode": - strip_accents = strip_accents_unicode - else: - raise ValueError( - 'Invalid value for "strip_accents": %s' % self.strip_accents - ) - - return partial(_preprocess, accent_function=strip_accents, lower=self.lowercase) - - def build_tokenizer(self): - """Return a function that splits a string into a sequence of tokens. - - Returns - ------- - tokenizer: callable - A function to split a string into a sequence of tokens. - """ - if self.tokenizer is not None: - return self.tokenizer - token_pattern = re.compile(self.token_pattern) - - if token_pattern.groups > 1: - raise ValueError( - "More than 1 capturing group in token pattern. 
Only a single " - "group should be captured." - ) - - return token_pattern.findall - - def get_stop_words(self): - """Build or fetch the effective stop words list. - - Returns - ------- - stop_words: list or None - A list of stop words. - """ - return _check_stop_list(self.stop_words) - - def _check_stop_words_consistency(self, stop_words, preprocess, tokenize): - """Check if stop words are consistent - - Returns - ------- - is_consistent : True if stop words are consistent with the preprocessor - and tokenizer, False if they are not, None if the check - was previously performed, "error" if it could not be - performed (e.g. because of the use of a custom - preprocessor / tokenizer) - """ - if id(self.stop_words) == getattr(self, "_stop_words_id", None): - # Stop words are were previously validated - return None - - # NB: stop_words is validated, unlike self.stop_words - try: - inconsistent = set() - for w in stop_words or (): - tokens = list(tokenize(preprocess(w))) - for token in tokens: - if token not in stop_words: - inconsistent.add(token) - self._stop_words_id = id(self.stop_words) - - if inconsistent: - warnings.warn( - "Your stop_words may be inconsistent with " - "your preprocessing. Tokenizing the stop " - "words generated tokens %r not in " - "stop_words." % sorted(inconsistent) - ) - return not inconsistent - except Exception: - # Failed to check stop words consistency (e.g. because a custom - # preprocessor or tokenizer was used) - self._stop_words_id = id(self.stop_words) - return "error" - - def build_analyzer(self): - """Return a callable to process input data. - - The callable handles preprocessing, tokenization, and n-grams generation. - - Returns - ------- - analyzer: callable - A function to handle preprocessing, tokenization - and n-grams generation. - """ - - if callable(self.analyzer): - return partial(_analyze, analyzer=self.analyzer, decoder=self.decode) - - preprocess = self.build_preprocessor() - - if self.analyzer == "char": - return partial( - _analyze, - ngrams=self._char_ngrams, - preprocessor=preprocess, - decoder=self.decode, - ) - - elif self.analyzer == "char_wb": - return partial( - _analyze, - ngrams=self._char_wb_ngrams, - preprocessor=preprocess, - decoder=self.decode, - ) - - elif self.analyzer == "word": - stop_words = self.get_stop_words() - tokenize = self.build_tokenizer() - self._check_stop_words_consistency(stop_words, preprocess, tokenize) - return partial( - _analyze, - ngrams=self._word_ngrams, - tokenizer=tokenize, - preprocessor=preprocess, - decoder=self.decode, - stop_words=stop_words, - ) - - else: - raise ValueError( - "%s is not a valid tokenization scheme/analyzer" % self.analyzer - ) - - def _validate_vocabulary(self): - if (vocabulary := self.vocabulary) is not None: - if isinstance(vocabulary, set): - vocabulary = sorted(vocabulary) - if not isinstance(vocabulary, Mapping): - vocab = {} - for i, t in enumerate(vocabulary): - if vocab.setdefault(t, i) != i: - msg = "Duplicate term in vocabulary: %r" % t - raise ValueError(msg) - vocabulary = vocab - else: - indices = set(vocabulary.values()) - if len(indices) != len(vocabulary): - raise ValueError("Vocabulary contains repeated indices.") - for i in range(len(vocabulary)): - if i not in indices: - msg = "Vocabulary of size %d doesn't contain index %d." 
% ( - len(vocabulary), - i, - ) - raise ValueError(msg) - if not vocabulary: - raise ValueError("empty vocabulary passed to fit") - self.fixed_vocabulary_ = True - self.vocabulary_ = dict(vocabulary) - else: - self.fixed_vocabulary_ = False - - def _check_vocabulary(self): - """Check if vocabulary is empty or missing (not fitted)""" - if not hasattr(self, "vocabulary_"): - self._validate_vocabulary() - if not self.fixed_vocabulary_: - raise NotFittedError("Vocabulary not fitted or provided") - - if len(self.vocabulary_) == 0: - raise ValueError("Vocabulary is empty") - - def _validate_ngram_range(self): - """Check validity of ngram_range parameter""" - min_n, max_m = self.ngram_range - if min_n > max_m: - raise ValueError( - "Invalid value for ngram_range=%s " - "lower boundary larger than the upper boundary." % str(self.ngram_range) - ) - - def _warn_for_unused_params(self): - if self.tokenizer is not None and self.token_pattern is not None: - warnings.warn( - "The parameter 'token_pattern' will not be used" - " since 'tokenizer' is not None'" - ) - - if self.preprocessor is not None and callable(self.analyzer): - warnings.warn( - "The parameter 'preprocessor' will not be used" - " since 'analyzer' is callable'" - ) - - if ( - self.ngram_range != (1, 1) - and self.ngram_range is not None - and callable(self.analyzer) - ): - warnings.warn( - "The parameter 'ngram_range' will not be used" - " since 'analyzer' is callable'" - ) - if self.analyzer != "word" or callable(self.analyzer): - if self.stop_words is not None: - warnings.warn( - "The parameter 'stop_words' will not be used" - " since 'analyzer' != 'word'" - ) - if ( - self.token_pattern is not None - and self.token_pattern != r"(?u)\b\w\w+\b" - ): - warnings.warn( - "The parameter 'token_pattern' will not be used" - " since 'analyzer' != 'word'" - ) - if self.tokenizer is not None: - warnings.warn( - "The parameter 'tokenizer' will not be used" - " since 'analyzer' != 'word'" - ) - - -class HashingVectorizer( - TransformerMixin, _VectorizerMixin, BaseEstimator, auto_wrap_output_keys=None -): - r"""Convert a collection of text documents to a matrix of token occurrences. - - It turns a collection of text documents into a scipy.sparse matrix holding - token occurrence counts (or binary occurrence information), possibly - normalized as token frequencies if norm='l1' or projected on the euclidean - unit sphere if norm='l2'. - - This text vectorizer implementation uses the hashing trick to find the - token string name to feature integer index mapping. - - This strategy has several advantages: - - - it is very low memory scalable to large datasets as there is no need to - store a vocabulary dictionary in memory. - - - it is fast to pickle and un-pickle as it holds no state besides the - constructor parameters. - - - it can be used in a streaming (partial fit) or parallel pipeline as there - is no state computed during fit. - - There are also a couple of cons (vs using a CountVectorizer with an - in-memory vocabulary): - - - there is no way to compute the inverse transform (from feature indices to - string feature names) which can be a problem when trying to introspect - which features are most important to a model. - - - there can be collisions: distinct tokens can be mapped to the same - feature index. However in practice this is rarely an issue if n_features - is large enough (e.g. 2 ** 18 for text classification problems). - - - no IDF weighting as this would render the transformer stateful. 
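
    The last limitation can be worked around downstream by chaining the
    stateless hasher with a stateful TfidfTransformer; a minimal sketch,
    runnable against upstream scikit-learn, with illustrative sizes and corpus:

        >>> from sklearn.feature_extraction.text import (
        ...     HashingVectorizer, TfidfTransformer)
        >>> from sklearn.pipeline import make_pipeline
        >>> pipe = make_pipeline(HashingVectorizer(n_features=2**8, norm=None),
        ...                      TfidfTransformer())
        >>> pipe.fit_transform(['one document', 'another document']).shape
        (2, 256)
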
- - The hash function employed is the signed 32-bit version of Murmurhash3. - - Read more in the :ref:`User Guide `. - - Parameters - ---------- - input : {'filename', 'file', 'content'}, default='content' - - If `'filename'`, the sequence passed as an argument to fit is - expected to be a list of filenames that need reading to fetch - the raw content to analyze. - - - If `'file'`, the sequence items must have a 'read' method (file-like - object) that is called to fetch the bytes in memory. - - - If `'content'`, the input is expected to be a sequence of items that - can be of type string or byte. - - encoding : str, default='utf-8' - If bytes or files are given to analyze, this encoding is used to - decode. - - decode_error : {'strict', 'ignore', 'replace'}, default='strict' - Instruction on what to do if a byte sequence is given to analyze that - contains characters not of the given `encoding`. By default, it is - 'strict', meaning that a UnicodeDecodeError will be raised. Other - values are 'ignore' and 'replace'. - - strip_accents : {'ascii', 'unicode'} or callable, default=None - Remove accents and perform other character normalization - during the preprocessing step. - 'ascii' is a fast method that only works on characters that have - a direct ASCII mapping. - 'unicode' is a slightly slower method that works on any character. - None (default) does nothing. - - Both 'ascii' and 'unicode' use NFKD normalization from - :func:`unicodedata.normalize`. - - lowercase : bool, default=True - Convert all characters to lowercase before tokenizing. - - preprocessor : callable, default=None - Override the preprocessing (string transformation) stage while - preserving the tokenizing and n-grams generation steps. - Only applies if ``analyzer`` is not callable. - - tokenizer : callable, default=None - Override the string tokenization step while preserving the - preprocessing and n-grams generation steps. - Only applies if ``analyzer == 'word'``. - - stop_words : {'english'}, list, default=None - If 'english', a built-in stop word list for English is used. - There are several known issues with 'english' and you should - consider an alternative (see :ref:`stop_words`). - - If a list, that list is assumed to contain stop words, all of which - will be removed from the resulting tokens. - Only applies if ``analyzer == 'word'``. - - token_pattern : str or None, default=r"(?u)\\b\\w\\w+\\b" - Regular expression denoting what constitutes a "token", only used - if ``analyzer == 'word'``. The default regexp selects tokens of 2 - or more alphanumeric characters (punctuation is completely ignored - and always treated as a token separator). - - If there is a capturing group in token_pattern then the - captured group content, not the entire match, becomes the token. - At most one capturing group is permitted. - - ngram_range : tuple (min_n, max_n), default=(1, 1) - The lower and upper boundary of the range of n-values for different - n-grams to be extracted. All values of n such that min_n <= n <= max_n - will be used. For example an ``ngram_range`` of ``(1, 1)`` means only - unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means - only bigrams. - Only applies if ``analyzer`` is not callable. - - analyzer : {'word', 'char', 'char_wb'} or callable, default='word' - Whether the feature should be made of word or character n-grams. - Option 'char_wb' creates character n-grams only from text inside - word boundaries; n-grams at the edges of words are padded with space. 
- - If a callable is passed it is used to extract the sequence of features - out of the raw, unprocessed input. - - .. versionchanged:: 0.21 - Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data - is first read from the file and then passed to the given callable - analyzer. - - n_features : int, default=(2 ** 20) - The number of features (columns) in the output matrices. Small numbers - of features are likely to cause hash collisions, but large numbers - will cause larger coefficient dimensions in linear learners. - - binary : bool, default=False - If True, all non zero counts are set to 1. This is useful for discrete - probabilistic models that model binary events rather than integer - counts. - - norm : {'l1', 'l2'}, default='l2' - Norm used to normalize term vectors. None for no normalization. - - alternate_sign : bool, default=True - When True, an alternating sign is added to the features as to - approximately conserve the inner product in the hashed space even for - small n_features. This approach is similar to sparse random projection. - - .. versionadded:: 0.19 - - dtype : type, default=np.float64 - Type of the matrix returned by fit_transform() or transform(). - - See Also - -------- - CountVectorizer : Convert a collection of text documents to a matrix of - token counts. - TfidfVectorizer : Convert a collection of raw documents to a matrix of - TF-IDF features. - - Notes - ----- - This estimator is :term:`stateless` and does not need to be fitted. - However, we recommend to call :meth:`fit_transform` instead of - :meth:`transform`, as parameter validation is only performed in - :meth:`fit`. - - Examples - -------- - >>> from sklearn.feature_extraction.text import HashingVectorizer - >>> corpus = [ - ... 'This is the first document.', - ... 'This document is the second document.', - ... 'And this is the third one.', - ... 'Is this the first document?', - ... 
] - >>> vectorizer = HashingVectorizer(n_features=2**4) - >>> X = vectorizer.fit_transform(corpus) - >>> print(X.shape) - (4, 16) - """ - - _parameter_constraints: dict = { - "input": [StrOptions({"filename", "file", "content"})], - "encoding": [str], - "decode_error": [StrOptions({"strict", "ignore", "replace"})], - "strip_accents": [StrOptions({"ascii", "unicode"}), None, callable], - "lowercase": ["boolean"], - "preprocessor": [callable, None], - "tokenizer": [callable, None], - "stop_words": [StrOptions({"english"}), list, None], - "token_pattern": [str, None], - "ngram_range": [tuple], - "analyzer": [StrOptions({"word", "char", "char_wb"}), callable], - "n_features": [Interval(Integral, 1, np.iinfo(np.int32).max, closed="left")], - "binary": ["boolean"], - "norm": [StrOptions({"l1", "l2"}), None], - "alternate_sign": ["boolean"], - "dtype": "no_validation", # delegate to numpy - } - - def __init__( - self, - *, - input="content", - encoding="utf-8", - decode_error="strict", - strip_accents=None, - lowercase=True, - preprocessor=None, - tokenizer=None, - stop_words=None, - token_pattern=r"(?u)\b\w\w+\b", - ngram_range=(1, 1), - analyzer="word", - n_features=(2**20), - binary=False, - norm="l2", - alternate_sign=True, - dtype=np.float64, - ): - self.input = input - self.encoding = encoding - self.decode_error = decode_error - self.strip_accents = strip_accents - self.preprocessor = preprocessor - self.tokenizer = tokenizer - self.analyzer = analyzer - self.lowercase = lowercase - self.token_pattern = token_pattern - self.stop_words = stop_words - self.n_features = n_features - self.ngram_range = ngram_range - self.binary = binary - self.norm = norm - self.alternate_sign = alternate_sign - self.dtype = dtype - - def partial_fit(self, X, y=None): - """Only validates estimator's parameters. - - This method allows to: (i) validate the estimator's parameters and - (ii) be consistent with the scikit-learn transformer API. - - Parameters - ---------- - X : ndarray of shape [n_samples, n_features] - Training data. - - y : Ignored - Not used, present for API consistency by convention. - - Returns - ------- - self : object - HashingVectorizer instance. - """ - # TODO: only validate during the first call - self._validate_params() - return self - - def fit(self, X, y=None): - """Only validates estimator's parameters. - - This method allows to: (i) validate the estimator's parameters and - (ii) be consistent with the scikit-learn transformer API. - - Parameters - ---------- - X : ndarray of shape [n_samples, n_features] - Training data. - - y : Ignored - Not used, present for API consistency by convention. - - Returns - ------- - self : object - HashingVectorizer instance. - """ - self._validate_params() - - # triggers a parameter validation - if isinstance(X, str): - raise ValueError( - "Iterable over raw text documents expected, string object received." - ) - - self._warn_for_unused_params() - self._validate_ngram_range() - - self._get_hasher().fit(X, y=y) - return self - - def transform(self, X): - """Transform a sequence of documents to a document-term matrix. - - Parameters - ---------- - X : iterable over raw text documents, length = n_samples - Samples. Each sample must be a text document (either bytes or - unicode strings, file name or file object depending on the - constructor argument) which will be tokenized and hashed. - - Returns - ------- - X : sparse matrix of shape (n_samples, n_features) - Document-term matrix. 
- """ - if isinstance(X, str): - raise ValueError( - "Iterable over raw text documents expected, string object received." - ) - - self._validate_ngram_range() - - analyzer = self.build_analyzer() - X = self._get_hasher().transform(analyzer(doc) for doc in X) - if self.binary: - X.data.fill(1) - if self.norm is not None: - X = normalize(X, norm=self.norm, copy=False) - return X - - def fit_transform(self, X, y=None): - """Transform a sequence of documents to a document-term matrix. - - Parameters - ---------- - X : iterable over raw text documents, length = n_samples - Samples. Each sample must be a text document (either bytes or - unicode strings, file name or file object depending on the - constructor argument) which will be tokenized and hashed. - y : any - Ignored. This parameter exists only for compatibility with - sklearn.pipeline.Pipeline. - - Returns - ------- - X : sparse matrix of shape (n_samples, n_features) - Document-term matrix. - """ - return self.fit(X, y).transform(X) - - def _get_hasher(self): - return FeatureHasher( - n_features=self.n_features, - input_type="string", - dtype=self.dtype, - alternate_sign=self.alternate_sign, - ) - - def _more_tags(self): - return {"X_types": ["string"]} - - -def _document_frequency(X): - """Count the number of non-zero values for each feature in sparse X.""" - if sp.isspmatrix_csr(X): - return np.bincount(X.indices, minlength=X.shape[1]) - else: - return np.diff(X.indptr) - - -class CountVectorizer(_VectorizerMixin, BaseEstimator): - r"""Convert a collection of text documents to a matrix of token counts. - - This implementation produces a sparse representation of the counts using - scipy.sparse.csr_matrix. - - If you do not provide an a-priori dictionary and you do not use an analyzer - that does some kind of feature selection then the number of features will - be equal to the vocabulary size found by analyzing the data. - - Read more in the :ref:`User Guide `. - - Parameters - ---------- - input : {'filename', 'file', 'content'}, default='content' - - If `'filename'`, the sequence passed as an argument to fit is - expected to be a list of filenames that need reading to fetch - the raw content to analyze. - - - If `'file'`, the sequence items must have a 'read' method (file-like - object) that is called to fetch the bytes in memory. - - - If `'content'`, the input is expected to be a sequence of items that - can be of type string or byte. - - encoding : str, default='utf-8' - If bytes or files are given to analyze, this encoding is used to - decode. - - decode_error : {'strict', 'ignore', 'replace'}, default='strict' - Instruction on what to do if a byte sequence is given to analyze that - contains characters not of the given `encoding`. By default, it is - 'strict', meaning that a UnicodeDecodeError will be raised. Other - values are 'ignore' and 'replace'. - - strip_accents : {'ascii', 'unicode'} or callable, default=None - Remove accents and perform other character normalization - during the preprocessing step. - 'ascii' is a fast method that only works on characters that have - a direct ASCII mapping. - 'unicode' is a slightly slower method that works on any characters. - None (default) does nothing. - - Both 'ascii' and 'unicode' use NFKD normalization from - :func:`unicodedata.normalize`. - - lowercase : bool, default=True - Convert all characters to lowercase before tokenizing. 
- - preprocessor : callable, default=None - Override the preprocessing (strip_accents and lowercase) stage while - preserving the tokenizing and n-grams generation steps. - Only applies if ``analyzer`` is not callable. - - tokenizer : callable, default=None - Override the string tokenization step while preserving the - preprocessing and n-grams generation steps. - Only applies if ``analyzer == 'word'``. - - stop_words : {'english'}, list, default=None - If 'english', a built-in stop word list for English is used. - There are several known issues with 'english' and you should - consider an alternative (see :ref:`stop_words`). - - If a list, that list is assumed to contain stop words, all of which - will be removed from the resulting tokens. - Only applies if ``analyzer == 'word'``. - - If None, no stop words will be used. In this case, setting `max_df` - to a higher value, such as in the range (0.7, 1.0), can automatically detect - and filter stop words based on intra corpus document frequency of terms. - - token_pattern : str or None, default=r"(?u)\\b\\w\\w+\\b" - Regular expression denoting what constitutes a "token", only used - if ``analyzer == 'word'``. The default regexp select tokens of 2 - or more alphanumeric characters (punctuation is completely ignored - and always treated as a token separator). - - If there is a capturing group in token_pattern then the - captured group content, not the entire match, becomes the token. - At most one capturing group is permitted. - - ngram_range : tuple (min_n, max_n), default=(1, 1) - The lower and upper boundary of the range of n-values for different - word n-grams or char n-grams to be extracted. All values of n such - such that min_n <= n <= max_n will be used. For example an - ``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means - unigrams and bigrams, and ``(2, 2)`` means only bigrams. - Only applies if ``analyzer`` is not callable. - - analyzer : {'word', 'char', 'char_wb'} or callable, default='word' - Whether the feature should be made of word n-gram or character - n-grams. - Option 'char_wb' creates character n-grams only from text inside - word boundaries; n-grams at the edges of words are padded with space. - - If a callable is passed it is used to extract the sequence of features - out of the raw, unprocessed input. - - .. versionchanged:: 0.21 - - Since v0.21, if ``input`` is ``filename`` or ``file``, the data is - first read from the file and then passed to the given callable - analyzer. - - max_df : float in range [0.0, 1.0] or int, default=1.0 - When building the vocabulary ignore terms that have a document - frequency strictly higher than the given threshold (corpus-specific - stop words). - If float, the parameter represents a proportion of documents, integer - absolute counts. - This parameter is ignored if vocabulary is not None. - - min_df : float in range [0.0, 1.0] or int, default=1 - When building the vocabulary ignore terms that have a document - frequency strictly lower than the given threshold. This value is also - called cut-off in the literature. - If float, the parameter represents a proportion of documents, integer - absolute counts. - This parameter is ignored if vocabulary is not None. - - max_features : int, default=None - If not None, build a vocabulary that only consider the top - `max_features` ordered by term frequency across the corpus. - Otherwise, all features are used. - - This parameter is ignored if vocabulary is not None. 
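
    The document-frequency thresholds above can be made concrete with a small
    sketch against the upstream scikit-learn API (illustrative three-document
    corpus): an integer threshold counts documents, a float is a proportion.

        >>> from sklearn.feature_extraction.text import CountVectorizer
        >>> docs = ['aa bb', 'aa cc', 'aa dd']
        >>> sorted(CountVectorizer(min_df=2).fit(docs).vocabulary_)
        ['aa']
        >>> sorted(CountVectorizer(max_df=0.5).fit(docs).stop_words_)
        ['aa']
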
- - vocabulary : Mapping or iterable, default=None - Either a Mapping (e.g., a dict) where keys are terms and values are - indices in the feature matrix, or an iterable over terms. If not - given, a vocabulary is determined from the input documents. Indices - in the mapping should not be repeated and should not have any gap - between 0 and the largest index. - - binary : bool, default=False - If True, all non zero counts are set to 1. This is useful for discrete - probabilistic models that model binary events rather than integer - counts. - - dtype : dtype, default=np.int64 - Type of the matrix returned by fit_transform() or transform(). - - Attributes - ---------- - vocabulary_ : dict - A mapping of terms to feature indices. - - fixed_vocabulary_ : bool - True if a fixed vocabulary of term to indices mapping - is provided by the user. - - stop_words_ : set - Terms that were ignored because they either: - - - occurred in too many documents (`max_df`) - - occurred in too few documents (`min_df`) - - were cut off by feature selection (`max_features`). - - This is only available if no vocabulary was given. - - See Also - -------- - HashingVectorizer : Convert a collection of text documents to a - matrix of token counts. - - TfidfVectorizer : Convert a collection of raw documents to a matrix - of TF-IDF features. - - Notes - ----- - The ``stop_words_`` attribute can get large and increase the model size - when pickling. This attribute is provided only for introspection and can - be safely removed using delattr or set to None before pickling. - - Examples - -------- - >>> from sklearn.feature_extraction.text import CountVectorizer - >>> corpus = [ - ... 'This is the first document.', - ... 'This document is the second document.', - ... 'And this is the third one.', - ... 'Is this the first document?', - ... ] - >>> vectorizer = CountVectorizer() - >>> X = vectorizer.fit_transform(corpus) - >>> vectorizer.get_feature_names_out() - array(['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', - 'this'], ...) - >>> print(X.toarray()) - [[0 1 1 1 0 0 1 0 1] - [0 2 0 1 0 1 1 0 1] - [1 0 0 1 1 0 1 1 1] - [0 1 1 1 0 0 1 0 1]] - >>> vectorizer2 = CountVectorizer(analyzer='word', ngram_range=(2, 2)) - >>> X2 = vectorizer2.fit_transform(corpus) - >>> vectorizer2.get_feature_names_out() - array(['and this', 'document is', 'first document', 'is the', 'is this', - 'second document', 'the first', 'the second', 'the third', 'third one', - 'this document', 'this is', 'this the'], ...) 
- >>> print(X2.toarray()) - [[0 0 1 1 0 0 1 0 0 0 0 1 0] - [0 1 0 1 0 1 0 1 0 0 1 0 0] - [1 0 0 1 0 0 0 0 1 1 0 1 0] - [0 0 1 0 1 0 1 0 0 0 0 0 1]] - """ - - _parameter_constraints: dict = { - "input": [StrOptions({"filename", "file", "content"})], - "encoding": [str], - "decode_error": [StrOptions({"strict", "ignore", "replace"})], - "strip_accents": [StrOptions({"ascii", "unicode"}), None, callable], - "lowercase": ["boolean"], - "preprocessor": [callable, None], - "tokenizer": [callable, None], - "stop_words": [StrOptions({"english"}), list, None], - "token_pattern": [str, None], - "ngram_range": [tuple], - "analyzer": [StrOptions({"word", "char", "char_wb"}), callable], - "max_df": [ - Interval(Real, 0, 1, closed="both"), - Interval(Integral, 1, None, closed="left"), - ], - "min_df": [ - Interval(Real, 0, 1, closed="both"), - Interval(Integral, 1, None, closed="left"), - ], - "max_features": [Interval(Integral, 1, None, closed="left"), None], - "vocabulary": [Mapping, HasMethods("__iter__"), None], - "binary": ["boolean"], - "dtype": "no_validation", # delegate to numpy - } - - def __init__( - self, - *, - input="content", - encoding="utf-8", - decode_error="strict", - strip_accents=None, - lowercase=True, - preprocessor=None, - tokenizer=None, - stop_words=None, - token_pattern=r"(?u)\b\w\w+\b", - ngram_range=(1, 1), - analyzer="word", - max_df=1.0, - min_df=1, - max_features=None, - vocabulary=None, - binary=False, - dtype=np.int64, - ): - self.input = input - self.encoding = encoding - self.decode_error = decode_error - self.strip_accents = strip_accents - self.preprocessor = preprocessor - self.tokenizer = tokenizer - self.analyzer = analyzer - self.lowercase = lowercase - self.token_pattern = token_pattern - self.stop_words = stop_words - self.max_df = max_df - self.min_df = min_df - self.max_features = max_features - self.ngram_range = ngram_range - self.vocabulary = vocabulary - self.binary = binary - self.dtype = dtype - - def _sort_features(self, X, vocabulary): - """Sort features by name - - Returns a reordered matrix and modifies the vocabulary in place - """ - sorted_features = sorted(vocabulary.items()) - map_index = np.empty(len(sorted_features), dtype=X.indices.dtype) - for new_val, (term, old_val) in enumerate(sorted_features): - vocabulary[term] = new_val - map_index[old_val] = new_val - - X.indices = map_index.take(X.indices, mode="clip") - return X - - def _limit_features(self, X, vocabulary, high=None, low=None, limit=None): - """Remove too rare or too common features. - - Prune features that are non zero in more samples than high or less - documents than low, modifying the vocabulary, and restricting it to - at most the limit most frequent. - - This does not prune samples with zero features. 
- """ - if high is None and low is None and limit is None: - return X, set() - - # Calculate a mask based on document frequencies - dfs = _document_frequency(X) - mask = np.ones(len(dfs), dtype=bool) - if high is not None: - mask &= dfs <= high - if low is not None: - mask &= dfs >= low - if limit is not None and mask.sum() > limit: - tfs = np.asarray(X.sum(axis=0)).ravel() - mask_inds = (-tfs[mask]).argsort()[:limit] - new_mask = np.zeros(len(dfs), dtype=bool) - new_mask[np.where(mask)[0][mask_inds]] = True - mask = new_mask - - new_indices = np.cumsum(mask) - 1 # maps old indices to new - removed_terms = set() - for term, old_index in list(vocabulary.items()): - if mask[old_index]: - vocabulary[term] = new_indices[old_index] - else: - del vocabulary[term] - removed_terms.add(term) - kept_indices = np.where(mask)[0] - if len(kept_indices) == 0: - raise ValueError( - "After pruning, no terms remain. Try a lower min_df or a higher max_df." - ) - return X[:, kept_indices], removed_terms - - def _count_vocab(self, raw_documents, fixed_vocab): - """Create sparse feature matrix, and vocabulary where fixed_vocab=False""" - if fixed_vocab: - vocabulary = self.vocabulary_ - else: - # Add a new value when a new vocabulary item is seen - vocabulary = defaultdict() - vocabulary.default_factory = vocabulary.__len__ - - analyze = self.build_analyzer() - j_indices = [] - indptr = [] - - values = _make_int_array() - indptr.append(0) - for doc in raw_documents: - feature_counter = {} - for feature in analyze(doc): - try: - feature_idx = vocabulary[feature] - if feature_idx not in feature_counter: - feature_counter[feature_idx] = 1 - else: - feature_counter[feature_idx] += 1 - except KeyError: - # Ignore out-of-vocabulary items for fixed_vocab=True - continue - - j_indices.extend(feature_counter.keys()) - values.extend(feature_counter.values()) - indptr.append(len(j_indices)) - - if not fixed_vocab: - # disable defaultdict behaviour - vocabulary = dict(vocabulary) - if not vocabulary: - raise ValueError( - "empty vocabulary; perhaps the documents only contain stop words" - ) - - if indptr[-1] > np.iinfo(np.int32).max: # = 2**31 - 1 - if _IS_32BIT: - raise ValueError( - ( - "sparse CSR array has {} non-zero " - "elements and requires 64 bit indexing, " - "which is unsupported with 32 bit Python." - ).format(indptr[-1]) - ) - indices_dtype = np.int64 - - else: - indices_dtype = np.int32 - j_indices = np.asarray(j_indices, dtype=indices_dtype) - indptr = np.asarray(indptr, dtype=indices_dtype) - values = np.frombuffer(values, dtype=np.intc) - - X = sp.csr_matrix( - (values, j_indices, indptr), - shape=(len(indptr) - 1, len(vocabulary)), - dtype=self.dtype, - ) - X.sort_indices() - return vocabulary, X - - def fit(self, raw_documents, y=None): - """Learn a vocabulary dictionary of all tokens in the raw documents. - - Parameters - ---------- - raw_documents : iterable - An iterable which generates either str, unicode or file objects. - - y : None - This parameter is ignored. - - Returns - ------- - self : object - Fitted vectorizer. - """ - self.fit_transform(raw_documents) - return self - - def fit_transform(self, raw_documents, y=None): - """Learn the vocabulary dictionary and return document-term matrix. - - This is equivalent to fit followed by transform, but more efficiently - implemented. - - Parameters - ---------- - raw_documents : iterable - An iterable which generates either str, unicode or file objects. - - y : None - This parameter is ignored. 
- - Returns - ------- - X : array of shape (n_samples, n_features) - Document-term matrix. - """ - # We intentionally don't call the transform method to make - # fit_transform overridable without unwanted side effects in - # TfidfVectorizer. - if isinstance(raw_documents, str): - raise ValueError( - "Iterable over raw text documents expected, string object received." - ) - - self._validate_params() - self._validate_ngram_range() - self._warn_for_unused_params() - self._validate_vocabulary() - max_df = self.max_df - min_df = self.min_df - max_features = self.max_features - - if self.fixed_vocabulary_ and self.lowercase: - for term in self.vocabulary: - if any(map(str.isupper, term)): - warnings.warn( - "Upper case characters found in" - " vocabulary while 'lowercase'" - " is True. These entries will not" - " be matched with any documents" - ) - break - - vocabulary, X = self._count_vocab(raw_documents, self.fixed_vocabulary_) - - if self.binary: - X.data.fill(1) - - if not self.fixed_vocabulary_: - n_doc = X.shape[0] - max_doc_count = max_df if isinstance(max_df, Integral) else max_df * n_doc - min_doc_count = min_df if isinstance(min_df, Integral) else min_df * n_doc - if max_doc_count < min_doc_count: - raise ValueError("max_df corresponds to < documents than min_df") - if max_features is not None: - X = self._sort_features(X, vocabulary) - X, self.stop_words_ = self._limit_features( - X, vocabulary, max_doc_count, min_doc_count, max_features - ) - if max_features is None: - X = self._sort_features(X, vocabulary) - self.vocabulary_ = vocabulary - - return X - - def transform(self, raw_documents): - """Transform documents to document-term matrix. - - Extract token counts out of raw text documents using the vocabulary - fitted with fit or the one provided to the constructor. - - Parameters - ---------- - raw_documents : iterable - An iterable which generates either str, unicode or file objects. - - Returns - ------- - X : sparse matrix of shape (n_samples, n_features) - Document-term matrix. - """ - if isinstance(raw_documents, str): - raise ValueError( - "Iterable over raw text documents expected, string object received." - ) - self._check_vocabulary() - - # use the same matrix-building strategy as fit_transform - _, X = self._count_vocab(raw_documents, fixed_vocab=True) - if self.binary: - X.data.fill(1) - return X - - def inverse_transform(self, X): - """Return terms per document with nonzero entries in X. - - Parameters - ---------- - X : {array-like, sparse matrix} of shape (n_samples, n_features) - Document-term matrix. - - Returns - ------- - X_inv : list of arrays of shape (n_samples,) - List of arrays of terms. - """ - self._check_vocabulary() - # We need CSR format for fast row manipulations. - X = check_array(X, accept_sparse="csr") - n_samples = X.shape[0] - - terms = np.array(list(self.vocabulary_.keys())) - indices = np.array(list(self.vocabulary_.values())) - inverse_vocabulary = terms[np.argsort(indices)] - - if sp.issparse(X): - return [ - inverse_vocabulary[X[i, :].nonzero()[1]].ravel() - for i in range(n_samples) - ] - else: - return [ - inverse_vocabulary[np.flatnonzero(X[i, :])].ravel() - for i in range(n_samples) - ] - - def get_feature_names_out(self, input_features=None): - """Get output feature names for transformation. - - Parameters - ---------- - input_features : array-like of str or None, default=None - Not used, present here for API consistency by convention. - - Returns - ------- - feature_names_out : ndarray of str objects - Transformed feature names. 
-        """
-        self._check_vocabulary()
-        return np.asarray(
-            [t for t, i in sorted(self.vocabulary_.items(), key=itemgetter(1))],
-            dtype=object,
-        )
-
-    def _more_tags(self):
-        return {"X_types": ["string"]}
-
-
-def _make_int_array():
-    """Construct an array.array of a type suitable for scipy.sparse indices."""
-    return array.array(str("i"))
-
-
-class TfidfTransformer(
-    OneToOneFeatureMixin, TransformerMixin, BaseEstimator, auto_wrap_output_keys=None
-):
-    """Transform a count matrix to a normalized tf or tf-idf representation.
-
-    Tf means term-frequency while tf-idf means term-frequency times inverse
-    document-frequency. This is a common term weighting scheme in information
-    retrieval, that has also found good use in document classification.
-
-    The goal of using tf-idf instead of the raw frequencies of occurrence of a
-    token in a given document is to scale down the impact of tokens that occur
-    very frequently in a given corpus and that are hence empirically less
-    informative than features that occur in a small fraction of the training
-    corpus.
-
-    The formula that is used to compute the tf-idf for a term t of a document d
-    in a document set is tf-idf(t, d) = tf(t, d) * idf(t), and the idf is
-    computed as idf(t) = log [ n / df(t) ] + 1 (if ``smooth_idf=False``), where
-    n is the total number of documents in the document set and df(t) is the
-    document frequency of t; the document frequency is the number of documents
-    in the document set that contain the term t. The effect of adding "1" to
-    the idf in the equation above is that terms with zero idf, i.e., terms
-    that occur in all documents in a training set, will not be entirely
-    ignored.
-    (Note that the idf formula above differs from the standard textbook
-    notation that defines the idf as
-    idf(t) = log [ n / (df(t) + 1) ]).
-
-    If ``smooth_idf=True`` (the default), the constant "1" is added to the
-    numerator and denominator of the idf as if an extra document was seen
-    containing every term in the collection exactly once, which prevents
-    zero divisions: idf(t) = log [ (1 + n) / (1 + df(t)) ] + 1.
-
-    Furthermore, the formulas used to compute tf and idf depend
-    on parameter settings that correspond to the SMART notation used in IR
-    as follows:
-
-    Tf is "n" (natural) by default, "l" (logarithmic) when
-    ``sublinear_tf=True``.
-    Idf is "t" when use_idf is given, "n" (none) otherwise.
-    Normalization is "c" (cosine) when ``norm='l2'``, "n" (none)
-    when ``norm=None``.
-
-    Read more in the :ref:`User Guide <text_feature_extraction>`.
-
-    Parameters
-    ----------
-    norm : {'l1', 'l2'} or None, default='l2'
-        Each output row will have unit norm, either:
-
-        - 'l2': Sum of squares of vector elements is 1. The cosine
-          similarity between two vectors is their dot product when l2 norm has
-          been applied.
-        - 'l1': Sum of absolute values of vector elements is 1.
-          See :func:`preprocessing.normalize`.
-        - None: No normalization.
-
-    use_idf : bool, default=True
-        Enable inverse-document-frequency reweighting. If False, idf(t) = 1.
-
-    smooth_idf : bool, default=True
-        Smooth idf weights by adding one to document frequencies, as if an
-        extra document was seen containing every term in the collection
-        exactly once. Prevents zero divisions.
-
-    sublinear_tf : bool, default=False
-        Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).
-
-    Attributes
-    ----------
-    idf_ : array of shape (n_features)
-        The inverse document frequency (IDF) vector; only defined
-        if ``use_idf`` is True.
-
-        .. versionadded:: 0.20
-
-    n_features_in_ : int
-        Number of features seen during :term:`fit`.
-
-        .. versionadded:: 1.0
-
-    feature_names_in_ : ndarray of shape (`n_features_in_`,)
-        Names of features seen during :term:`fit`. Defined only when `X`
-        has feature names that are all strings.
-
-        .. versionadded:: 1.0
-
-    See Also
-    --------
-    CountVectorizer : Transforms text into a sparse matrix of n-gram counts.
-
-    TfidfVectorizer : Convert a collection of raw documents to a matrix of
-        TF-IDF features.
-
-    HashingVectorizer : Convert a collection of text documents to a matrix
-        of token occurrences.
-
-    References
-    ----------
-    .. [Yates2011] R. Baeza-Yates and B. Ribeiro-Neto (2011). Modern
-       Information Retrieval. Addison Wesley, pp. 68-74.
-
-    .. [MRS2008] C.D. Manning, P. Raghavan and H. Schütze (2008).
-       Introduction to Information Retrieval. Cambridge University
-       Press, pp. 118-120.
-
-    Examples
-    --------
-    >>> from sklearn.feature_extraction.text import TfidfTransformer
-    >>> from sklearn.feature_extraction.text import CountVectorizer
-    >>> from sklearn.pipeline import Pipeline
-    >>> corpus = ['this is the first document',
-    ...           'this document is the second document',
-    ...           'and this is the third one',
-    ...           'is this the first document']
-    >>> vocabulary = ['this', 'document', 'first', 'is', 'second', 'the',
-    ...               'and', 'one']
-    >>> pipe = Pipeline([('count', CountVectorizer(vocabulary=vocabulary)),
-    ...                  ('tfid', TfidfTransformer())]).fit(corpus)
-    >>> pipe['count'].transform(corpus).toarray()
-    array([[1, 1, 1, 1, 0, 1, 0, 0],
-           [1, 2, 0, 1, 1, 1, 0, 0],
-           [1, 0, 0, 1, 0, 1, 1, 1],
-           [1, 1, 1, 1, 0, 1, 0, 0]])
-    >>> pipe['tfid'].idf_
-    array([1.        , 1.22314355, 1.51082562, 1.        , 1.91629073,
-           1.        , 1.91629073, 1.91629073])
-    >>> pipe.transform(corpus).shape
-    (4, 8)
-    """
-
-    _parameter_constraints: dict = {
-        "norm": [StrOptions({"l1", "l2"}), None],
-        "use_idf": ["boolean"],
-        "smooth_idf": ["boolean"],
-        "sublinear_tf": ["boolean"],
-    }
-
-    def __init__(self, *, norm="l2", use_idf=True, smooth_idf=True, sublinear_tf=False):
-        self.norm = norm
-        self.use_idf = use_idf
-        self.smooth_idf = smooth_idf
-        self.sublinear_tf = sublinear_tf
-
-    def fit(self, X, y=None):
-        """Learn the idf vector (global term weights).
-
-        Parameters
-        ----------
-        X : sparse matrix of shape (n_samples, n_features)
-            A matrix of term/token counts.
-
-        y : None
-            This parameter is not needed to compute tf-idf.
-
-        Returns
-        -------
-        self : object
-            Fitted transformer.
-        """
-        self._validate_params()
-
-        # large sparse data is not supported for 32bit platforms because
-        # _document_frequency uses np.bincount which works on arrays of
-        # dtype NPY_INTP which is int32 for 32bit platforms. See #20923
-        X = self._validate_data(
-            X, accept_sparse=("csr", "csc"), accept_large_sparse=not _IS_32BIT
-        )
-        if not sp.issparse(X):
-            X = sp.csr_matrix(X)
-        dtype = X.dtype if X.dtype in FLOAT_DTYPES else np.float64
-
-        if self.use_idf:
-            n_samples, n_features = X.shape
-            df = _document_frequency(X)
-            df = df.astype(dtype, copy=False)
-
-            # perform idf smoothing if required
-            df += int(self.smooth_idf)
-            n_samples += int(self.smooth_idf)
-
-            # log+1 instead of log makes sure terms with zero idf don't get
-            # suppressed entirely.
-            idf = np.log(n_samples / df) + 1
-            self._idf_diag = sp.diags(
-                idf,
-                offsets=0,
-                shape=(n_features, n_features),
-                format="csr",
-                dtype=dtype,
-            )
-
-        return self
-
-    def transform(self, X, copy=True):
-        """Transform a count matrix to a tf or tf-idf representation.
-
-        Parameters
-        ----------
-        X : sparse matrix of (n_samples, n_features)
-            A matrix of term/token counts.
-
-        copy : bool, default=True
-            Whether to copy X and operate on the copy or perform in-place
-            operations.
-
-        Returns
-        -------
-        vectors : sparse matrix of shape (n_samples, n_features)
-            Tf-idf-weighted document-term matrix.
-        """
-        X = self._validate_data(
-            X, accept_sparse="csr", dtype=FLOAT_DTYPES, copy=copy, reset=False
-        )
-        if not sp.issparse(X):
-            X = sp.csr_matrix(X, dtype=np.float64)
-
-        if self.sublinear_tf:
-            np.log(X.data, X.data)
-            X.data += 1
-
-        if self.use_idf:
-            # idf_ being a property, the automatic attributes detection
-            # does not work as usual and we need to specify the attribute
-            # name:
-            check_is_fitted(self, attributes=["idf_"], msg="idf vector is not fitted")
-
-            # *= doesn't work
-            X = X * self._idf_diag
-
-        if self.norm is not None:
-            X = normalize(X, norm=self.norm, copy=False)
-
-        return X
-
-    @property
-    def idf_(self):
-        """Inverse document frequency vector, only defined if `use_idf=True`.
-
-        Returns
-        -------
-        ndarray of shape (n_features,)
-        """
-        # if _idf_diag is not set, this will raise an attribute error,
-        # which means hasattr(self, "idf_") is False
-        return np.ravel(self._idf_diag.sum(axis=0))
-
-    @idf_.setter
-    def idf_(self, value):
-        value = np.asarray(value, dtype=np.float64)
-        n_features = value.shape[0]
-        self._idf_diag = sp.spdiags(
-            value, diags=0, m=n_features, n=n_features, format="csr"
-        )
-
-    def _more_tags(self):
-        return {"X_types": ["2darray", "sparse"]}
-
-
-class TfidfVectorizer(CountVectorizer):
-    r"""Convert a collection of raw documents to a matrix of TF-IDF features.
-
-    Equivalent to :class:`CountVectorizer` followed by
-    :class:`TfidfTransformer`.
-
-    Read more in the :ref:`User Guide <text_feature_extraction>`.
-
-    Parameters
-    ----------
-    input : {'filename', 'file', 'content'}, default='content'
-        - If `'filename'`, the sequence passed as an argument to fit is
-          expected to be a list of filenames that need reading to fetch
-          the raw content to analyze.
-
-        - If `'file'`, the sequence items must have a 'read' method (file-like
-          object) that is called to fetch the bytes in memory.
-
-        - If `'content'`, the input is expected to be a sequence of items that
-          can be of type string or byte.
-
-    encoding : str, default='utf-8'
-        If bytes or files are given to analyze, this encoding is used to
-        decode.
-
-    decode_error : {'strict', 'ignore', 'replace'}, default='strict'
-        Instruction on what to do if a byte sequence is given to analyze that
-        contains characters not of the given `encoding`. By default, it is
-        'strict', meaning that a UnicodeDecodeError will be raised. Other
-        values are 'ignore' and 'replace'.
-
-    strip_accents : {'ascii', 'unicode'} or callable, default=None
-        Remove accents and perform other character normalization
-        during the preprocessing step.
-        'ascii' is a fast method that only works on characters that have
-        a direct ASCII mapping.
-        'unicode' is a slightly slower method that works on any characters.
-        None (default) does nothing.
-
-        Both 'ascii' and 'unicode' use NFKD normalization from
-        :func:`unicodedata.normalize`.
-
-    lowercase : bool, default=True
-        Convert all characters to lowercase before tokenizing.
-
-    preprocessor : callable, default=None
-        Override the preprocessing (string transformation) stage while
-        preserving the tokenizing and n-grams generation steps.
-        Only applies if ``analyzer`` is not callable.
- - tokenizer : callable, default=None - Override the string tokenization step while preserving the - preprocessing and n-grams generation steps. - Only applies if ``analyzer == 'word'``. - - analyzer : {'word', 'char', 'char_wb'} or callable, default='word' - Whether the feature should be made of word or character n-grams. - Option 'char_wb' creates character n-grams only from text inside - word boundaries; n-grams at the edges of words are padded with space. - - If a callable is passed it is used to extract the sequence of features - out of the raw, unprocessed input. - - .. versionchanged:: 0.21 - Since v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data - is first read from the file and then passed to the given callable - analyzer. - - stop_words : {'english'}, list, default=None - If a string, it is passed to _check_stop_list and the appropriate stop - list is returned. 'english' is currently the only supported string - value. - There are several known issues with 'english' and you should - consider an alternative (see :ref:`stop_words`). - - If a list, that list is assumed to contain stop words, all of which - will be removed from the resulting tokens. - Only applies if ``analyzer == 'word'``. - - If None, no stop words will be used. In this case, setting `max_df` - to a higher value, such as in the range (0.7, 1.0), can automatically detect - and filter stop words based on intra corpus document frequency of terms. - - token_pattern : str, default=r"(?u)\\b\\w\\w+\\b" - Regular expression denoting what constitutes a "token", only used - if ``analyzer == 'word'``. The default regexp selects tokens of 2 - or more alphanumeric characters (punctuation is completely ignored - and always treated as a token separator). - - If there is a capturing group in token_pattern then the - captured group content, not the entire match, becomes the token. - At most one capturing group is permitted. - - ngram_range : tuple (min_n, max_n), default=(1, 1) - The lower and upper boundary of the range of n-values for different - n-grams to be extracted. All values of n such that min_n <= n <= max_n - will be used. For example an ``ngram_range`` of ``(1, 1)`` means only - unigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means - only bigrams. - Only applies if ``analyzer`` is not callable. - - max_df : float or int, default=1.0 - When building the vocabulary ignore terms that have a document - frequency strictly higher than the given threshold (corpus-specific - stop words). - If float in range [0.0, 1.0], the parameter represents a proportion of - documents, integer absolute counts. - This parameter is ignored if vocabulary is not None. - - min_df : float or int, default=1 - When building the vocabulary ignore terms that have a document - frequency strictly lower than the given threshold. This value is also - called cut-off in the literature. - If float in range of [0.0, 1.0], the parameter represents a proportion - of documents, integer absolute counts. - This parameter is ignored if vocabulary is not None. - - max_features : int, default=None - If not None, build a vocabulary that only consider the top - `max_features` ordered by term frequency across the corpus. - Otherwise, all features are used. - - This parameter is ignored if vocabulary is not None. - - vocabulary : Mapping or iterable, default=None - Either a Mapping (e.g., a dict) where keys are terms and values are - indices in the feature matrix, or an iterable over terms. 
If not - given, a vocabulary is determined from the input documents. - - binary : bool, default=False - If True, all non-zero term counts are set to 1. This does not mean - outputs will have only 0/1 values, only that the tf term in tf-idf - is binary. (Set idf and normalization to False to get 0/1 outputs). - - dtype : dtype, default=float64 - Type of the matrix returned by fit_transform() or transform(). - - norm : {'l1', 'l2'} or None, default='l2' - Each output row will have unit norm, either: - - - 'l2': Sum of squares of vector elements is 1. The cosine - similarity between two vectors is their dot product when l2 norm has - been applied. - - 'l1': Sum of absolute values of vector elements is 1. - See :func:`preprocessing.normalize`. - - None: No normalization. - - use_idf : bool, default=True - Enable inverse-document-frequency reweighting. If False, idf(t) = 1. - - smooth_idf : bool, default=True - Smooth idf weights by adding one to document frequencies, as if an - extra document was seen containing every term in the collection - exactly once. Prevents zero divisions. - - sublinear_tf : bool, default=False - Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf). - - Attributes - ---------- - vocabulary_ : dict - A mapping of terms to feature indices. - - fixed_vocabulary_ : bool - True if a fixed vocabulary of term to indices mapping - is provided by the user. - - idf_ : array of shape (n_features,) - The inverse document frequency (IDF) vector; only defined - if ``use_idf`` is True. - - stop_words_ : set - Terms that were ignored because they either: - - - occurred in too many documents (`max_df`) - - occurred in too few documents (`min_df`) - - were cut off by feature selection (`max_features`). - - This is only available if no vocabulary was given. - - See Also - -------- - CountVectorizer : Transforms text into a sparse matrix of n-gram counts. - - TfidfTransformer : Performs the TF-IDF transformation from a provided - matrix of counts. - - Notes - ----- - The ``stop_words_`` attribute can get large and increase the model size - when pickling. This attribute is provided only for introspection and can - be safely removed using delattr or set to None before pickling. - - Examples - -------- - >>> from sklearn.feature_extraction.text import TfidfVectorizer - >>> corpus = [ - ... 'This is the first document.', - ... 'This document is the second document.', - ... 'And this is the third one.', - ... 'Is this the first document?', - ... ] - >>> vectorizer = TfidfVectorizer() - >>> X = vectorizer.fit_transform(corpus) - >>> vectorizer.get_feature_names_out() - array(['and', 'document', 'first', 'is', 'one', 'second', 'the', 'third', - 'this'], ...) 
- >>> print(X.shape) - (4, 9) - """ - - _parameter_constraints: dict = {**CountVectorizer._parameter_constraints} - _parameter_constraints.update( - { - "norm": [StrOptions({"l1", "l2"}), None], - "use_idf": ["boolean"], - "smooth_idf": ["boolean"], - "sublinear_tf": ["boolean"], - } - ) - - def __init__( - self, - *, - input="content", - encoding="utf-8", - decode_error="strict", - strip_accents=None, - lowercase=True, - preprocessor=None, - tokenizer=None, - analyzer="word", - stop_words=None, - token_pattern=r"(?u)\b\w\w+\b", - ngram_range=(1, 1), - max_df=1.0, - min_df=1, - max_features=None, - vocabulary=None, - binary=False, - dtype=np.float64, - norm="l2", - use_idf=True, - smooth_idf=True, - sublinear_tf=False, - ): - super().__init__( - input=input, - encoding=encoding, - decode_error=decode_error, - strip_accents=strip_accents, - lowercase=lowercase, - preprocessor=preprocessor, - tokenizer=tokenizer, - analyzer=analyzer, - stop_words=stop_words, - token_pattern=token_pattern, - ngram_range=ngram_range, - max_df=max_df, - min_df=min_df, - max_features=max_features, - vocabulary=vocabulary, - binary=binary, - dtype=dtype, - ) - self.norm = norm - self.use_idf = use_idf - self.smooth_idf = smooth_idf - self.sublinear_tf = sublinear_tf - - # Broadcast the TF-IDF parameters to the underlying transformer instance - # for easy grid search and repr - - @property - def idf_(self): - """Inverse document frequency vector, only defined if `use_idf=True`. - - Returns - ------- - ndarray of shape (n_features,) - """ - if not hasattr(self, "_tfidf"): - raise NotFittedError( - f"{self.__class__.__name__} is not fitted yet. Call 'fit' with " - "appropriate arguments before using this attribute." - ) - return self._tfidf.idf_ - - @idf_.setter - def idf_(self, value): - if not self.use_idf: - raise ValueError("`idf_` cannot be set when `user_idf=False`.") - if not hasattr(self, "_tfidf"): - # We should support transferring `idf_` from another `TfidfTransformer` - # and therefore, we need to create the transformer instance it does not - # exist yet. - self._tfidf = TfidfTransformer( - norm=self.norm, - use_idf=self.use_idf, - smooth_idf=self.smooth_idf, - sublinear_tf=self.sublinear_tf, - ) - self._validate_vocabulary() - if hasattr(self, "vocabulary_"): - if len(self.vocabulary_) != len(value): - raise ValueError( - "idf length = %d must be equal to vocabulary size = %d" - % (len(value), len(self.vocabulary)) - ) - self._tfidf.idf_ = value - - def _check_params(self): - if self.dtype not in FLOAT_DTYPES: - warnings.warn( - "Only {} 'dtype' should be used. {} 'dtype' will " - "be converted to np.float64.".format(FLOAT_DTYPES, self.dtype), - UserWarning, - ) - - def fit(self, raw_documents, y=None): - """Learn vocabulary and idf from training set. - - Parameters - ---------- - raw_documents : iterable - An iterable which generates either str, unicode or file objects. - - y : None - This parameter is not needed to compute tfidf. - - Returns - ------- - self : object - Fitted vectorizer. - """ - self._validate_params() - self._check_params() - self._warn_for_unused_params() - self._tfidf = TfidfTransformer( - norm=self.norm, - use_idf=self.use_idf, - smooth_idf=self.smooth_idf, - sublinear_tf=self.sublinear_tf, - ) - X = super().fit_transform(raw_documents) - self._tfidf.fit(X) - return self - - def fit_transform(self, raw_documents, y=None): - """Learn vocabulary and idf, return document-term matrix. - - This is equivalent to fit followed by transform, but more efficiently - implemented. 
- - Parameters - ---------- - raw_documents : iterable - An iterable which generates either str, unicode or file objects. - - y : None - This parameter is ignored. - - Returns - ------- - X : sparse matrix of (n_samples, n_features) - Tf-idf-weighted document-term matrix. - """ - self._check_params() - self._tfidf = TfidfTransformer( - norm=self.norm, - use_idf=self.use_idf, - smooth_idf=self.smooth_idf, - sublinear_tf=self.sublinear_tf, - ) - X = super().fit_transform(raw_documents) - self._tfidf.fit(X) - # X is already a transformed view of raw_documents so - # we set copy to False - return self._tfidf.transform(X, copy=False) - - def transform(self, raw_documents): - """Transform documents to document-term matrix. - - Uses the vocabulary and document frequencies (df) learned by fit (or - fit_transform). - - Parameters - ---------- - raw_documents : iterable - An iterable which generates either str, unicode or file objects. - - Returns - ------- - X : sparse matrix of (n_samples, n_features) - Tf-idf-weighted document-term matrix. - """ - check_is_fitted(self, msg="The TF-IDF vectorizer is not fitted") - - X = super().transform(raw_documents) - return self._tfidf.transform(X, copy=False) - - def _more_tags(self): - return {"X_types": ["string"], "_skip_test": True} From 4b824d73875021278c0285f8e45de7dbe7d75a93 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Fri, 27 Oct 2023 02:21:25 +0330 Subject: [PATCH 64/92] Add files via upload --- linear_algebra/src/gaussian_elimination_pivoting/matrix.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 linear_algebra/src/gaussian_elimination_pivoting/matrix.txt diff --git a/linear_algebra/src/gaussian_elimination_pivoting/matrix.txt b/linear_algebra/src/gaussian_elimination_pivoting/matrix.txt new file mode 100644 index 000000000000..6d21d53bbef4 --- /dev/null +++ b/linear_algebra/src/gaussian_elimination_pivoting/matrix.txt @@ -0,0 +1,4 @@ +5.0 -5.0 -3.0 4.0 -11.0 +1.0 -4.0 6.0 -4.0 -10.0 +-2.0 -5.0 4.0 -5.0 -12.0 +-3.0 -3.0 5.0 -5.0 8.0 \ No newline at end of file From 13fff37b2f7a7f7d70eca35573f55016d1523801 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 26 Oct 2023 22:53:16 +0000 Subject: [PATCH 65/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- linear_algebra/src/gaussian_elimination_pivoting/matrix.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/matrix.txt b/linear_algebra/src/gaussian_elimination_pivoting/matrix.txt index 6d21d53bbef4..dd895ad856ee 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/matrix.txt +++ b/linear_algebra/src/gaussian_elimination_pivoting/matrix.txt @@ -1,4 +1,4 @@ 5.0 -5.0 -3.0 4.0 -11.0 -1.0 -4.0 6.0 -4.0 -10.0 +1.0 -4.0 6.0 -4.0 -10.0 -2.0 -5.0 4.0 -5.0 -12.0 -3.0 -3.0 5.0 -5.0 8.0 \ No newline at end of file From df346438fdab4c2f3c36d736c93bd8e1a1383662 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Sat, 28 Oct 2023 09:26:58 +0330 Subject: [PATCH 66/92] Update linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py Co-authored-by: Christian Clauss --- .../gaussian_elimination_pivoting.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 
5c6c2a40e75f..9d6110166c4b 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -62,7 +62,8 @@ def foo(matrix): return np.asarray(xlst) -vectorofxalpha = foo(matrixab) +if __name__ == "__main__": + vectorofxalpha = foo(matrixab) """Cond(A)""" modifiedb = np.copy(B) From f55e7acf1a9c2fb85f25961cc0a9eae68fb8e6b9 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Sat, 28 Oct 2023 09:39:03 +0330 Subject: [PATCH 67/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 139 +++++++++--------- 1 file changed, 69 insertions(+), 70 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 9d6110166c4b..fcbfbb9e0c23 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -1,94 +1,93 @@ import sys import time +from typing import List, Union import numpy as np -matrixab = np.loadtxt("matrix.txt") -B = np.copy(matrixab[:, matrixab.shape[1] - 1]) +def solve_linear_system(matrix: np.ndarray) -> np.ndarray: + """ + Solves a linear system of equations using Gaussian elimination with partial pivoting. + Args: + - matrix (np.ndarray): Coefficient matrix with the last column representing the constants. -def foo(matrix): - start = time.process_time() - ab = np.copy(matrix) - numofrows = ab.shape[0] - numofcolumns = ab.shape[1] - 1 - xlst = [] + Returns: + - np.ndarray: Solution vector. - """Lead element search""" - print("Matrix before leading coefficient search: ") - print(ab) - print(" ") + Raises: + - sys.exit: If the matrix is not correct (i.e., singular). 
- """Upper triangular matrix""" - for columnnum in range(numofrows): - for i in range(columnnum, numofcolumns): - if abs(ab[i][columnnum]) > abs(ab[columnnum][columnnum]): - ab[[columnnum, i]] = ab[[i, columnnum]] - if ab[columnnum, columnnum] == 0.0: - sys.exit("Matrix is not correct") + Example: + >>> A = np.array([[2, 1, -1], [-3, -1, 2], [-2, 1, 2]], dtype=float) + >>> B = np.array([8, -11, -3], dtype=float) + >>> solution = solve_linear_system(np.column_stack((A, B))) + >>> np.allclose(solution, np.array([2., 3., -1.])) + True + """ + start = time.process_time() + ab = np.copy(matrix) + num_of_rows = ab.shape[0] + num_of_columns = ab.shape[1] - 1 + x_lst: List[Union[int, float]] = [] + + # Lead element search + for column_num in range(num_of_rows): + for i in range(column_num, num_of_columns): + if abs(ab[i][column_num]) > abs(ab[column_num][column_num]): + ab[[column_num, i]] = ab[[i, column_num]] + if ab[column_num, column_num] == 0.0: + raise sys.exit("Matrix is not correct") else: pass - if columnnum != 0: - for i in range(columnnum, numofrows): + if column_num != 0: + for i in range(column_num, num_of_rows): ab[i, :] -= ( - ab[i, columnnum - 1] - / ab[columnnum - 1, columnnum - 1] - * ab[columnnum - 1, :] + ab[i, column_num - 1] + / ab[column_num - 1, column_num - 1] + * ab[column_num - 1, :] ) - print("Upper triangular matrix: ") - print(ab.round(3)) - print(" ") + # Upper triangular matrix + for column_num in range(num_of_rows): + for i in range(column_num, num_of_columns): + if abs(ab[i][column_num]) > abs(ab[column_num][column_num]): + ab[[column_num, i]] = ab[[i, column_num]] + if ab[column_num, column_num] == 0.0: + raise sys.exit("Matrix is not correct") + else: + pass + if column_num != 0: + for i in range(column_num, num_of_rows): + ab[i, :] -= ( + ab[i, column_num - 1] + / ab[column_num - 1, column_num - 1] + * ab[column_num - 1, :] + ) - """Find x vector""" - columnnum = numofrows - while columnnum != 0: - columnnum -= 1 - lineofx = ab[columnnum, numofrows] - if columnnum + 1 != numofrows: - for y in range(1, numofrows - columnnum): - lineofx += -ab[columnnum, numofrows - y] * xlst[y - 1] - x = lineofx / ab[columnnum, columnnum] - xlst.append(x) + # Find x vector + column_num = num_of_rows + while column_num != 0: + column_num -= 1 + line_of_x = ab[column_num, num_of_rows] + if column_num + 1 != num_of_rows: + for y in range(1, num_of_rows - column_num): + line_of_x += -ab[column_num, num_of_rows - y] * x_lst[y - 1] + x = line_of_x / ab[column_num, column_num] + x_lst.append(x) stop = time.process_time() - xlst.reverse() - print("x vector: ") - print(xlst) - print(" ") - print(f"Start time: {start}, End time: {stop}") - print(f"Elapsed time during the whole function in seconds: {stop - start}") - - return np.asarray(xlst) + # Return the solution vector + return np.asarray(x_lst) if __name__ == "__main__": - vectorofxalpha = foo(matrixab) - -"""Cond(A)""" -modifiedb = np.copy(B) -modifiedb[np.argmax(abs(B))] = B[np.argmax(abs(B))] / 100 * 101 - -matrixab[:, matrixab.shape[1] - 1] = modifiedb -print() -print("Cond(A) check: ") -vectorofxbeta = foo(matrixab) - -deltab = modifiedb - B -deltax = vectorofxalpha - vectorofxbeta -print(" ") -conda = abs(np.sum(deltax) / np.sum(vectorofxalpha)) * (np.sum(B) / np.sum(deltab)) -print(f"Cond(A) =< {conda:0.6f}") - - -# Example usage: -# n_size = 3 -# a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) -# b_vector = np.array([10, 11, 12], dtype=float) - -# solution = custom_gauss_elimination_pivoting(a_matrix, 
b_vector, n_size) -# print("Solution:", solution) + # Example usage: + n_size = 3 + a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) + b_vector = np.array([10, 11, 12], dtype=float) + solution = solve_linear_system(np.column_stack((a_matrix, b_vector))) + print("Solution:", solution) # URL that points to Wikipedia or another similar explanation. # >>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<# From 28a5410e89b3c894741fb591a6abb86d6a61f1be Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 28 Oct 2023 06:09:38 +0000 Subject: [PATCH 68/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../gaussian_elimination_pivoting.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index fcbfbb9e0c23..73d7774c9b24 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -4,6 +4,7 @@ import numpy as np + def solve_linear_system(matrix: np.ndarray) -> np.ndarray: """ Solves a linear system of equations using Gaussian elimination with partial pivoting. @@ -80,6 +81,7 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: # Return the solution vector return np.asarray(x_lst) + if __name__ == "__main__": # Example usage: n_size = 3 From 30933f41eca287683212f54c272b047a8e076007 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Sat, 28 Oct 2023 09:45:23 +0330 Subject: [PATCH 69/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 46 ++++++++++++++----- 1 file changed, 34 insertions(+), 12 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 73d7774c9b24..b0423944f8d8 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -1,25 +1,28 @@ import sys import time -from typing import List, Union import numpy as np def solve_linear_system(matrix: np.ndarray) -> np.ndarray: """ - Solves a linear system of equations using Gaussian elimination with partial pivoting. + Solves a linear system of equations using + Gaussian elimination with partial pivoting. Args: - - matrix (np.ndarray): Coefficient matrix with the last column representing the constants. + - matrix (np.ndarray): Coefficient matrix + with the last column representing the constants. Returns: - np.ndarray: Solution vector. Raises: - - sys.exit: If the matrix is not correct (i.e., singular). + - sys.exit: If the matrix is not correct + (i.e., singular). 
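# Editor's sketch, not part of the patch: the pivoting step these revisions
# keep reworking can be shown in isolation. A minimal example assuming only
# NumPy; swap_pivot_row is a hypothetical helper name, not a function from
# the patched module.
import numpy as np


def swap_pivot_row(ab: np.ndarray, column_num: int) -> np.ndarray:
    # Among the rows at or below the diagonal, pick the one whose entry in
    # this column is largest in absolute value, then swap it into the pivot
    # position with the same fancy-indexing idiom the patch uses.
    pivot_row = column_num + int(np.argmax(np.abs(ab[column_num:, column_num])))
    ab[[column_num, pivot_row]] = ab[[pivot_row, column_num]]
    return ab


print(swap_pivot_row(np.array([[1.0, 2.0], [4.0, 3.0]]), 0))
# [[4. 3.]
#  [1. 2.]]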
Example: - >>> A = np.array([[2, 1, -1], [-3, -1, 2], [-2, 1, 2]], dtype=float) + >>> A = np.array([[2, 1, -1], [-3, -1, 2] + , [-2, 1, 2]], dtype=float) >>> B = np.array([8, -11, -3], dtype=float) >>> solution = solve_linear_system(np.column_stack((A, B))) >>> np.allclose(solution, np.array([2., 3., -1.])) @@ -81,15 +84,34 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: # Return the solution vector return np.asarray(x_lst) - if __name__ == "__main__": - # Example usage: - n_size = 3 - a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) - b_vector = np.array([10, 11, 12], dtype=float) + vectorofxalpha = foo(matrixab) + + + """Cond(A)""" + modifiedb = np.copy(B) + modifiedb[np.argmax(abs(B))] = B[np.argmax(abs(B))] / 100 * 101 + + matrixab[:, matrixab.shape[1] - 1] = modifiedb + print() + print("Cond(A) check: ") + vectorofxbeta = foo(matrixab) + + deltab = modifiedb - B + deltax = vectorofxalpha - vectorofxbeta + print(" ") + conda = abs(np.sum(deltax) / np.sum(vectorofxalpha)) * (np.sum(B) / np.sum(deltab)) + print(f"Cond(A) =< {conda:0.6f}") + + +# Example usage: +# n_size = 3 +# a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) +# b_vector = np.array([10, 11, 12], dtype=float) + +# solution = custom_gauss_elimination_pivoting(a_matrix, b_vector, n_size) +# print("Solution:", solution) - solution = solve_linear_system(np.column_stack((a_matrix, b_vector))) - print("Solution:", solution) # URL that points to Wikipedia or another similar explanation. # >>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<# From ea6ad3b2fd4ec59ba18ca9a8835b8f0889c466d3 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 28 Oct 2023 06:15:58 +0000 Subject: [PATCH 70/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../gaussian_elimination_pivoting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index b0423944f8d8..c37d3dcca308 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -84,10 +84,10 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: # Return the solution vector return np.asarray(x_lst) + if __name__ == "__main__": vectorofxalpha = foo(matrixab) - """Cond(A)""" modifiedb = np.copy(B) modifiedb[np.argmax(abs(B))] = B[np.argmax(abs(B))] / 100 * 101 From d5f04f698e87003a65814b6677f4765ea2d06eb1 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Sat, 28 Oct 2023 09:47:40 +0330 Subject: [PATCH 71/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index c37d3dcca308..0f544adfd775 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -1,8 +1,12 @@ import sys import time +from typing import List, Union import numpy as np +matrixab = np.loadtxt("matrix.txt") +B = np.copy(matrixab[:, 
matrixab.shape[1] - 1]) + def solve_linear_system(matrix: np.ndarray) -> np.ndarray: """ From caf3a973012417173d3ca76c9c7a2af03af33ffb Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Sat, 28 Oct 2023 09:52:50 +0330 Subject: [PATCH 72/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 39 ++++++------------- 1 file changed, 11 insertions(+), 28 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 0f544adfd775..65fdc796eab8 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -1,13 +1,9 @@ import sys import time -from typing import List, Union +from typing import Union import numpy as np -matrixab = np.loadtxt("matrix.txt") -B = np.copy(matrixab[:, matrixab.shape[1] - 1]) - - def solve_linear_system(matrix: np.ndarray) -> np.ndarray: """ Solves a linear system of equations using @@ -25,18 +21,17 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: (i.e., singular). Example: - >>> A = np.array([[2, 1, -1], [-3, -1, 2] - , [-2, 1, 2]], dtype=float) + >>> A = np.array([[2, 1, -1], [-3, -1, 2], + [-2, 1, 2]], dtype=float) >>> B = np.array([8, -11, -3], dtype=float) >>> solution = solve_linear_system(np.column_stack((A, B))) >>> np.allclose(solution, np.array([2., 3., -1.])) True """ - start = time.process_time() ab = np.copy(matrix) num_of_rows = ab.shape[0] num_of_columns = ab.shape[1] - 1 - x_lst: List[Union[int, float]] = [] + x_lst = [] # Lead element search for column_num in range(num_of_rows): @@ -83,29 +78,17 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: x = line_of_x / ab[column_num, column_num] x_lst.append(x) - stop = time.process_time() - # Return the solution vector return np.asarray(x_lst) - if __name__ == "__main__": - vectorofxalpha = foo(matrixab) - - """Cond(A)""" - modifiedb = np.copy(B) - modifiedb[np.argmax(abs(B))] = B[np.argmax(abs(B))] / 100 * 101 - - matrixab[:, matrixab.shape[1] - 1] = modifiedb - print() - print("Cond(A) check: ") - vectorofxbeta = foo(matrixab) - - deltab = modifiedb - B - deltax = vectorofxalpha - vectorofxbeta - print(" ") - conda = abs(np.sum(deltax) / np.sum(vectorofxalpha)) * (np.sum(B) / np.sum(deltab)) - print(f"Cond(A) =< {conda:0.6f}") + # Example usage: + n_size = 3 + a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) + b_vector = np.array([10, 11, 12], dtype=float) + + solution = solve_linear_system(np.column_stack((a_matrix, b_vector))) + print("Solution:", solution) # Example usage: From 6096e35ba317c942f36a7eae63c974f4d9a3a75d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 28 Oct 2023 06:23:25 +0000 Subject: [PATCH 73/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../gaussian_elimination_pivoting.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 65fdc796eab8..0cf754100681 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -4,6 +4,7 @@ import 
numpy as np + def solve_linear_system(matrix: np.ndarray) -> np.ndarray: """ Solves a linear system of equations using @@ -81,6 +82,7 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: # Return the solution vector return np.asarray(x_lst) + if __name__ == "__main__": # Example usage: n_size = 3 From 606667c06f0b685aa94f4333c2a9cda24022a4cd Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Sat, 28 Oct 2023 09:56:44 +0330 Subject: [PATCH 74/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 0cf754100681..6a206de22d7e 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -1,29 +1,21 @@ import sys -import time -from typing import Union - import numpy as np - def solve_linear_system(matrix: np.ndarray) -> np.ndarray: """ - Solves a linear system of equations using - Gaussian elimination with partial pivoting. + Solves a linear system of equations using Gaussian elimination with partial pivoting. Args: - - matrix (np.ndarray): Coefficient matrix - with the last column representing the constants. + - matrix (np.ndarray): Coefficient matrix with the last column representing the constants. Returns: - np.ndarray: Solution vector. Raises: - - sys.exit: If the matrix is not correct - (i.e., singular). + - sys.exit: If the matrix is not correct (i.e., singular). Example: - >>> A = np.array([[2, 1, -1], [-3, -1, 2], - [-2, 1, 2]], dtype=float) + >>> A = np.array([[2, 1, -1], [-3, -1, 2], [-2, 1, 2]], dtype=float) >>> B = np.array([8, -11, -3], dtype=float) >>> solution = solve_linear_system(np.column_stack((A, B))) >>> np.allclose(solution, np.array([2., 3., -1.])) @@ -82,7 +74,6 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: # Return the solution vector return np.asarray(x_lst) - if __name__ == "__main__": # Example usage: n_size = 3 From 8594cc039731c4b262511c81ac66f860aae02b2c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 28 Oct 2023 06:27:19 +0000 Subject: [PATCH 75/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../gaussian_elimination_pivoting.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 6a206de22d7e..d606168ab862 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -1,6 +1,7 @@ import sys import numpy as np + def solve_linear_system(matrix: np.ndarray) -> np.ndarray: """ Solves a linear system of equations using Gaussian elimination with partial pivoting. 
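# Editor's sketch, not part of the patch: the doctest that settles into
# place in these revisions has callers build the augmented matrix [A | b]
# themselves via np.column_stack. A usage sketch under that convention,
# with arbitrary values; np.linalg.solve serves only as an independent
# cross-check, not as the patched solver.
import numpy as np

a = np.array([[3.0, 2.0], [1.0, 2.0]])
b = np.array([5.0, 5.0])
augmented = np.column_stack((a, b))  # shape (2, 3); the last column holds b
# solve_linear_system(augmented) should agree with the reference answer:
print(np.linalg.solve(a, b))  # [0.  2.5]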
@@ -74,6 +75,7 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: # Return the solution vector return np.asarray(x_lst) + if __name__ == "__main__": # Example usage: n_size = 3 From ed58bab2c34eb04c862646cf6b3a8b2911579bc7 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Sat, 28 Oct 2023 09:57:54 +0330 Subject: [PATCH 76/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index d606168ab862..a2760621e12f 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -1,13 +1,16 @@ import sys + import numpy as np def solve_linear_system(matrix: np.ndarray) -> np.ndarray: """ - Solves a linear system of equations using Gaussian elimination with partial pivoting. + Solves a linear system of equations using + Gaussian elimination with partial pivoting. Args: - - matrix (np.ndarray): Coefficient matrix with the last column representing the constants. + - matrix (np.ndarray): Coefficient matrix + with the last column representing the constants. Returns: - np.ndarray: Solution vector. From 29f3dffc530ad6101d6b83ebe51c90db3071eb93 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 28 Oct 2023 06:28:28 +0000 Subject: [PATCH 77/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../gaussian_elimination_pivoting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index a2760621e12f..c164909d524a 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -5,11 +5,11 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: """ - Solves a linear system of equations using + Solves a linear system of equations using Gaussian elimination with partial pivoting. Args: - - matrix (np.ndarray): Coefficient matrix + - matrix (np.ndarray): Coefficient matrix with the last column representing the constants. 
Returns: From c5981eec1724b2d8573eb2347ab2ed52a10e8261 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Sat, 28 Oct 2023 09:58:59 +0330 Subject: [PATCH 78/92] Update gaussian_elimination_pivoting.py From bc53093d60629c15e92c700941325190b3866eae Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Sat, 28 Oct 2023 10:02:12 +0330 Subject: [PATCH 79/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index c164909d524a..bf8d90e8a6b6 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -28,7 +28,7 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: ab = np.copy(matrix) num_of_rows = ab.shape[0] num_of_columns = ab.shape[1] - 1 - x_lst = [] + x_lst: List[float] = [] # Lead element search for column_num in range(num_of_rows): From 074737bc45966c7167cab0149ecd74fc212bc0be Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Sat, 28 Oct 2023 10:04:56 +0330 Subject: [PATCH 80/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 1 + 1 file changed, 1 insertion(+) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index bf8d90e8a6b6..81d4e12e8fe7 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -1,4 +1,5 @@ import sys +from typing import List import numpy as np From 5148009de68f4fbd225d06967c321b69b384156e Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Sat, 28 Oct 2023 10:08:35 +0330 Subject: [PATCH 81/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 81d4e12e8fe7..a945c1556d87 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -29,7 +29,7 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: ab = np.copy(matrix) num_of_rows = ab.shape[0] num_of_columns = ab.shape[1] - 1 - x_lst: List[float] = [] + x_lst: list[float] = [] # Lead element search for column_num in range(num_of_rows): From 52d7cfa62638407af63f118b4652ef809b9f51fc Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Sat, 28 Oct 2023 10:10:38 +0330 Subject: [PATCH 82/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index a945c1556d87..a5b76c72f9ce 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -1,5 +1,4 @@ import sys -from 
typing import List import numpy as np From 1d628aec54e978c5d13c2f80a139f2e23e3d2432 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Mon, 30 Oct 2023 09:19:39 +0330 Subject: [PATCH 83/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index a5b76c72f9ce..7ed10bdc6d5d 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -80,12 +80,15 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: if __name__ == "__main__": - # Example usage: - n_size = 3 - a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) - b_vector = np.array([10, 11, 12], dtype=float) + # Read the matrix from a .txt file in the same folder as the source code + file_path = "matrix.txt" + try: + matrix = np.loadtxt(file_path) + except FileNotFoundError: + sys.exit(f"Error: File '{file_path}' not found.") - solution = solve_linear_system(np.column_stack((a_matrix, b_vector))) + # Example usage: + solution = solve_linear_system(matrix) print("Solution:", solution) From 65a782b1594e860f10649158da43dddff843a932 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Mon, 30 Oct 2023 09:25:09 +0330 Subject: [PATCH 84/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 7ed10bdc6d5d..6d69a2c5e6d1 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -83,9 +83,9 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: # Read the matrix from a .txt file in the same folder as the source code file_path = "matrix.txt" try: - matrix = np.loadtxt(file_path) + matrix = np.loadtxt(linear_algebra/src/gaussian_elimination_pivoting/matrix.txt) except FileNotFoundError: - sys.exit(f"Error: File '{file_path}' not found.") + sys.exit(f"Error: File '{linear_algebra/src/gaussian_elimination_pivoting/matrix.txt}' not found.") # Example usage: solution = solve_linear_system(matrix) From dd169557a10df338ccae7aa01e4eb84a5e613af5 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 30 Oct 2023 05:55:44 +0000 Subject: [PATCH 85/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../gaussian_elimination_pivoting.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 6d69a2c5e6d1..db620555089c 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -83,9 +83,13 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: # Read the matrix from a .txt file in the same folder as the 
source code file_path = "matrix.txt" try: - matrix = np.loadtxt(linear_algebra/src/gaussian_elimination_pivoting/matrix.txt) + matrix = np.loadtxt( + linear_algebra / src / gaussian_elimination_pivoting / matrix.txt + ) except FileNotFoundError: - sys.exit(f"Error: File '{linear_algebra/src/gaussian_elimination_pivoting/matrix.txt}' not found.") + sys.exit( + f"Error: File '{linear_algebra/src/gaussian_elimination_pivoting/matrix.txt}' not found." + ) # Example usage: solution = solve_linear_system(matrix) From b81729fc11b63aa3c6a1915a7e3a4e3be9cb7614 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Mon, 30 Oct 2023 09:27:42 +0330 Subject: [PATCH 86/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index db620555089c..9f4791ca5cdc 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -83,13 +83,9 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: # Read the matrix from a .txt file in the same folder as the source code file_path = "matrix.txt" try: - matrix = np.loadtxt( - linear_algebra / src / gaussian_elimination_pivoting / matrix.txt - ) + matrixab = np.loadtxt(Path(__file__).parent / "matrix.txt") except FileNotFoundError: - sys.exit( - f"Error: File '{linear_algebra/src/gaussian_elimination_pivoting/matrix.txt}' not found." - ) + sys.exit(f"Error: File not found.") # Example usage: solution = solve_linear_system(matrix) From 8cc0a36cb602c12bfbec49fe38afca23a414ceeb Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Mon, 30 Oct 2023 09:34:12 +0330 Subject: [PATCH 87/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 9f4791ca5cdc..70aafc501d04 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -1,4 +1,5 @@ import sys +from pathlib import Path import numpy as np @@ -80,10 +81,9 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: if __name__ == "__main__": - # Read the matrix from a .txt file in the same folder as the source code file_path = "matrix.txt" try: - matrixab = np.loadtxt(Path(__file__).parent / "matrix.txt") + matrix = np.loadtxt(Path(__file__).parent / "matrix.txt") except FileNotFoundError: sys.exit(f"Error: File not found.") From 2b8c8b03bbee8e3a15f60371482aa143664d9ced Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Mon, 30 Oct 2023 09:37:36 +0330 Subject: [PATCH 88/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 70aafc501d04..57553fbd0466 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ 
b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -85,7 +85,7 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: try: matrix = np.loadtxt(Path(__file__).parent / "matrix.txt") except FileNotFoundError: - sys.exit(f"Error: File not found.") + sys.exit("Error: File not found.") # Example usage: solution = solve_linear_system(matrix) From ec6aedd0f5d79981a87ecc71d2e4fce7f1376151 Mon Sep 17 00:00:00 2001 From: Mohammad Esfandiyar Date: Mon, 30 Oct 2023 12:32:43 +0330 Subject: [PATCH 89/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index 57553fbd0466..a98c8ce9d00f 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -65,16 +65,12 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: * ab[column_num - 1, :] ) - # Find x vector - column_num = num_of_rows - while column_num != 0: - column_num -= 1 - line_of_x = ab[column_num, num_of_rows] - if column_num + 1 != num_of_rows: - for y in range(1, num_of_rows - column_num): - line_of_x += -ab[column_num, num_of_rows - y] * x_lst[y - 1] - x = line_of_x / ab[column_num, column_num] - x_lst.append(x) + # Find x vector (Back Substitution) + for column_num in range(num_of_rows - 1, -1, -1): + x = ab[column_num, -1] / ab[column_num, column_num] + x_lst.insert(0, x) + for i in range(column_num - 1, -1, -1): + ab[i, -1] -= ab[i, column_num] * x # Return the solution vector return np.asarray(x_lst) @@ -92,6 +88,7 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: print("Solution:", solution) + # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From a5b345f0f341331d9193cfe52328c09244a4cd67 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 30 Oct 2023 09:03:49 +0000 Subject: [PATCH 90/92] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../gaussian_elimination_pivoting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index a98c8ce9d00f..c71a43516c15 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -88,7 +88,6 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: print("Solution:", solution) - # Example usage: # n_size = 3 # a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) From 20462d152d9cfc765e075f9a1c46cbdd858a7827 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Mon, 30 Oct 2023 14:10:14 +0100 Subject: [PATCH 91/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 56 +++++++++---------- 1 file changed, 26 insertions(+), 30 deletions(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index c71a43516c15..c77dd9e2472f 
100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -1,23 +1,30 @@ -import sys -from pathlib import Path - import numpy as np +matrix = np.array( + [ + [5.0, -5.0, -3.0, 4.0, -11.0], + [1.0, -4.0, 6.0, -4.0, -10.0], + [-2.0, -5.0, 4.0, -5.0, -12.0], + [-3.0, -3.0, 5.0, -5.0, 8.0], + ], + dtype=float, +) + def solve_linear_system(matrix: np.ndarray) -> np.ndarray: """ - Solves a linear system of equations using - Gaussian elimination with partial pivoting. + Solve a linear system of equations using Gaussian elimination with partial pivoting Args: - - matrix (np.ndarray): Coefficient matrix - with the last column representing the constants. + - matrix: Coefficient matrix with the last column representing the constants. Returns: - - np.ndarray: Solution vector. + - Solution vector. Raises: - - sys.exit: If the matrix is not correct (i.e., singular). + - ValueError: If the matrix is not correct (i.e., singular). + + https://courses.engr.illinois.edu/cs357/su2013/lect.htm Lecture 7 Example: >>> A = np.array([[2, 1, -1], [-3, -1, 2], [-2, 1, 2]], dtype=float) @@ -25,6 +32,7 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: >>> solution = solve_linear_system(np.column_stack((A, B))) >>> np.allclose(solution, np.array([2., 3., -1.])) True + >>> solve_linear_system(np.column_stack(([[]], [[1]]))) """ ab = np.copy(matrix) num_of_rows = ab.shape[0] @@ -37,7 +45,7 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: if abs(ab[i][column_num]) > abs(ab[column_num][column_num]): ab[[column_num, i]] = ab[[i, column_num]] if ab[column_num, column_num] == 0.0: - raise sys.exit("Matrix is not correct") + raise ValueError("Matrix is not correct") else: pass if column_num != 0: @@ -54,7 +62,7 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: if abs(ab[i][column_num]) > abs(ab[column_num][column_num]): ab[[column_num, i]] = ab[[i, column_num]] if ab[column_num, column_num] == 0.0: - raise sys.exit("Matrix is not correct") + raise ValueError("Matrix is not correct") else: pass if column_num != 0: @@ -77,25 +85,13 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: if __name__ == "__main__": - file_path = "matrix.txt" + from pathlib import Path + + file_path = Path(__file__).parent / "matrix.txt" try: - matrix = np.loadtxt(Path(__file__).parent / "matrix.txt") + matrix = np.loadtxt(file_path) except FileNotFoundError: - sys.exit("Error: File not found.") - - # Example usage: - solution = solve_linear_system(matrix) - print("Solution:", solution) - - -# Example usage: -# n_size = 3 -# a_matrix = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float) -# b_vector = np.array([10, 11, 12], dtype=float) - -# solution = custom_gauss_elimination_pivoting(a_matrix, b_vector, n_size) -# print("Solution:", solution) - + print(f"Error: {file_path} not found. Using default matrix instead.") -# URL that points to Wikipedia or another similar explanation. 
-# >>>>>>URL:https://courses.engr.illinois.edu/cs357/su2013/lectures/lecture07.pdf<<<<<# + print(f"Matrix:\n{matrix}") + print(f"{solve_linear_system(matrix) = }") From 7ff1a46db771789e5eb40a9d25606af27aede935 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Mon, 30 Oct 2023 14:17:37 +0100 Subject: [PATCH 92/92] Update gaussian_elimination_pivoting.py --- .../gaussian_elimination_pivoting.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py index c77dd9e2472f..2a86350e9fc6 100644 --- a/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py +++ b/linear_algebra/src/gaussian_elimination_pivoting/gaussian_elimination_pivoting.py @@ -32,7 +32,8 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: >>> solution = solve_linear_system(np.column_stack((A, B))) >>> np.allclose(solution, np.array([2., 3., -1.])) True - >>> solve_linear_system(np.column_stack(([[]], [[1]]))) + >>> solve_linear_system(np.array([[0, 0], [0, 0]], dtype=float)) + array([nan, nan]) """ ab = np.copy(matrix) num_of_rows = ab.shape[0] @@ -85,13 +86,16 @@ def solve_linear_system(matrix: np.ndarray) -> np.ndarray: if __name__ == "__main__": + from doctest import testmod from pathlib import Path + testmod() file_path = Path(__file__).parent / "matrix.txt" try: matrix = np.loadtxt(file_path) except FileNotFoundError: print(f"Error: {file_path} not found. Using default matrix instead.") + # Example usage: print(f"Matrix:\n{matrix}") print(f"{solve_linear_system(matrix) = }")
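The Cond(A) experiment from the early revisions (perturb the largest entry of b by 1% and compare the relative change in the solution against the relative change in b) did not survive into the final file. A self-contained sketch of the same idea, using the matrix.txt values inline, vector norms in place of the original's plain sums, and np.linalg.solve standing in for the patched solver:

    import numpy as np

    a = np.array(
        [
            [5.0, -5.0, -3.0, 4.0],
            [1.0, -4.0, 6.0, -4.0],
            [-2.0, -5.0, 4.0, -5.0],
            [-3.0, -3.0, 5.0, -5.0],
        ]
    )
    b = np.array([-11.0, -10.0, -12.0, 8.0])

    x = np.linalg.solve(a, b)

    # Perturb the largest-magnitude entry of b by 1% and solve again.
    b_mod = b.copy()
    b_mod[np.argmax(np.abs(b))] *= 1.01
    x_mod = np.linalg.solve(a, b_mod)

    # The relative change in x divided by the relative change in b is
    # bounded above by cond(A), so this ratio is a lower bound on it.
    estimate = (np.linalg.norm(x - x_mod) / np.linalg.norm(x)) / (
        np.linalg.norm(b_mod - b) / np.linalg.norm(b)
    )
    print(f"estimated cond(a) >= {estimate:.3f}")
    print(f"exact 2-norm cond(a) = {np.linalg.cond(a):.3f}")

Unlike the sum-based formula the removed code printed, the norm-based ratio is guaranteed to stay at or below the true condition number, which np.linalg.cond reports exactly.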