cristianoc · cristianoc · Aug 8, 2024 · Aug 8, 2024 · Aug 8, 2024 · Aug 8, 2024
diff --git a/augment.py b/augment.py
@@ -0,0 +1,41 @@
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+
+# Augmentation parameters: every modulus n adds one feature column, x % n
+n_values = [2, 3, 4]
+
+# Generate a larger training dataset (the integers 0..999); the target is
+# x % 3, which is also one of the augmented feature columns
+x_large_train = np.arange(0, 1000)
+X_large_augmented = np.column_stack([x_large_train % n for n in n_values])
+y_large_mod_3 = x_large_train % 3
+
+# Train the logistic regression model for x % 3 with more data.
+# `multi_class` is not passed: it is deprecated since scikit-learn 1.5, and
+# lbfgs already fits a multinomial model for the three classes used here.
+logistic_large_mod_3 = LogisticRegression(max_iter=1000, solver='lbfgs')
+logistic_large_mod_3.fit(X_large_augmented, y_large_mod_3)
+
+# Generate test data far outside the training range; the residue features
+# themselves stay inside the values seen during training
+x_far_test = np.arange(10000, 10100)
+X_far_test_augmented = np.column_stack([x_far_test % n for n in n_values])
+y_far_test = x_far_test % 3
+
+# Evaluate the logistic regression model for x % 3 on far away test data
+probs_far_mod_3 = logistic_large_mod_3.predict_proba(X_far_test_augmented)
+far_test_accuracy_mod_3 = logistic_large_mod_3.score(X_far_test_augmented, y_far_test)
+
+# Bucket each prediction by its top class probability:
+# >= 0.8 is "High", >= 0.6 is "Medium", anything lower is "Low"
+confidence_levels_far_mod_3 = [
+    "High" if max_prob >= 0.8 else "Medium" if max_prob >= 0.6 else "Low"
+    for max_prob in probs_far_mod_3.max(axis=1)
+]
+
+# Display the test accuracy and the confidence levels for the first few instances
+print("Test Accuracy for x % 3:", far_test_accuracy_mod_3)
+print("Prediction Probabilities for first 5 instances:")
+print(probs_far_mod_3[:5])
+print("Confidence Levels for first 5 instances:")
+print(confidence_levels_far_mod_3[:5])
diff --git a/augment2d.py b/augment2d.py
@@ -0,0 +1,222 @@
+from matplotlib import colors
+import numpy as np
+from sklearn.linear_model import LogisticRegression
+import matplotlib.pyplot as plt
+from typing import Any, List, Tuple, Callable
+
+# Type aliases for readability (ndarray is generic over shape first, then dtype)
+Array = np.ndarray[Any, np.dtype[np.int64]]
+FeatureFunction = Callable[[Array, Array, int], Array]
+plt: Any = plt  # NOTE(review): presumably re-declared as Any to quiet the type checker
+
+# Augmentation parameters: the moduli applied by every feature operation
+modulo_n_values: List[int] = [2, 3, 4]
+
+# Grid pattern generation
+
+
+def generate_grid(size: int, modulo: int) -> Tuple[Array, Array, Array]:
+    """Return meshgrid coordinates x, y and the labels (x + y) % modulo."""
+    axis: Array = np.arange(size)
+    x, y = np.meshgrid(axis, axis) # type: ignore
+    labels: Array = (x + y) % modulo # type: ignore
+    return x, y, labels # type: ignore
+
+
+# Augmentation operations, each paired with the name template of its feature
+operations: List[Tuple[FeatureFunction, str]] = [
+    (lambda col, _row, mod: col % mod, "x % {n}"),
+    (lambda _col, row, mod: row % mod, "y % {n}"),
+    (lambda col, row, mod: (col + row) % mod, "(x + y) % {n}"),
+    (lambda col, row, mod: (col - row + mod) % mod, "(x - y + {n}) % {n}"),
+    (lambda col, row, mod: (row - col + mod) % mod, "(y - x + {n}) % {n}")
+] # type: ignore
+
+# Feature augmentation
+
+
+def augment_features(X: Array, Y: Array, n_values: List[int]) -> Array:
+    """Build the feature matrix with one column per (operation, modulus) pair.
+
+    Columns are operation-major: all of n_values for the first operation, etc.
+    """
+    xs, ys = X.flatten(), Y.flatten()
+    columns: List[Array] = [
+        op(xs, ys, n) for op, _ in operations for n in n_values
+    ]
+    return np.column_stack(columns) # type: ignore
+
+# Generate feature function names
+
+
+def generate_feature_functions(n_values: List[int]) -> List[str]:
+    """Return one formatted feature name per (operation, modulus) pair."""
+    names: List[str] = [
+        template.format(n=n) for _, template in operations for n in n_values
+    ]
+    return names
+
+# Train logistic regression model
+
+
+def train_model(X: Array, y: Array) -> LogisticRegression:
+    """Fit and return an lbfgs logistic regression on features X, labels y."""
+    # `multi_class` is deliberately not passed: it is deprecated since
+    # scikit-learn 1.5, and lbfgs already fits a multinomial model whenever
+    # y has three or more classes.
+    model = LogisticRegression(max_iter=1000, solver='lbfgs')
+    model.fit(X, y) # type: ignore
+    return model
+
+# Evaluate the model
+
+
+def evaluate_model(model: LogisticRegression, X: Array, y: Array) -> Tuple[float, Array, Array, List[str]]:
+    """Score model on (X, y) and grade the confidence of each prediction.
+
+    Returns (accuracy, probabilities, predictions, confidence levels); a level
+    is "High" for a top probability >= 0.8, "Medium" for >= 0.6, else "Low".
+    """
+    probs = model.predict_proba(X) # type: ignore
+    accuracy = model.score(X, y) # type: ignore
+    predictions = model.predict(X) # type: ignore
+    confidence_levels: List[str] = [
+        "High" if top >= 0.8 else "Medium" if top >= 0.6 else "Low"
+        for top in map(max, probs) # type: ignore
+    ]
+    return accuracy, probs, predictions, confidence_levels # type: ignore
+
+# Print model coefficients
+
+
+def print_model_coefficients(model_: LogisticRegression, feature_functions: List[str]) -> None:
+    """Print the first class row (coef_[0], intercept_[0]) of a fitted model."""
+    fitted: Any = model_
+    weights, bias = fitted.coef_[0], fitted.intercept_[0]
+    print("Learned function:")
+    for weight, name in zip(weights, feature_functions):
+        # Skip terms whose coefficient rounds to 0 at two decimals.
+        if (shown := round(weight, 2)) != 0:
+            print(f"{shown} * {name}")
+    print("Intercept:", round(bias, 2))
+
+# Derive the most likely predicted function
+
+
+def derive_predicted_function(model: LogisticRegression, feature_functions: List[str]) -> str:
+    """Name the feature with the largest-magnitude coefficient in coef_[0].
+
+    The result is formatted as "f(x, y) => <feature name>". Only the first
+    class row of the model is inspected.
+    """
+    weights = model.coef_[0] # type: ignore
+
+    # Rank (coefficient, name) pairs by absolute coefficient, largest first.
+    ranked = sorted( # type: ignore
+        zip(weights, feature_functions), key=lambda pair: abs(pair[0]), reverse=True) # type: ignore
+    _, top_term = ranked[0]
+
+    return f"f(x, y) => {top_term}"
+
+# Simplified predicted class function based on significant terms
+
+
+def predicted_class(x: int, y: int):
+    """Classify the cell (x, y) from the residue of x + y modulo 4.
+
+    Returns 0 when the residue is 0 or 2, and 1 for any other residue.
+    """
+    residue = (x + y) % 4
+    # Residues 0 and 2 map to class 0; everything else maps to class 1.
+    return 0 if residue in (0, 2) else 1
+
+
+# Custom palette of ten hex colors; `cmap` below wraps it in a ListedColormap
+color_scheme = [
+    '#000000',  # black
+    '#0074D9',  # blue
+    '#FF4136',  # red
+    '#2ECC40',  # green
+    '#FFDC00',  # yellow
+    '#AAAAAA',  # grey
+    '#F012BE',  # fuchsia
+    '#FF851B',  # orange
+    '#870C25',  # brown
+    '#7FDBFF',  # teal
+]
+# NOTE(review): visualize_results passes cmap='viridis', so `cmap` looks unused — confirm
+cmap = colors.ListedColormap(color_scheme)
+
+
+# Visualization
+
+# Simplified Visualization
+
+def visualize_results(true_color_indices: Array, predicted_color_indices: Array, grid_size: int) -> None:
+    """Show the expected and predicted label grids side by side.
+
+    Both label arrays are reshaped to (grid_size, grid_size) and drawn with
+    the 'viridis' colormap and hidden axes; plt.show() is called last.
+    """
+    expected = true_color_indices.reshape(grid_size, grid_size)
+    predicted = predicted_color_indices.reshape(grid_size, grid_size)
+    panels = (("Expected Checkerboard Pattern", expected),
+              ("Predicted Checkerboard Pattern", predicted))
+    _fig, axs = plt.subplots(1, 2, figsize=(12, 5))
+    for ax, (title, grid) in zip(axs, panels):
+        ax.imshow(grid, cmap='viridis')
+        ax.set_title(title)
+        ax.axis('off')
+
+    plt.tight_layout()
+    plt.show()
+
+# Main function
+
+
+def main() -> None:
+    """Train on a 100x100 grid, evaluate on a shifted grid, print and plot."""
+    # Generate training data (checkerboard pattern)
+    grid_size = 100
+    modulo = 4
+    X_train, Y_train, color_indices_train = generate_grid(grid_size, modulo)
+    X_augmented_train = augment_features(X_train, Y_train, modulo_n_values)
+
+    # Train the logistic regression model
+    logistic_model = train_model(X_augmented_train, color_indices_train.flatten())
+
+    # Print the model coefficients next to their feature names
+    feature_functions = generate_feature_functions(modulo_n_values)
+    print_model_coefficients(logistic_model, feature_functions)
+
+    # Derive and print the most likely predicted function
+    predicted_function = derive_predicted_function(logistic_model, feature_functions)
+    print("The most likely predicted function:")
+    print(predicted_function)
+
+    # Generate test data shifted well past the training range (0..grid_size-1) so
+    # the score measures generalization; labels are recomputed for the new cells
+    test_grid_size = 100
+    test_offset = 1000
+    X_base, Y_base, _ = generate_grid(test_grid_size, modulo)
+    X_test, Y_test = X_base + test_offset, Y_base + test_offset
+    color_indices_test = (X_test + Y_test) % modulo
+    X_augmented_test = augment_features(X_test, Y_test, modulo_n_values)
+
+    # Evaluate the logistic regression model
+    test_accuracy, probs_test, predictions, confidence_levels_test = evaluate_model(
+        logistic_model, X_augmented_test, color_indices_test.flatten())
+
+    # Display the test accuracy and the confidence levels for the first few instances
+    print("Test Accuracy for checkerboard pattern:", test_accuracy)
+    print("Prediction Probabilities for first 5 instances:")
+    print(probs_test[:5])
+    print("Confidence Levels for first 5 instances:")
+    print(confidence_levels_test[:5])
+
+    # Visualize the results
+    visualize_results(color_indices_test.flatten(), predictions, test_grid_size)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/fft.py b/fft.py
@@ -0,0 +1,42 @@
+import numpy.typing as npt
+import numpy as np
+
+num_points = 1000  # number of samples in every signal built below
+# Sample positions 0 .. num_points - 1
+x: npt.NDArray[np.int_] = np.arange(num_points)
+
+def dominant_frequency_likelihood(y: npt.NDArray[np.int_]) -> float:
+    """Return the ratio of the strongest non-DC FFT magnitude to their mean.
+
+    A signal dominated by one frequency scores high. When every non-DC
+    magnitude is exactly zero (e.g. an all-zero signal) the score is 0.0.
+
+    Raises:
+        ValueError: if y has fewer than two samples (no non-DC bin exists).
+    """
+    if len(y) < 2:
+        raise ValueError("need at least two samples to find a dominant frequency")
+
+    # Magnitudes of every FFT bin except the zero-frequency (DC) component
+    magnitudes = np.abs(np.fft.fft(y))[1:]
+
+    average_magnitude = magnitudes.mean()
+    if average_magnitude == 0:
+        # Flat spectrum: avoid 0 / 0 (NaN plus a RuntimeWarning)
+        return 0.0
+
+    return float(magnitudes.max() / average_magnitude)
+
+def test_mod3():
+    """Print the likelihood score of the period-3 signal x % 3."""
+    score = dominant_frequency_likelihood(x % 3)
+    print(f"Likelihood of periodic patterns in mod3: {score}")
+
+def test_random():
+    """Print the likelihood score of num_points random integers in [0, 100)."""
+    noise: npt.NDArray[np.int_] = np.random.randint(0, 100, size=num_points) # type: ignore
+    print(f"Likelihood of periodic patterns in random: {dominant_frequency_likelihood(noise)}")
+
+# Run both demos at module level: they execute on import as well as on direct run
+test_mod3()
+test_random()