# main.py

import glob
import os
from typing import Any

import matplotlib.pyplot as plt  # noqa: E402
import numpy as np  # noqa: E402
import pandas as pd  # noqa: E402
from netCDF4 import Dataset, Variable  # noqa: E402
from numpy import ndarray
from numpy.ma import MaskedArray
from numpy.typing import NDArray
from pandas import DataFrame

# Annotation used in this file for values obtained by indexing a netCDF variable.
VariableSubset = NDArray | MaskedArray | Any

# Upper bound on the number of subplot columns; also used to map a file index to (row, col).
number_of_columns: int = 5

# NB: A "run" is the set of simulation results for one investigation; for example, all aky_* files matching `parameter_scans/beta_scan/aky/aky_*.in` form one run.
# A "scan" is a single simulation result, e.g. one single aky file.


def remove_values_from_list(the_list, val):
    """Return a new list with every element equal to ``val`` left out.

    The input is not modified and the order of the remaining elements is kept.
    """
    return list(filter(lambda item: item != val, the_list))


def generate_fig_and_axs(num_plots: int) -> tuple:
    """Create a figure and a 2-D grid of axes large enough for ``num_plots`` panels.

    The grid has at most ``number_of_columns`` columns and as many rows as are
    needed to hold every panel.

    Args:
        num_plots: Number of panels required. Values below 1 are treated as 1,
            because ``plt.subplots`` cannot build a grid with zero columns.

    Returns:
        The ``(fig, axs)`` pair from ``plt.subplots``; ``axs`` is always a 2-D
        array (``squeeze=False``) so it can be indexed as ``axs[row, col]``.
    """
    panels = max(num_plots, 1)
    ncols = min(panels, number_of_columns)
    # Ceiling division: the last row may be only partially filled.
    nrows = -(-panels // ncols)
    fig, axs = plt.subplots(
        nrows, ncols, figsize=(15, 5 * nrows), constrained_layout=True, squeeze=False
    )
    return fig, axs


def extract_variables(
    file_path: str, extra_variables: list, over_all_t: bool = False
) -> tuple[Any, Any, Any, Any] | tuple[Any, Any, dict[str, Any]]:
    """Read ``omega``, ``t``, ``phi2`` and any requested extra variables from a netCDF file.

    ``omega`` is indexed as ``omega[time, 0, 0, component]``; component 0 is
    treated as the frequency and component 1 as the growth rate.

    Args:
        file_path: Path of the netCDF file to open read-only.
        extra_variables: Names of additional variables in the file. For each
            one, its element at index 0 is collected. They are read regardless
            of ``over_all_t``, so an unknown name fails in both modes.
        over_all_t: Selects which tuple is returned (see below). Defaults to False.

    Returns:
        If ``over_all_t`` is True: ``(t, frequency_over_all_t,
        growth_rate_over_all_t, phi2)``, i.e. the full time axis, both omega
        components at every time index, and the whole ``phi2`` array.
        Otherwise: ``(final_frequency, final_growth_rate, extra_variables_values)``,
        i.e. both omega components at the last time index and a dict mapping each
        name in ``extra_variables`` to the value read for it.

    Errors raised while opening or reading the file propagate to the caller;
    the file is closed first.
    """
    # The context manager guarantees the file handle is released even when a
    # variable is missing or has an unexpected shape.
    with Dataset(file_path, "r") as data:
        omega: VariableSubset = data.variables["omega"][:]
        t: VariableSubset = data.variables["t"][:]

        frequency_over_all_t = omega[:, 0, 0, 0]
        growth_rate_over_all_t = omega[:, 0, 0, 1]

        phi2: VariableSubset = data.variables["phi2"][:]
        extra_variables_values: dict = {
            variable: data.variables[variable][0] for variable in extra_variables
        }

        final_frequency = omega[-1, 0, 0, 0]
        final_growth_rate = omega[-1, 0, 0, 1]

    if over_all_t:
        return t, frequency_over_all_t, growth_rate_over_all_t, phi2
    return (final_frequency, final_growth_rate, extra_variables_values)


def plot_data_for_all_t(
    axs: ndarray,
    row: int,
    col: int,
    t: VariableSubset,
    frequency_over_all_t,
    growth_rate_over_all_t,
    phi2,
    file_path: str,
):
    """Draw one file's time traces into the panel ``axs[row, col]``.

    Frequency and growth rate are plotted against ``t`` on the panel itself;
    ``phi2`` is plotted in green on a twin y-axis with a logarithmic scale.
    The panel title is the base name of ``file_path``.
    """
    panel = axs[row, col]
    traces = (
        (frequency_over_all_t, "Frequency"),
        (growth_rate_over_all_t, "Growth Rate"),
    )
    for values, label in traces:
        panel.plot(t, values, label=label)

    # phi2 gets its own y-axis (sharing x) so it can use a log scale.
    phi2_axis = panel.twinx()
    phi2_axis.plot(t, phi2, label="phi2", color="green")
    phi2_axis.set_yscale("log")

    panel.set_title(f"File: {os.path.basename(file_path)}", fontsize=8)


def get_directories(directory_pattern: str) -> list[str]:
    """
    Expand the glob ``directory_pattern`` and keep only the matches that are directories.
    Matches are returned in the order ``glob.glob`` produced them.
    """
    return list(filter(os.path.isdir, glob.glob(directory_pattern)))


def get_fig_axs_for_each_ky(relative_file_path: str, directories: list[str]) -> tuple:
    """
    Builds a figure and axes grid with one panel per entry in directories.
    relative_file_path is accepted but not used by this function.
    """
    return generate_fig_and_axs(len(directories))


def plot_final_values_for_one_run(
    run_constant_parameters, chosen_variable, final_values, subtitle_file_path=""
):
    """Plot the final frequency and growth rate of one run against ``chosen_variable``.

    Args:
        run_constant_parameters: Names of the parameter columns that follow the
            two result columns in ``final_values``.
        chosen_variable: Column used as the x-axis. ``"nesub"``, ``"nesuper"``
            and ``"npassing"`` are derived here from ``nenergy`` / ``nlambda``
            using fixed offsets; any other value must be one of
            ``run_constant_parameters``.
        final_values: 2-D data with columns
            ``[final_frequency, final_growth_rate, *run_constant_parameters]``.
        subtitle_file_path: Extra line appended to the title of the plain
            (non-resolution) plot.

    Returns:
        A DataFrame holding every column (including any derived one), not
        indexed or sorted, taken before missing values are filled for plotting.

    Side effects:
        Draws the figure and blocks in ``plt.show()``.
    """
    frequency_column = "final_frequency"
    growth_rate_column = "final_growth_rate"

    final_values_df = pd.DataFrame(
        final_values,
        columns=[frequency_column, growth_rate_column, *run_constant_parameters],
    )

    unwanted_variables = remove_values_from_list(
        run_constant_parameters, chosen_variable
    )
    if chosen_variable == "nesub":
        nesuper = 4
        final_values_df["nesub"] = final_values_df["nenergy"] - nesuper
    if chosen_variable == "nesuper":
        nesub = 12
        final_values_df["nesuper"] = final_values_df["nenergy"] - nesub
    if chosen_variable == "npassing":
        ntheta = 64
        final_values_df["npassing"] = final_values_df["nlambda"] - ((ntheta / 2) + 1)
    final_values_original = final_values_df.copy()

    # fillna returns a new frame; the result has to be kept for the fill to
    # have any effect on what is plotted.
    plot_df = (
        final_values_df.set_index(chosen_variable)
        .sort_index()
        .drop(columns=unwanted_variables)
        .fillna(0)
    )

    if chosen_variable in ["nperiod", "ntheta", "nesub", "nesuper", "npassing"]:
        # For each value, the error term is the difference between that value
        # and the value at the largest setting of chosen_variable (last row
        # after sorting).
        plot_df["Frequency Error"] = (
            plot_df[frequency_column] - plot_df[frequency_column].iloc[-1]
        )
        plot_df["Growth Rate Error"] = (
            plot_df[growth_rate_column] - plot_df[growth_rate_column].iloc[-1]
        )

        axes = plot_df.plot(
            kind="line",
            subplots=[
                (frequency_column, growth_rate_column),
                ("Frequency Error", "Growth Rate Error"),
            ],
        )
        # One title per grouped subplot, applied on the axes themselves so the
        # count matches the number of groups rather than the number of columns.
        subplot_titles = [
            "Final Frequency and Growth Rate vs. " + chosen_variable,
            "Error vs. " + chosen_variable,
        ]
        for axis, subplot_title in zip(axes, subplot_titles):
            axis.set_title(subplot_title)
        axes[-1].axhline(y=0, linestyle="--")
    else:
        plot_df.plot(
            title="Final Frequency and Growth Rate vs. "
            + chosen_variable
            + "\n"
            + subtitle_file_path
        )
    plt.show()
    return final_values_original


def get_final_values(
    relative_file_path: str,
    run_constant_parameters: list[str],
    plot_each_file_over_t: bool = True,
):
    """Collect the final frequency, growth rate and parameters for every scan of a run.

    For each directory matched by ``relative_file_path`` the file
    ``{directory}/{file_name}.out.nc`` is read, where ``file_name`` comes from
    ``get_file_structure``.

    Args:
        relative_file_path: Glob pattern identifying the scan directories of one run.
        run_constant_parameters: Names of the extra variables read from each file.
        plot_each_file_over_t: When True, also plot each file's time traces in a
            grid of panels and show the figure. Defaults to True.

    Returns:
        A float array with one row per matched directory and columns
        ``[final_frequency, final_growth_rate, *run_constant_parameters]``.
        Rows whose output file does not exist are left as NaN.
    """
    directories, file_names, number_of_directories = get_file_structure(
        relative_file_path
    )
    if plot_each_file_over_t:
        _, axs = get_fig_axs_for_each_ky(relative_file_path, directories)

    # NaN rather than uninitialised memory, so rows for missing files are
    # recognisable instead of carrying arbitrary numbers into later plots.
    final_values = np.full(
        (number_of_directories, len(run_constant_parameters) + 2), np.nan
    )

    for index, (directory, file_name) in enumerate(zip(directories, file_names)):
        file_path = f"{directory}/{file_name}.out.nc"
        if not os.path.isfile(file_path):
            print(f"File {file_path} not found.")
            continue

        # The full time series is only needed for the per-file panels, so the
        # file is not read a second time when plotting is switched off.
        if plot_each_file_over_t:
            row, col = divmod(index, number_of_columns)
            t, frequency_over_all_t, growth_rate_over_all_t, phi2 = extract_variables(  # type: ignore
                file_path,
                run_constant_parameters,
                over_all_t=True,
            )
            plot_data_for_all_t(
                axs,
                row,
                col,
                t,
                frequency_over_all_t,
                growth_rate_over_all_t,
                phi2,
                file_path,
            )

        final_frequency, final_growth_rate, extra_variables_values = (
            extract_variables(
                file_path,
                run_constant_parameters,
                over_all_t=False,
            )  # type: ignore
        )
        final_values[index] = [
            final_frequency,
            final_growth_rate,
            *extra_variables_values.values(),
        ]

    if plot_each_file_over_t:
        plt.suptitle("Frequency and Growth Rate vs Time\n" + relative_file_path)
        plt.show()
    return final_values


def get_file_structure(relative_file_path: str):
    """Resolve a run's glob pattern into its sorted directories and derived file names.

    Returns a ``(directories, file_names, number_of_directories)`` tuple.
    ``directories`` are the sorted matches of ``relative_file_path`` that are
    directories; each file name is the directory path with the pattern's
    leading path prefix and any ``.in`` substring removed.
    """
    # The prefix is the first three "/"-separated segments of the pattern
    # (layout used for resolution_scans; beta_ky_scan used the first two).
    prefix = "/".join(relative_file_path.split("/")[:3])

    directories: list[str] = sorted(get_directories(relative_file_path))
    file_names: list[str] = [
        directory.replace(prefix, "").replace(".in", "") for directory in directories
    ]
    return directories, file_names, len(directories)


def main() -> None:
    """Run the frequency / growth-rate comparison for each group of runs in turn.

    Every group is compared over ``ky`` using the same list of per-run parameters.
    """
    chosen_variable = "ky"
    run_constant_parameters = [
        "ky",
        "beta",
        "nperiod",
        "ntheta",
        "nenergy",
        "nlambda",
    ]

    comparison_groups = (
        # Slowing-down file paths.
        [
            "parameter_scans/maxwellian_scans/aky/aky_*",
            "parameter_scans/sdanalytic_scans/aky/aky_*",
            "parameter_scans/no_high_alphas/aky/aky_*",
            "parameter_scans/no_high_alphas_lower_ash_density/aky/aky_*",
        ],
        # Gradient file paths.
        [
            "parameter_scans/sdanalytic_tprim0_scans/aky/aky_*",
            "parameter_scans/sdanalytic_fprim0_scans/aky/aky_*",
        ],
    )

    for relative_file_paths in comparison_groups:
        compare_freq_and_growth_rates(
            run_constant_parameters, chosen_variable, relative_file_paths
        )


def compare_freq_and_growth_rates(
    run_constant_parameters,
    chosen_variable,
    relative_file_paths,
    *,
    max_index_value: float | None = 0.675,
    plot_each_file_over_t: bool = False,
):
    """Plot final frequencies and growth rates of several runs on shared axes.

    Each run is first plotted on its own via ``plot_final_values_for_one_run``.
    Its columns are then suffixed with the run's purpose (the second
    "/"-separated segment of its path) and all runs are joined on
    ``chosen_variable``. The joined table is printed and then plotted as two
    subplots: one with all frequency columns, one with all growth-rate columns.

    Args:
        run_constant_parameters: Names of the parameter columns read for every scan.
        chosen_variable: Column the runs are indexed, joined and plotted against.
        relative_file_paths: One glob pattern per run. Each must contain at least
            one "/" so that a run purpose can be taken from it.
        max_index_value: Rows whose index exceeds this value are left out of the
            final plot (the printed table is unfiltered). ``None`` disables the
            filter. Defaults to 0.675.
        plot_each_file_over_t: Forwarded to ``get_final_values``; when True the
            time traces of every file are plotted as well. Defaults to False.

    Side effects:
        Prints the joined DataFrame, draws figures and blocks in ``plt.show()``.
    """
    run_purposes = [file_path.split("/")[1] for file_path in relative_file_paths]

    # Read every run before any summary figure is shown.
    final_values_for_each_run: list = [
        get_final_values(file_path, run_constant_parameters, plot_each_file_over_t)
        for file_path in relative_file_paths
    ]

    unwanted_variables = remove_values_from_list(
        run_constant_parameters, chosen_variable
    )
    final_values_dataframes: list[DataFrame] = []
    for run_purpose, file_path, final_values_for_one_run in zip(
        run_purposes, relative_file_paths, final_values_for_each_run
    ):
        run_dataframe = plot_final_values_for_one_run(
            run_constant_parameters,
            chosen_variable,
            final_values_for_one_run,
            file_path,
        )
        # Suffix every column with the run purpose so columns from different
        # runs stay distinguishable after the join.
        run_dataframe = run_dataframe.rename(
            columns={col: f"{col}_{run_purpose}" for col in run_dataframe.columns}
        )
        run_dataframe = run_dataframe.set_index(
            f"{chosen_variable}_{run_purpose}"
        ).sort_index()
        run_dataframe = run_dataframe.drop(
            columns=[
                f"{unwanted_variable}_{run_purpose}"
                for unwanted_variable in unwanted_variables
            ]
        )
        final_values_dataframes.append(run_dataframe)

    joined_dataframe = pd.concat(
        final_values_dataframes,
        axis=1,
        join="outer",
    ).sort_index()

    print("Joined DataFrame: \n", joined_dataframe)
    if max_index_value is not None:
        joined_dataframe = joined_dataframe[joined_dataframe.index <= max_index_value]

    # Match on the column-name prefix so a run purpose that happens to contain
    # "frequency" or "growth" cannot put a column into the wrong subplot.
    frequency_columns = tuple(
        col for col in joined_dataframe.columns if col.startswith("final_frequency")
    )
    growth_columns = tuple(
        col for col in joined_dataframe.columns if col.startswith("final_growth_rate")
    )

    joined_dataframe.plot(
        subplots=[frequency_columns, growth_columns],
        # Label the axis with the variable actually used as the index.
        xlabel=chosen_variable,
    )

    plt.tight_layout()
    plt.show()


# Run the comparisons only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()