
Incorporate PerfAnalyzer #111

Open · wants to merge 51 commits into main

51 commits
6a3cde8
Add cloverleaf test run parser
Jul 3, 2024
24ebe6b
Add git info in clover parse
Jul 3, 2024
1580cfa
Clover run commit history parsed and visualized in jupyter
sayefsakin Jul 5, 2024
2d82182
Updated clover perf history chart
sayefsakin Jul 8, 2024
4bce3ef
Added clover profiler graph directory through DSI SQLite
sayefsakin Jul 9, 2024
4d2d6ca
added code sensing for pragma and define
sayefsakin Jul 16, 2024
b0219ad
Perf check Workflow with action files
sayefsakin Jul 19, 2024
953b0fd
Conda fix in perf_check workflow
sayefsakin Jul 19, 2024
6b97957
Conda fix for workflow
sayefsakin Jul 19, 2024
096737c
Again Conda testing
sayefsakin Jul 19, 2024
00b3601
Removed miniconda
sayefsakin Jul 19, 2024
3f48de3
Testing on mac image
sayefsakin Jul 19, 2024
cf797a0
Trying miniconda on mac
sayefsakin Jul 19, 2024
2e7e5ce
Trying setup conda v3
sayefsakin Jul 19, 2024
a3636f4
Test clover repo on action
sayefsakin Jul 19, 2024
d00e03d
Action multi checkout check
sayefsakin Jul 19, 2024
74ec7f6
Testing multi checkout workflow
sayefsakin Jul 19, 2024
d955e7a
add linux fortran
sayefsakin Jul 19, 2024
b9f0eef
Testing cloverleaf workflow
sayefsakin Jul 19, 2024
0ce020f
Testing cloverleaf
sayefsakin Jul 19, 2024
c2f6744
Install openmpi in workflow
sayefsakin Jul 19, 2024
b06baf6
Two nodes test
sayefsakin Jul 19, 2024
e1d41da
Test artifact sharing
sayefsakin Jul 19, 2024
7e163eb
Test artifact output
sayefsakin Jul 19, 2024
2043486
Test artifact duplicates
sayefsakin Jul 19, 2024
c3bad13
Test artifact content
sayefsakin Jul 19, 2024
ca82e15
Test artifact fix
sayefsakin Jul 19, 2024
96bd3b6
fly server added for perf analyzer
sayefsakin Jul 23, 2024
2902a6b
Moving to the gate
sayefsakin Jul 25, 2024
264f518
First rebasing
sayefsakin Jul 25, 2024
d34fdf4
all untracked changes added
sayefsakin Jul 25, 2024
db0e13c
Commit table added inside perf analyzer
sayefsakin Aug 1, 2024
9030740
fly server fix
sayefsakin Aug 1, 2024
fd683ee
commit selection table with button
sayefsakin Aug 1, 2024
5471bf6
git runner script with clover example
sayefsakin Aug 2, 2024
0bf8cd1
perf interactions
sayefsakin Aug 5, 2024
3572b22
Auto perf chart update from selected commits
sayefsakin Aug 6, 2024
b42c4a6
perf analyzer source code viewer added
sayefsakin Aug 9, 2024
3b07fbd
perf analyzer diff added
sayefsakin Aug 13, 2024
d0403b1
perf analyzer multi var diff
sayefsakin Aug 13, 2024
77bb7a6
perf analyzer metric filter added
sayefsakin Aug 13, 2024
a3b1f47
perf analyzer mpi parsing added
sayefsakin Aug 14, 2024
ed02a75
perf_analyzer moved to a new folder
sayefsakin Aug 14, 2024
83cce21
perf analyzer custom input in runner script
sayefsakin Aug 14, 2024
78c92ed
perf analyzer unused file removed
sayefsakin Aug 14, 2024
9dcd90f
perf analyzer moved to tools folder
sayefsakin Aug 14, 2024
dc41423
highlight js file removed
sayefsakin Aug 16, 2024
32e3411
readme file added for perf analyzer
sayefsakin Aug 16, 2024
2e60dda
Added readme description for perf analyzer
sayefsakin Aug 16, 2024
1b84f2a
requirement file added for perf analyzer
sayefsakin Aug 16, 2024
8ead224
updated perf analyzer jupyter file
sayefsakin Sep 6, 2024
159 changes: 159 additions & 0 deletions .github/workflows/perf_check.yml
@@ -0,0 +1,159 @@
# This is a basic workflow to help you get started with Actions

name: Code Performance Analyzer

# Controls when the workflow will run
on:
# Triggers the workflow on push or pull request events but only for the "main" branch
# push:
# branches: [ "main" ]
# pull_request:
# branches: [ "main" ]

# Allows you to run this workflow manually from the Actions tab
workflow_dispatch:
# inputs:
# hashes:
# required: true
# type: choice
# description: Make a choice
# options:
# - foo
# - bar
# - baz

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
define-matrix:
runs-on: ubuntu-latest

outputs:
hashes: ${{ steps.hashes.outputs.hashes }}

steps:
- name: Define Hashes
id: hashes
run: |
echo 'hashes=["158e23d08f73d36f71e144851451955b3ae02dff", "89cc919b28f687a25d30b44ddf547201da930c14"]' >> "$GITHUB_OUTPUT"
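  # The JSON array written above is read back with fromJSON() in the matrix
  # below, fanning out one build-and-run job per CloverLeaf commit hash.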
produce-performance-artifacts:
runs-on: ubuntu-latest
defaults:
run:
shell: bash -el {0}
needs: define-matrix
strategy:
matrix:
hashes: ${{ fromJSON(needs.define-matrix.outputs.hashes) }}

steps:
- uses: actions/checkout@v4
with:
repository: UK-MAC/CloverLeaf_ref
ref: ${{ matrix.hashes }}

- uses: fortran-lang/setup-fortran@v1.6.1

- name: Install OpenMPI
run: sudo apt install -y openmpi-bin libopenmpi-dev

# check all the requirements and their versions
- name: Check installed dependencies
run: |
gcc --version
gfortran --version
mpirun --version
lscpu | grep -E '^Thread|^Core|^Socket|^CPU\('
- name: Compile cloverleaf
run: |
make COMPILER=GNU
- name: Run cloverleaf
run: |
mpirun -np 2 clover_leaf
mv clover.out clover_output_${{ matrix.hashes }}.out
- name: Produce Artifact
uses: actions/upload-artifact@v4
with:
name: clover_artifact_${{ matrix.hashes }}
path: clover_output_${{ matrix.hashes }}.out

consume-artifacts:
runs-on: macos-latest
needs:
- produce-performance-artifacts

steps:
- name: Download all workflow run artifacts
uses: actions/download-artifact@v4
with:
path: clover_artifact
pattern: clover_artifact_*
merge-multiple: true

- name: Check artifact files
run: |
ls -R clover_artifact
cd clover_artifact
tail -n 10 clover_output_*
# # This workflow contains a single job called "build"
# build:
# # The type of runner that the job will run on
# runs-on: macos-latest

# defaults:
# run:
# shell: bash -el {0}

# # Steps represent a sequence of tasks that will be executed as part of the job
# steps:
# # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
# - uses: actions/checkout@v4
# # with:
# # repository: UK-MAC/CloverLeaf_ref
# # ref: 0ddf495cf21cc59f84e274617522a1383e2c328c

# # - uses: actions/setup-python@v5
# # with:
# # python-version: '3.10'

# # - name: Add conda to system path
# # run: |
# # # $CONDA is an environment variable pointing to the root of the miniconda directory
# # echo $CONDA/bin >> $GITHUB_PATH



# - uses: conda-incubator/setup-miniconda@v3
# with:
# channels: defaults,conda-forge,spyder-ide
# activate-environment: cdsi
# environment-file: examples/cloverleaf/environment.yml
# auto-activate-base: false

# # - uses: s-weigand/setup-conda@v1.2.2

# # - name: Install dependencies
# # run: |
# # cd examples/cloverleaf
# # conda env create --file environment.yml --name cdsi
# # conda activate cdsi

# # check all the requirements and their versions
# - name: Check installed dependencies
# run: |
# python3 --version
# gcc --version
# conda --version
# gfortran --version
# conda info
# conda list

# # Runs a set of commands using the runners shell
# - name: Run a multi-line script
# run: |
# echo Add other actions to build,
# echo test, and deploy your project.
# ls .
32 changes: 32 additions & 0 deletions tools/perf_analyzer/Readme.md
@@ -0,0 +1,32 @@
## PerfAnalyzer

A tool to analyze software performance alongside its code history, built on top of the DSI SQLite plugin.

Run `fly_server.py`; the dashboard is then available at `http://127.0.0.1:8050/`.

##### Install the dependencies listed in `requirements.txt` first.

Update `runner_script.sh` so that it compiles the code, copies the input file, and runs the program.

Update the `parse_clover_output_file` function in `parse_clover_output.py` to parse your specific output file; it should return a dictionary containing the contents of the parsed output (a minimal sketch follows).
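
A minimal sketch of such a parser, assuming a hypothetical output file with `name : value` lines (the field names are illustrative; each value is wrapped in a single-element list, mirroring the shape the DSI `Dict` reader is given in `parse_clover_output.py`):

```python
import re

def parse_clover_output_file(testname, git_dir):
    """Illustrative only: collect "name : 1.234" lines from <git_dir>/clover.out."""
    data = {'testname': [testname]}
    with open(git_dir + '/clover.out', 'r') as out:
        for line in out:
            # e.g. "Cell Advection   :   0.1369" -> data['cell_advection'] = ['0.1369']
            match = re.match(r'\s*(\w[\w ]*\w)\s*:\s*(\d+\.\d+)', line)
            if match:
                data[match.group(1).replace(' ', '_').lower()] = [match.group(2)]
    return data
```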

#### Features available in the dashboard

PerfAnalyzer is a dashboard-based visualizer for analyzing performance across git commit history using different performance metrics. It has the following features:

- Git history graph
  - Ordered by commit date
  - Filter by git branch
  - Select a subset of git commits
  - Show commit details such as message, committer name, date, and hash
- Performance metric line chart
  - Filter by different metrics
  - Show details on hover
- Commit table
  - Search and filter by date, hash, and message
  - Execute the `runner_script` on a selected commit
  - Show the difference between two commits (using git diff)
- Variable search
  - Use any regex or plain string to search (see the sketch after this list)
  - Show a table of matched variables and files
  - Show file contents
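
The variable search wraps each pattern as `.*(pattern).*` and matches file contents line by line, mirroring `recursive_customized_match` in `code_sensing.py`. A minimal illustration, using a hypothetical source line:

```python
import re

patterns = [r"OMP PARALLEL", r"\w+=\d+\.\d+"]  # a plain string and a regex
line = "!$OMP PARALLEL DO PRIVATE(j) REDUCTION(+:vol)"
for pattern in patterns:
    if re.compile(r".*(" + pattern + r").*").match(line):
        print("matched", repr(pattern), "->", line)
```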
64 changes: 64 additions & 0 deletions tools/perf_analyzer/auto_perf_check.sh
@@ -0,0 +1,64 @@
#!/bin/bash

# make sure that cdsi environment is activated
if [[ $CONDA_DEFAULT_ENV != 'cdsi' ]]; then
    echo "Activate the conda cdsi environment first."
    exit 1
fi
if [ -z "${SOURCE_BASE_DIRECTORY+x}" ]; then
    echo "SOURCE_BASE_DIRECTORY is unset"
    exit 1
else
    echo "SOURCE_BASE_DIRECTORY is set to '$SOURCE_BASE_DIRECTORY'"
fi

# SOURCE_BASE_DIRECTORY="/Users/ssakin/projects/CloverLeaf/CloverLeaf_ref"
MPI_THREADS=4
export CHECK_PREV_COMMITS=15
export OMP_NUM_THREADS=4
base_directory=$(pwd)

run_and_check_commit() {
    echo "current commit hash $1"

    cd "$SOURCE_BASE_DIRECTORY"
    git checkout "$1"
    make clean
    make COMPILER=GNU
    echo "================================ Compile Done ================================ "

    echo "============================= Running CloverLeaf ============================= "
    mpirun -np $MPI_THREADS clover_leaf
    cp clover.out "$base_directory/clover_output/clover_$1.out"
    echo "====================== CloverLeaf Executed for hash $1 ======================= "

    echo "=========================== Running output parser ============================ "
    cd "$base_directory"
    python3 parse_clover_output.py --testname random_test --gitdir "$SOURCE_BASE_DIRECTORY"
    echo "============================ Output CSV updated ============================== "
}

track_variables() {
    echo "current commit hash $1"

    cd "$SOURCE_BASE_DIRECTORY"
    git checkout "$1"

    echo "=========================== Running code sensing ============================= "
    cd "$base_directory"
    python3 code_sensing.py --testname random_test --gitdir "$SOURCE_BASE_DIRECTORY"
    echo "============================ Output CSV updated ============================== "
}

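# Collect the short hashes of the last $CHECK_PREV_COMMITS commits on master,
# newest first, e.g. prev_hash=(0fdb917 1398cd9 158e23d ...)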
cd "$SOURCE_BASE_DIRECTORY"
prev_hash=( $(git log master -n "$CHECK_PREV_COMMITS" --format=format:%h) )

for c_hash in "${prev_hash[@]}"
do
    # run_and_check_commit "$c_hash"
    track_variables "$c_hash"
done

cd "$SOURCE_BASE_DIRECTORY"
git checkout master
echo "=========================== Auto Perf Script Completed ============================ "
90 changes: 90 additions & 0 deletions tools/perf_analyzer/code_sensing.py
@@ -0,0 +1,90 @@
#!/usr/bin/env python3

import argparse
import pickle
import sys
import glob
import re
import git

def recursive_c_directive_match(re_list, search_file_list, cur_dir):
    """ Recursively search files under cur_dir for C preprocessor directives
    (e.g. #pragma, #define) and record each occurrence by file and line number """
    occurrence = dict()
    # glob's root_dir keyword requires Python 3.10+
    for code_file in glob.iglob('**', root_dir=cur_dir, recursive=True):
        for f_type in search_file_list:
            if re.search(f_type + '$', code_file):
                with open(cur_dir + "/" + code_file, 'r') as cf:
                    for line_number, line in enumerate(cf, start=1):
                        for each_re in re_list:
                            line_match = re.compile(r"\s*[#]" + each_re + r"\s+(\w+)[\t\s]+(.*)\s*(\r\n|\r|\n)").match(line)
                            if line_match is not None:
                                # normalize the directive, e.g. "#define WIDTH 960"
                                c_line = "#" + each_re + " " + line_match.group(1)
                                second_part = line_match.group(2)
                                if second_part is not None and len(second_part) > 0:
                                    c_line = c_line + " " + second_part
                                c_line = c_line.rstrip()
                                occurrence[c_line] = occurrence.get(c_line, dict())
                                occurrence[c_line][code_file] = occurrence[c_line].get(code_file, list())
                                occurrence[c_line][code_file].append(line_number)
    print("matching done")
    return occurrence

def recursive_customized_match(re_list, search_file_list, cur_dir):
    """ Recursively search files under cur_dir for lines matching any pattern
    in re_list and record each occurrence by file and line number """
    occurrence = dict()
    for code_file in glob.iglob('**', root_dir=cur_dir, recursive=True):
        for f_type in search_file_list:
            if re.search(f_type + '$', code_file):
                with open(cur_dir + "/" + code_file, 'r') as cf:
                    for line_number, line in enumerate(cf, start=1):
                        for each_re in re_list:
                            line_match = re.compile(r".*(" + each_re + r").*").match(line)
                            if line_match is not None:
                                c_line = line.rstrip()
                                occurrence[c_line] = occurrence.get(c_line, dict())
                                occurrence[c_line][code_file] = occurrence[c_line].get(code_file, list())
                                occurrence[c_line][code_file].append(line_number)
    print("matching done")
    return occurrence

def main():
    """ Both the testname and gitdir arguments are required """
    parser = argparse.ArgumentParser()
    parser.add_argument('--testname', help='the test name')
    parser.add_argument('--gitdir', help='the git directory')
    args = parser.parse_args()
    testname = args.testname
    git_repo = args.gitdir
    if testname is None or git_repo is None:
        parser.print_help()
        sys.exit(1)

git_hash = git.Repo(git_repo).head.object.hexsha
re_list = ["pragma", "define"]
search_file_list = [r"\.c", r"\.cc"]
# occ = recursive_c_directive_match(re_list, search_file_list, git_repo)

# with open(git_hash + '.pickle', 'wb') as handle:
# pickle.dump(occ, handle, protocol=pickle.HIGHEST_PROTOCOL)

# re_list = [r"OMP PARALLEL", r"vol=0\.0", r"\w+=\d+\.\d+"]
search_file_list = [r"\.c", r"\.cc", r"\.f90"]
recursive_customized_match(re_list, search_file_list, git_repo)

# with open(git_hash + '.pickle', 'rb') as handle:
# b = pickle.load(handle)
# print(b)

if __name__ == '__main__':
main()

75 changes: 75 additions & 0 deletions tools/perf_analyzer/create_and_query_dsi_db.py
@@ -0,0 +1,75 @@
#!/usr/bin/env python3

"""
This script reads in the csv file created by parse_clover_output.py.
Then it creates a DSI db from the csv file and performs a query.
"""

import argparse
import sys
from dsi.backends.sqlite import Sqlite, DataType
from dsi.plugins.collection_reader import Dict
isVerbose = True


def test_dsi_dict():
testDict = {"one":1, "two": "number two", "three":"three" }
dsi_dict = Dict(testDict)
print("--")
print(dsi_dict.collections)
print(dsi_dict.base_dict)
print(dsi_dict.output_collector)
# dsi_dict.add_rows()
print("==>")
print(dsi_dict.collections)
print(dsi_dict.base_dict)
print(dsi_dict.output_collector)
# dsi_dict.collections.clear()
dsi_dict.collections.append({"one":15, "three":4, "four":44 })
# dsi_dict.base_dict["four"] = "fort minor"
dsi_dict.add_rows()
print("~~>")
print(dsi_dict.collections)
print(dsi_dict.base_dict)
print(dsi_dict.output_collector)

"""
Creates the DSI db from the csv file
"""

def import_cloverleaf_data(test_name):
csvpath = 'clover_' + test_name + '.csv'
dbpath = 'clover_' + test_name + '.db'
store = Sqlite(dbpath)
store.put_artifacts_csv(csvpath, "rundata", isVerbose=isVerbose)
store.close()
# No error implies success

"""
Performs a sample query on the DSI db
"""
def test_artifact_query(test_name):
dbpath = "clover_" + test_name + ".db"
store = Sqlite(dbpath)
_ = store.get_artifact_list(isVerbose=isVerbose)
data_type = DataType()
data_type.name = "rundata"
query = "SELECT * FROM " + str(data_type.name) + " WHERE Viscosity > 0.1"
print("Running Query", query)
result = store.sqlquery(query)
store.export_csv_query(query, "clover_query.csv")
store.close()

if __name__ == "__main__":
# """ The testname argument is required """
# parser = argparse.ArgumentParser()
# parser.add_argument('--testname', help='the test name')
# args = parser.parse_args()
# test_name = args.testname
# if test_name is None:
# parser.print_help()
# sys.exit(0)

# # import_cloverleaf_data(test_name)
# test_artifact_query(test_name)
test_dsi_dict()
13 changes: 13 additions & 0 deletions tools/perf_analyzer/db_backups/clover_random_test.csv
@@ -0,0 +1,13 @@
testname,git_hash,git_committer,git_committed_date,git_repo_name,version,Task Count,Thread Count,x_cells,y_cells,xmin,ymin,xmax,ymax,initial_timestep,timestep_rise,max_timestep,end_step,Timestep,Ideal Gas,Viscosity,PdV,Revert,Acceleration,Fluxes,Cell Advection,Momentum Advection,Reset,Summary,Visit,Tile Halo Exchange,Self Halo Exchange,MPI Halo Exchange,Total,The Rest
random_test,0fdb917bf10d20363dd8b88d762851908643925b,caxwl,2021-08-09 12:23:14,UK-MAC/CloverLeaf_ref,1.300,4,4,960,960,0.0,0.0,10.0,10.0,0.04,1.5,0.04,87,0.0546,0.0374,0.0357,0.1419,0.0222,0.0601,0.0470,0.1369,0.3139,0.0339,0.0022,0.0000,0.0001,0.2134,0.5226,1.6218,0.0003
random_test,1398cd9cfa7570db9c8eb7a17a2f3e698c90aeb4,GitHub,2021-08-03 08:49:14,UK-MAC/CloverLeaf_ref,1.300,4,4,960,960,0.0,0.0,10.0,10.0,0.04,1.5,0.04,87,0.0497,0.0354,0.0365,0.1285,0.0239,0.0639,0.0460,0.1288,0.3257,0.0311,0.0016,0.0000,0.0002,0.2176,0.4954,1.5844,0.0003
random_test,158e23d08f73d36f71e144851451955b3ae02dff,mohan002,2021-08-02 18:40:45,UK-MAC/CloverLeaf_ref,1.300,4,4,960,960,0.0,0.0,10.0,10.0,0.04,1.5,0.04,87,0.0567,0.0353,0.0365,0.1425,0.0223,0.0589,0.0448,0.1369,0.3030,0.0323,0.0021,0.0000,0.0002,0.2011,0.5384,1.6110,0.0003
random_test,2efdde26ab1eef67dfce5a29163e8d513b6dffc4,jdshanks,2020-08-20 14:20:55,UK-MAC/CloverLeaf_ref,1.300,4,4,960,960,0.0,0.0,10.0,10.0,0.04,1.5,0.04,87,0.0625,0.0422,0.0347,0.1608,0.0161,0.0651,0.0448,0.1439,0.2851,0.0389,0.0069,0.0000,0.0003,0.1667,0.5370,1.6048,0.0003
random_test,89cc919b28f687a25d30b44ddf547201da930c14,jdshanks,2020-07-14 09:16:46,UK-MAC/CloverLeaf_ref,1.300,4,4,960,960,0.0,0.0,10.0,10.0,0.04,1.5,0.04,87,0.0503,0.0357,0.0343,0.1332,0.0231,0.0582,0.0443,0.1242,0.3041,0.0302,0.0019,0.0000,0.0001,0.2165,0.5463,1.6025,0.0003
random_test,07fcf4d773ba7626e6ea36c7002f7b2cd7c76b2a,jdshanks,2020-07-14 08:44:25,UK-MAC/CloverLeaf_ref,1.300,4,4,960,960,0.0,0.0,10.0,10.0,0.04,1.5,0.04,87,0.0592,0.0420,0.0339,0.1427,0.0277,0.0637,0.0453,0.1417,0.2957,0.0383,0.0049,0.0000,0.0002,0.1820,0.5184,1.5957,0.0003
random_test,e37e1d7aab99070a65094e784721b4d05fb86444,jdshanks,2020-07-14 08:13:22,UK-MAC/CloverLeaf_ref,1.300,4,4,960,960,0.0,0.0,10.0,10.0,0.04,1.5,0.04,87,0.0630,0.0371,0.0375,0.1360,0.0211,0.0604,0.0441,0.1337,0.3068,0.0330,0.0020,0.0000,0.0001,0.1939,0.5274,1.5960,0.0003
random_test,439c8d846ede012c89f7be451763a32dbaa5eb2c,GitHub,2020-07-13 20:16:34,UK-MAC/CloverLeaf_ref,1.300,4,4,3840,3840,0.0,0.0,10.0,10.0,0.04,1.5,0.04,87,0.7595,0.5283,0.6273,2.1090,0.3632,0.9054,0.6695,2.3710,4.9192,0.5662,0.0367,0.0000,0.0003,0.8092,0.9885,15.6531,0.0005
random_test,3f889495db94c6fba5a5ec1f9937f49e8b66f94d,Olly Perks,2020-07-13 14:02:03,UK-MAC/CloverLeaf_ref,1.300,4,4,3840,3840,0.0,0.0,10.0,10.0,0.04,1.5,0.04,87,0.8014,0.5378,0.6328,2.0701,0.3678,0.9438,0.6536,2.3394,4.9349,0.5455,0.0372,0.0000,0.0003,0.7951,1.0069,15.6668,0.0009
random_test,b5e598dc0f10ca804dce4a748e3c2314545269cd,GitHub,2020-07-03 08:48:29,UK-MAC/CloverLeaf_ref,1.300,4,4,3840,3840,0.0,0.0,10.0,10.0,0.04,1.5,0.04,87,0.7889,0.5249,0.6021,2.1048,0.3488,0.9416,0.6636,2.3405,4.9103,0.5517,0.0298,0.0000,0.0003,0.8212,1.0441,15.6725,0.0004
random_test,821792e5b424d84b6043c5dfa22ae3cc4e67fa0f,jdshanks,2020-07-03 08:37:23,UK-MAC/CloverLeaf_ref,1.300,4,4,3840,3840,0.0,0.0,10.0,10.0,0.04,1.5,0.04,87,0.8475,0.5778,0.6166,2.1963,0.3285,0.8830,0.6693,2.3973,4.7511,0.5906,0.0654,0.0000,0.0003,0.2384,1.4976,15.6597,0.0015
random_test,34869cbce7ba601168e65ea1bbb718e3688d08fa,jdshanks,2020-07-01 17:58:18,UK-MAC/CloverLeaf_ref,1.300,4,4,3840,3840,0.0,0.0,10.0,10.0,0.04,1.5,0.04,87,0.7903,0.5164,0.6468,2.0930,0.3509,0.9221,0.6599,2.3471,4.9155,0.5468,0.0417,0.0000,0.0003,0.8198,1.0058,15.6562,0.0038
47 changes: 47 additions & 0 deletions tools/perf_analyzer/environment.yml
@@ -0,0 +1,47 @@
name: cdsi
channels:
- conda-forge
- defaults
dependencies:
- ca-certificates=2024.6.2=hf0a4a13_0
- cctools=986=h4c9edd9_0
- cctools_osx-arm64=986=hd11630f_0
- clang=18.1.8=default_h675cc0c_0
- clang-18=18.1.8=default_h5c12605_0
- clang_impl_osx-arm64=18.1.8=h2ae9ea5_16
- clang_osx-arm64=18.1.8=h54d7cd3_16
- clangxx=18.1.8=default_h675cc0c_0
- compiler-rt=18.1.8=h28df8ea_0
- compiler-rt_osx-arm64=18.1.8=h56c4e69_0
- gfortran=13.2.0=h1ca8e4b_1
- gfortran_impl_osx-arm64=13.2.0=h30f4408_1
- gfortran_osx-arm64=13.2.0=h57527a5_1
- gmp=6.3.0=h7bae524_2
- icu=73.2=hc8870d7_0
- isl=0.25=h9a09cb3_0
- ld64=711=h4c6efb1_0
- ld64_osx-arm64=711=h5e7191b_0
- libclang-cpp18.1=18.1.8=default_h5c12605_0
- libcxx=17.0.6=he7857fb_1
- libevent=2.1.12=h2757513_1
- libgfortran=5.0.0=13_2_0_hd922786_3
- libgfortran-devel_osx-arm64=13.2.0=h5d7a38c_3
- libgfortran5=13.2.0=hf226fd6_3
- libhwloc=2.10.0=default_h7685b71_1001
- libiconv=1.17=h0d3ecfb_2
- libllvm18=18.1.8=h5090b49_0
- libxml2=2.12.7=ha661575_1
- libzlib=1.3.1=hfb2fe0b_1
- llvm-openmp=18.1.8=hde57baf_0
- llvm-tools=18.1.8=h5090b49_0
- mpc=1.3.1=h91ba8db_0
- mpfr=4.2.1=h41d338b_1
- mpi=1.0=openmpi
- openmpi=5.0.3=he01d045_107
- openssl=3.3.1=hfb2fe0b_1
- sigtool=0.1.3=h44b9a77_0
- tapi=1100.0.11=he4954df_0
- xz=5.2.6=h57fd34a_0
- zlib=1.3.1=hfb2fe0b_1
- zstd=1.5.6=hb46c0d2_0
prefix: /Users/ssakin/Softwares/anaconda3/envs/cdsi
893 changes: 893 additions & 0 deletions tools/perf_analyzer/fly_server.py

Large diffs are not rendered by default.

30 changes: 30 additions & 0 deletions tools/perf_analyzer/initate_git_workflow_through_issue.py
@@ -0,0 +1,30 @@
from github import Github


def getGitRepo(user_repo):
repo = Github().get_repo(user_repo)
return repo

def testGitIssue():
    user_repo = "sayefsakin/flycatcher"
    repo = getGitRepo(user_repo)
    repo.create_issue(title="This is a new issue")
    issue = repo.get_issue(number=1)
    ic = issue.get_comments()
    issue.create_comment("This")
    print(issue.title)

def main():
    testGitIssue()


if __name__ == "__main__":
    main()
246 changes: 246 additions & 0 deletions tools/perf_analyzer/parse_clover_output.py
@@ -0,0 +1,246 @@
#!/usr/bin/env python3

"""
Parses the output from CloverLeaf runs and creates a csv file
"""

import argparse
import sys
import re
import glob
import git
import pandas as pd
from dsi.plugins.collection_reader import Dict
from dsi.backends.sqlite import Sqlite, DataType
import json

def get_repo_and_name_from_url(url: str):
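    # e.g. "git@github.com:UK-MAC/CloverLeaf_ref.git" -> "UK-MAC/CloverLeaf_ref"
    # (assumes an SSH-style remote; an https URL would keep its leading "//host/" part)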
last_colon_index = url.rfind(":")
last_suffix_index = url.rfind(".git")
if last_suffix_index < 0:
last_suffix_index = len(url)

if last_colon_index < 0 or last_suffix_index <= last_colon_index:
raise Exception("Badly formatted url {}".format(url))

return url[last_colon_index + 1:last_suffix_index]

def get_git_repo(gitdir):
repo = git.Repo(gitdir)
# print(repo.head.object.committer)
# print(repo.head.object.committed_datetime.strftime("%Y-%m-%d"))
# print(get_repo_and_name_from_url(repo.remotes.origin.url))
return repo

def add_output_to_csv_file(data, testname):
""" The csv file is created and written to disk """
with open('clover_' + testname + '.csv', 'a+') as clover_out:
header = ""
row = ""
for key, val in data.items():
header += key + ","
row += str(val) + ","

header = header.rstrip(',')
row = row.rstrip(',')

if clover_out.tell() == 0:
clover_out.write(header + "\n")

clover_out.write(row + "\n")

def add_non_existing_columns(data, test_name, db_base_dir):
dbpath = db_base_dir + "/clover_" + test_name + ".db"
store = Sqlite(dbpath)
data_type = DataType()
data_type.name = "TABLENAME"

query = "SELECT name FROM sqlite_master WHERE type='table' AND name='" + data_type.name + "'"
result = store.sqlquery(query)
if len(result) == 0:
return

str_query = "PRAGMA table_info( " + data_type.name + " );"
result = store.sqlquery(str_query)
art_list = [tup[1] for tup in result]
for key in data:
if key not in art_list:
print("key not exist: ", key)
str_query = "ALTER TABLE " + data_type.name + " ADD " + key + " VARCHAR DEFAULT None"
result = store.sqlquery(str_query)
print(key, " added")
store.close()


def add_output_to_dsi(data, test_name, db_base_dir):
dbpath = db_base_dir + '/clover_' + test_name + '.db'
dsi_dict = Dict(data)
dsi_dict.add_rows()
# print(dsi_dict.collections)

add_non_existing_columns(data, test_name, db_base_dir)

store = Sqlite(dbpath)
# store.types.name = test_name
store.put_artifacts(dsi_dict.collections[0], isVerbose=False)
store.close()


"""
Checks whether a row for the given git hash already exists in the DSI db
"""
def test_artifact_query(test_name, db_base_dir, git_hash):
dbpath = db_base_dir + "/clover_" + test_name + ".db"
store = Sqlite(dbpath)
_ = store.get_artifact_list(isVerbose=False)
data_type = DataType()
data_type.name = "TABLENAME"
query = "SELECT name FROM sqlite_master WHERE type='table' AND name='" + data_type.name + "'"
result = store.sqlquery(query)
if len(result) == 0:
return False
query = "SELECT count(*) as cn FROM " + str(data_type.name) + " WHERE git_hash LIKE '" + git_hash + "%'"
print("Running Query", query)
result = store.sqlquery(query)
store.close()
if len(result) > 0 and result[0][0] > 0:
print("found")
return True
else:
print("not found")
return False
# store.export_csv_query(query, "clover_query.csv")


"""
Reads all rows from the DSI db into a pandas DataFrame
"""
def get_all_db_data(test_name, db_base_dir):
dbpath = db_base_dir + "/clover_" + test_name + ".db"
store = Sqlite(dbpath)
_ = store.get_artifact_list(isVerbose=False)
data_type = DataType()
data_type.name = "TABLENAME"
query = "SELECT name FROM sqlite_master WHERE type='table' AND name='" + data_type.name + "'"
result = store.sqlquery(query)
if len(result) == 0:
return None
query = "SELECT * FROM " + str(data_type.name)
print("Running Query", query)
result = pd.read_sql(query, store.con)
# result = store.sqlquery(query)
store.close()
return result


def process_keys_for_sqlite(key):
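    """Normalize a key for use as a SQLite column name, e.g. "Cell Advection" -> "cell_advection"."""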
return key.replace(" ", "_").lower()


def parse_clover_output_file(testname, git_dir):
git_repo = get_git_repo(git_dir)
data = {}
data['testname'] = [testname]
clover_output = git_dir + "/"
if git_repo:
data['git_hash'] = [git_repo.head.object.hexsha]
data['git_committer'] = [git_repo.head.object.committer.email]
data['git_committed_date'] = [git_repo.head.object.committed_datetime.strftime("%Y-%m-%d %H:%M:%S")]
data['git_repo_name'] = [get_repo_and_name_from_url(git_repo.remotes.origin.url)]
clover_output = clover_output + "clover.out"
# print(clover_output)
else:
raise Exception("Git repo not found")

    with open(clover_output, 'r') as slurmout:
        for line in slurmout:
            if "Clover Version" in line:
                match = re.match(r'Clover Version\s+(\d+\.\d+)', line)
                data['version'] = [match.group(1)]
            elif "Task Count" in line:
                match = re.match(r'\s+Task Count\s+(\d+)', line)
                data['Task_Count'] = [match.group(1)]
            elif "Thread Count" in line:
                match = re.match(r'\s+Thread Count:\s+(\d+)', line)
                data['Thread_Count'] = [match.group(1)]
            elif "=" in line:
                # reading input deck parameters, e.g. "x_cells=960"
                match = re.match(r'\s+(\w+)=(\d+\.?\d+)', line)
                if match:
                    pro_key = process_keys_for_sqlite(match.group(1))
                    data[pro_key] = [match.group(2)]
            else:
                # reading profiler output, e.g. "Cell Advection   :   0.1369   8.5"
                match = re.match(r'(\w+(\s?\w+)*)\s+:\s+(\d+\.\d+)\s+(\d+\.\d+)', line)
                if match:
                    pro_key = process_keys_for_sqlite(match.group(1))
                    data[pro_key] = [match.group(3)]
    # print(data)
    return data


def parse_tau_output_file(testname, git_dir):
git_repo = get_git_repo(git_dir)
data = {}
data['testname'] = [testname]
clover_output = git_dir + "/"
if git_repo:
data['git_hash'] = [git_repo.head.object.hexsha]
data['git_committer'] = [git_repo.head.object.committer.email]
data['git_committed_date'] = [git_repo.head.object.committed_datetime.strftime("%Y-%m-%d %H:%M:%S")]
data['git_repo_name'] = [get_repo_and_name_from_url(git_repo.remotes.origin.url)]
clover_output = clover_output + "tau_results"
# print(clover_output)
else:
raise Exception("Git repo not found")

    do_parse = False
    with open(clover_output, 'r') as slurmout:
        for line in slurmout:
            # pprof prints a "total" summary block followed by a "mean" block;
            # only the totals are recorded here
            if 'total' in line:
                do_parse = True
            elif 'mean' in line:
                do_parse = False
            if do_parse and "MPI_" in line:
                match = re.match(r'\s+(\d+\.\d+)\s+([\d,\.]+)\s+([\d,\.]+)\s+([\d,\.]+)\s+([\d,\.]+)\s+([\d,\.]+)\s+(MPI_\w+)\(\)\s+', line)
                if match:
                    # exclusive/inclusive msec -> sec; strip thousands separators
                    data[match.group(7) + '_esec'] = [float(match.group(2).replace(',', '')) / 1000]
                    data[match.group(7) + '_isec'] = [float(match.group(3).replace(',', '')) / 1000]
    return data




def main():
    """ Both the testname and gitdir arguments are required """
    parser = argparse.ArgumentParser()
    parser.add_argument('--testname', help='the test name')
    parser.add_argument('--gitdir', help='the git directory')
    args = parser.parse_args()
    testname = args.testname
    if testname is None or args.gitdir is None:
        parser.print_help()
        sys.exit(1)

    # data = parse_clover_output_file(testname, args.gitdir)
    data = parse_tau_output_file(testname, args.gitdir)
    # add_output_to_csv_file(data, testname)
    # db_base_dir is assumed to be the current directory here
    add_output_to_dsi(data, testname, '.')

    # test_artifact_query(testname)

if __name__ == '__main__':
    main()


1,544 changes: 1,544 additions & 0 deletions tools/perf_analyzer/perf_hist_chart.ipynb

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions tools/perf_analyzer/requirements.txt
@@ -0,0 +1,7 @@
aiohttp==3.9.0b0
diskcache>=5.6.3
GitPython>=3.1.43
PyGithub>=2.3.0
dash>=2.17.0
pandas>=2.2.0
dash-bootstrap-components>=1.6.0
28 changes: 28 additions & 0 deletions tools/perf_analyzer/runner_script.sh
@@ -0,0 +1,28 @@
#!/bin/bash

if [ -z "${SOURCE_BASE_DIRECTORY+x}" ]; then
    echo "SOURCE_BASE_DIRECTORY is unset"
    exit 1
else
    echo "SOURCE_BASE_DIRECTORY is set to '$SOURCE_BASE_DIRECTORY'"
fi

source ~/.bash_profile
conda activate cdsi

# enable TAU profiling; profiles are written next to the source
export TAU_PROFILE=1
export PROFILEDIR="$SOURCE_BASE_DIRECTORY"

cd "$SOURCE_BASE_DIRECTORY"
git checkout -f "$CANDIDATE_COMMIT_HASH"

cp ../clover.in .

make clean
make COMPILER=GNU
echo "================================ Compile Done ================================ "

echo "============================= Running CloverLeaf ============================= "
mpirun -np 2 tau_exec "$SOURCE_BASE_DIRECTORY"/clover_leaf
pprof -s > tau_results
cd -
conda deactivate
49 changes: 49 additions & 0 deletions tools/perf_analyzer/sample_charting.py
@@ -0,0 +1,49 @@
import git
import matplotlib.pyplot as plt
from matplotlib.backend_bases import MouseButton  # needed by on_click below
import pandas as pd

def main():
df = pd.read_csv("clover_random_test.csv")

def on_move(event):
if event.inaxes:
print(f'data coords {event.xdata} {event.ydata},',
f'pixel coords {event.x} {event.y}')


def on_click(event):
if event.button is MouseButton.LEFT:
print('disconnecting callback')
plt.disconnect(binding_id)



sh_hash = [substring[:7] for substring in df["git_hash"]]
for col in ["PdV", "Cell Advection", "MPI Halo Exchange", "Self Halo Exchange", "Momentum Advection", "Total"]:
plt.plot(sh_hash, df[col], label=col, linestyle='dashdot')
plt.xticks(rotation=90)
plt.xlabel("Commit (new --> old)")
plt.ylabel("Time (s)")
plt.legend()

binding_id = plt.connect('motion_notify_event', on_move)
plt.connect('button_press_event', on_click)

plt.show()


def extraTest():

    current_git_directory = "/tmp/fly_dsi/src"
    git_repo = git.Repo(current_git_directory)

    candidate_commit_hash = "b5e598dc0f10ca804dce4a748e3c2314545269cd"
    git_repo.git.reset("HEAD")
    # force-checkout the candidate commit, discarding local changes
    git_repo.git.checkout("-f", candidate_commit_hash)
    print("hello")

if __name__ == '__main__':
# main()
extraTest()