From 9419bde6a6107bea9f03bfc1448ab1fa5878b03c Mon Sep 17 00:00:00 2001
From: fred1912
Date: Thu, 25 Jan 2024 18:25:41 +0800
Subject: [PATCH] 【metax】First PR & faster_rcnn project (#402)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* update readme
* add company info
* faster_rcnn update & first PR
* fix readme
* add config 1x8 bs=16
* fix typo A100->C500
* remove torchvision in requirements.txt
* update readme
* update fasterrcnn readme
* update
* add 2x8 info & add bandwidth
* fix typo
* delete history
* update info
* update info
* update table
* delete history
* add info in test-conf
* fix typo
* delete history
* fix env bug & add mx tf32 env
* update requirements
* fix bug

---------

Co-authored-by: Shengchu Zhao
---
 training/benchmarks/driver/helper.py          |   5 +
 .../environment_variables.sh                  |   5 +
 .../faster_rcnn-pytorch/requirements.txt      |   6 +
 training/metax/README.md                      |  70 +++++
 .../metax/docker_image/pytorch_2.0/Dockerfile |   3 +
 .../metax/docker_image/pytorch_2.0/README.md  |   5 +
 .../pytorch_2.0/pytorch_install.sh            |   1 +
 training/metax/faster_rcnn-pytorch/README.md  |  59 ++++
 .../config/config_C500x1x1.py                 |   4 +
 .../config/config_C500x1x8.py                 |   3 +
 .../config/config_C500x2x8.py                 |   4 +
 .../config/requirements.txt                   |   3 +
 .../metax/faster_rcnn-pytorch/extern/.gitkeep |   0
 training/metax/metax_monitor.py               | 288 ++++++++++++++++++
 training/run_benchmarks/config/test_conf.py   |   6 +-
 training/run_benchmarks/run.py                |   5 +-
 16 files changed, 465 insertions(+), 2 deletions(-)
 create mode 100644 training/kunlunxin/faster_rcnn-pytorch/environment_variables.sh
 create mode 100644 training/kunlunxin/faster_rcnn-pytorch/requirements.txt
 create mode 100644 training/metax/README.md
 create mode 100644 training/metax/docker_image/pytorch_2.0/Dockerfile
 create mode 100755 training/metax/docker_image/pytorch_2.0/README.md
 create mode 100644 training/metax/docker_image/pytorch_2.0/pytorch_install.sh
 create mode 100644 training/metax/faster_rcnn-pytorch/README.md
 create mode 100644 training/metax/faster_rcnn-pytorch/config/config_C500x1x1.py
 create mode 100644 training/metax/faster_rcnn-pytorch/config/config_C500x1x8.py
 create mode 100644 training/metax/faster_rcnn-pytorch/config/config_C500x2x8.py
 create mode 100644 training/metax/faster_rcnn-pytorch/config/requirements.txt
 create mode 100644 training/metax/faster_rcnn-pytorch/extern/.gitkeep
 create mode 100644 training/metax/metax_monitor.py

diff --git a/training/benchmarks/driver/helper.py b/training/benchmarks/driver/helper.py
index de513901e..c616b4e92 100644
--- a/training/benchmarks/driver/helper.py
+++ b/training/benchmarks/driver/helper.py
@@ -83,3 +83,8 @@ def set_seed(self, seed: int, vendor: str = None):
         else:
             # TODO: other vendors set their seed here; extend as needed
             pass
+
+        if os.environ.get("METAX_USE_TF32"):
+            import torch
+            torch.backends.cuda.matmul.allow_tf32 = True
+            torch.backends.cudnn.allow_tf32 = True
\ No newline at end of file
diff --git a/training/kunlunxin/faster_rcnn-pytorch/environment_variables.sh b/training/kunlunxin/faster_rcnn-pytorch/environment_variables.sh
new file mode 100644
index 000000000..a7f429ac2
--- /dev/null
+++ b/training/kunlunxin/faster_rcnn-pytorch/environment_variables.sh
@@ -0,0 +1,5 @@
+# =================================================
+# Export variables
+# =================================================
+
+export METAX_USE_TF32=1
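As context for the helper.py change above: METAX_USE_TF32 simply gates PyTorch's standard TF32 switches. Below is a minimal sketch of how the flag behaves, assuming a torch build where the CUDA-compatible backend flags are available; the `tf32_enabled` helper is illustrative only and is not part of FlagPerf.

```python
import os
import torch

# Mirrors the logic added to helper.py above: any non-empty value of
# METAX_USE_TF32 turns on TF32 for matmul and cuDNN convolutions.
if os.environ.get("METAX_USE_TF32"):
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True


def tf32_enabled() -> bool:
    """Report whether TF32 matmul is currently allowed (hypothetical helper)."""
    return torch.backends.cuda.matmul.allow_tf32


print("TF32 matmul enabled:", tf32_enabled())
```

The variable itself is set by sourcing config/environment_variables.sh before launch, which run.py does whenever that file exists for the selected vendor/case.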
diff --git a/training/kunlunxin/faster_rcnn-pytorch/requirements.txt b/training/kunlunxin/faster_rcnn-pytorch/requirements.txt
new file mode 100644
index 000000000..45dd53af8
--- /dev/null
+++ b/training/kunlunxin/faster_rcnn-pytorch/requirements.txt
@@ -0,0 +1,6 @@
+/root/.cache/torch/hub/checkpoints/torchvision-0.15.1+mc2.19.0.2-cp38-cp38-linux_x86_64.whl
+/root/.cache/torch/hub/checkpoints/torch-2.0.0+gite544b36-cp38-cp38-linux_x86_64.whl
+pycocotools
+numpy
+tqdm
+schedule
\ No newline at end of file
diff --git a/training/metax/README.md b/training/metax/README.md
new file mode 100644
index 000000000..54fe64afe
--- /dev/null
+++ b/training/metax/README.md
@@ -0,0 +1,70 @@
+# Vendor Information
+
+Website: https://www.metax-tech.com/
+
+MetaX Integrated Circuits (Shanghai) Co., Ltd. was founded in Shanghai in September 2020 and has established wholly owned subsidiaries and R&D centers in Beijing, Nanjing, Chengdu, Hangzhou, Shenzhen, Wuhan, Changsha, and other cities. MetaX has a technically complete team with rich design and productization experience; its core members average nearly 20 years of end-to-end R&D experience on high-performance GPU products and have led the development and mass production of more than ten mainstream high-performance GPUs, covering GPU architecture definition, GPU IP design, GPU SoC design, and volume delivery of complete GPU system solutions.
+
+MetaX is committed to providing full-stack GPU chips and solutions for heterogeneous computing, applicable to intelligent computing, smart cities, cloud computing, autonomous driving, digital twins, the metaverse, and other frontier fields, supplying strong compute power for the digital economy.
+
+MetaX builds a full-stack GPU product line: the 曦思® N series for AI inference, the 曦云® C series for general-purpose computing, and the 曦彩® G series for graphics rendering, meeting demands for both high energy efficiency and high generality. All MetaX products use fully in-house GPU IP, with a proprietary instruction set and architecture, paired with a complete software stack (MXMACA®) that is compatible with the mainstream GPU ecosystem. This gives them natural advantages in energy efficiency and generality, lets MetaX build integrated hardware/software ecosystem solutions for customers, and makes them a compute foundation for digital and intelligent industrial transformation under the "dual-carbon" goals.
+
+
+
+# FlagPerf Adaptation and Verification Environment
+## Reference environment configuration
+- Hardware
+  - Machine model: 同泰怡 G658V3
+  - Accelerator model: 曦云® C500 64G
+  - Multi-node network type / bandwidth: InfiniBand, 2x200 Gb/s
+- Software
+  - OS version: Ubuntu 20.04.6
+  - OS kernel version: 5.4.0-26-generic
+  - Accelerator driver version: 2.18.0.8
+  - VBIOS: 1.0.102.0
+  - Docker version: 24.0.7
+
+
+## Container image information
+- Image build information
+  - Dockerfile path: metax/docker_image/pytorch_2.0/Dockerfile
+  - Post-build software installation script: metax/docker_image/pytorch_2.0/pytorch_install.sh
+
+- Core software information
+  - AI framework & related versions:
+    torch: pytorch-2.0-mc
+    torchvision: torchvision-0.15-mc
+    maca: 2.18.0.8
+
+
+## Accelerator monitoring
+- Accelerator usage collection command
+
+  ```shell
+  mx_smi
+  ```
+- Sample monitoring output:
+
++---------------------------------------------------------------------------------+
+| MX-SMI 2.0.12                     Kernel Mode Driver Version: 2.2.0             |
+| MACA Version: 2.0                 BIOS Version: 1.0.102.0                       |
+|-------------------------------------+----------------------+--------------------|
+| GPU    NAME                         | Bus-id               | GPU-Util           |
+| Temp   Power                        | Memory-Usage         |                    |
+|=====================================+======================+====================|
+| 0      MXC500                       | 0000:1b:00.0         | 0%                 |
+| 35C    56W                          | 914/65536 MiB        |                    |
++-------------------------------------+----------------------+--------------------+
+
+
+- Collected metrics
+
+| Metric | Log file | Format |
+|---|---|---|
+| Temperature | mx_monitor.log | xxx C |
+| Power | mx_monitor.log | xxx W |
+| Memory used | mx_monitor.log | xxx MiB |
+| Total memory | mx_monitor.log | xxx MiB |
+| Memory utilization | mx_monitor.log | xxx % |
+
+
+
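The table above lists the temperature, power, and memory figures that metax_monitor.py later scrapes from raw mx-smi dumps. A rough sketch of how those fields could be extracted from the sample layout shown above follows; the regular expressions are assumptions about the output format, which may differ across driver versions, so this is illustrative rather than a definitive parser.

```python
import re
import subprocess


def read_gpu_stats(raw: str) -> dict:
    """Pull temperature, power, and memory figures out of an mx-smi dump."""
    stats = {}
    temp_power = re.search(r"(\d+)C\s+(\d+)W", raw)        # e.g. "35C    56W"
    mem = re.search(r"(\d+)/(\d+)\s*MiB", raw)              # e.g. "914/65536 MiB"
    if temp_power:
        stats["temperature_c"] = int(temp_power.group(1))
        stats["power_w"] = int(temp_power.group(2))
    if mem:
        used, total = int(mem.group(1)), int(mem.group(2))
        stats["memory_used_mib"] = used
        stats["memory_total_mib"] = total
        stats["memory_util_pct"] = round(100 * used / total, 1)
    return stats


if __name__ == "__main__":
    out = subprocess.run("mx-smi", shell=True, capture_output=True, text=True).stdout
    print(read_gpu_stats(out))
```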
diff --git a/training/metax/docker_image/pytorch_2.0/Dockerfile b/training/metax/docker_image/pytorch_2.0/Dockerfile
new file mode 100644
index 000000000..476c1448a
--- /dev/null
+++ b/training/metax/docker_image/pytorch_2.0/Dockerfile
@@ -0,0 +1,3 @@
+FROM maca-2.18.0.8-ubuntu18.04-amd64:FlagPerf-base-v1
+ENV PATH="/opt/conda/bin:${PATH}"
+RUN /bin/bash -c "uname -a"
\ No newline at end of file
diff --git a/training/metax/docker_image/pytorch_2.0/README.md b/training/metax/docker_image/pytorch_2.0/README.md
new file mode 100755
index 000000000..5fcb6fa35
--- /dev/null
+++ b/training/metax/docker_image/pytorch_2.0/README.md
@@ -0,0 +1,5 @@
+# Contact MetaX to obtain the following software packages
+
+> Contact email: shengchu.zhao@metax-tech.com
+
+docker image: maca-2.18.0.8-ubuntu18.04-amd64
diff --git a/training/metax/docker_image/pytorch_2.0/pytorch_install.sh b/training/metax/docker_image/pytorch_2.0/pytorch_install.sh
new file mode 100644
index 000000000..a9bf588e2
--- /dev/null
+++ b/training/metax/docker_image/pytorch_2.0/pytorch_install.sh
@@ -0,0 +1 @@
+#!/bin/bash
diff --git a/training/metax/faster_rcnn-pytorch/README.md b/training/metax/faster_rcnn-pytorch/README.md
new file mode 100644
index 000000000..0e892eda5
--- /dev/null
+++ b/training/metax/faster_rcnn-pytorch/README.md
@@ -0,0 +1,59 @@
+### Model backbone weight download
+[Backbone weight download](../../benchmarks/faster_rcnn)
+
+The weight URL is set in FlagPerf/training/benchmarks/faster_rcnn/pytorch/model/\_\_init__.py:
+
+```python
+torchvision.models.resnet.ResNet50_Weights.IMAGENET1K_V1.value.url = 'https://download.pytorch.org/models/resnet50-0676ba61.pth'
+```
+By default, this case downloads the backbone weights automatically from the official URL (0676ba61). To specify them manually, download the weights to a path that is mounted into the container and change the URL here to "file://" + download_path.
+
+### Test dataset download
+
+[Test dataset download](https://cocodataset.org/)
+
+### MetaX C500 GPU configuration and run information
+#### Environment configuration
+- ##### Hardware environment
+  - Machine / accelerator model: 曦云® C500 64G
+  - Multi-node network type / bandwidth: InfiniBand, 2x200 Gb/s
+
+- ##### Software environment
+  - OS version: Ubuntu 20.04.6
+  - OS kernel version: 5.4.0-26-generic
+  - Accelerator driver version: 2.2.0
+  - Docker version: 24.0.7
+  - Training framework version: pytorch-2.0.0+mc2.18.0.8-cp38-cp38-linux_x86_64.whl
+  - Dependency software versions: none
+
+
+
+
+* General metrics
+
+| Metric | Value | Notes |
+| -------------- | ----------------------- | ------------------------------------------- |
+| Task category | Image object detection | |
+| Model | fasterRCNN | |
+| Dataset | coco2017 | |
+| Data precision | precision, see "Performance metrics" | fp32/amp/fp16 available |
+| Hyperparameter changes | fix_hp, see "Performance metrics" | special hyperparameters needed to saturate the device for throughput evaluation |
+| Hardware short name | MXC500 | |
+| Hardware memory usage | mem, see "Performance metrics" | usually called "device memory", in GiB |
+| End-to-end time | e2e_time, see "Performance metrics" | total time plus Perf initialization, etc. |
+| Overall throughput | p_whole, see "Performance metrics" | images actually trained divided by total time (performance_whole) |
+| Training throughput | p_train, see "Performance metrics" | excludes the evaluation time at the end of each epoch |
+| **Compute throughput** | **p_core, see "Performance metrics"** | excludes data I/O time (p3>p2>p1) |
+| Training result | map, see "Performance metrics" | mean average precision of object detection |
+| Additional changes | none | |
+
+
+* Performance metrics
+
+| Config | precision | fix_hp | e2e_time | p_whole | p_train | p_core | map | mem |
+| --------------------- | --------- | ------------ | -------- | ------- | ------- | ------ | --- | --- |
+| MXC500 single node, 8 GPUs (1x8) | fp32 | / | | | | | | 9.9/64 |
+| MXC500 single node, 8 GPUs (1x8) | fp32 | bs=16,lr=0.16 | | | | | 36.7% | 44.5/64 |
+| MXC500 single node, 1 GPU (1x1) | fp32 | / | / | | | | | 31.8/64 |
+| MXC500 two nodes, 8 GPUs each (2x8) | fp32 | / | / | | | | | 44.3/64 |
+
diff --git a/training/metax/faster_rcnn-pytorch/config/config_C500x1x1.py b/training/metax/faster_rcnn-pytorch/config/config_C500x1x1.py
new file mode 100644
index 000000000..c11690f00
--- /dev/null
+++ b/training/metax/faster_rcnn-pytorch/config/config_C500x1x1.py
@@ -0,0 +1,4 @@
+vendor: str = "metax"
+train_batch_size = 16
+eval_batch_size = 16
+lr = 0.16
\ No newline at end of file
diff --git a/training/metax/faster_rcnn-pytorch/config/config_C500x1x8.py b/training/metax/faster_rcnn-pytorch/config/config_C500x1x8.py
new file mode 100644
index 000000000..842eda4bc
--- /dev/null
+++ b/training/metax/faster_rcnn-pytorch/config/config_C500x1x8.py
@@ -0,0 +1,3 @@
+vendor: str = "metax"
+train_batch_size = 2
+eval_batch_size = 2
diff --git a/training/metax/faster_rcnn-pytorch/config/config_C500x2x8.py b/training/metax/faster_rcnn-pytorch/config/config_C500x2x8.py
new file mode 100644
index 000000000..07128ad72
--- /dev/null
+++ b/training/metax/faster_rcnn-pytorch/config/config_C500x2x8.py
@@ -0,0 +1,4 @@
+vendor: str = "metax"
+train_batch_size = 16
+eval_batch_size = 16
+lr = 0.08
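Following the backbone-weight note in the faster_rcnn README above, here is a minimal sketch of the local-weights option it describes; the download path below is a hypothetical mount point, not a FlagPerf default.

```python
import torchvision

# Hypothetical location of the pre-downloaded backbone weights inside the
# container; replace with whatever path is actually mounted.
download_path = "/raid/dataset/faster_rcnn/resnet50-0676ba61.pth"

# Same mechanism as the README shows: point the torchvision weight enum at a
# local file:// URL instead of the official download URL.
torchvision.models.resnet.ResNet50_Weights.IMAGENET1K_V1.value.url = "file://" + download_path
```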
diff --git a/training/metax/faster_rcnn-pytorch/config/requirements.txt b/training/metax/faster_rcnn-pytorch/config/requirements.txt
new file mode 100644
index 000000000..cc8d4dc61
--- /dev/null
+++ b/training/metax/faster_rcnn-pytorch/config/requirements.txt
@@ -0,0 +1,3 @@
+pycocotools
+numpy
+tqdm
\ No newline at end of file
diff --git a/training/metax/faster_rcnn-pytorch/extern/.gitkeep b/training/metax/faster_rcnn-pytorch/extern/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/training/metax/metax_monitor.py b/training/metax/metax_monitor.py
new file mode 100644
index 000000000..a8d745822
--- /dev/null
+++ b/training/metax/metax_monitor.py
@@ -0,0 +1,288 @@
+#!/usr/bin/env python3
+# encoding: utf-8
+'''
+Usage: python3 metax_monitor.py -o operation -l [log_path]
+           -o, --operation start|stop|restart|status
+           -l, --log log path , ./logs/ default
+'''
+
+import os
+import sys
+import time
+import signal
+import atexit
+import argparse
+import datetime
+from multiprocessing import Process
+import subprocess
+import schedule
+
+
+class Daemon:
+    '''
+    daemon subprocess class.
+    usage: subclass this daemon and override the run() method.
+    pid file: kept in /tmp/, auto-deleted on unexpected exit.
+    verbose: debug mode, disabled by default.
+    '''
+
+    def __init__(self,
+                 pid_file,
+                 log_file,
+                 err_file,
+                 gpu_log,
+                 log_path,
+                 rate=5,
+                 stdin=os.devnull,
+                 stdout=os.devnull,
+                 stderr=os.devnull,
+                 home_dir='.',
+                 umask=0o22,
+                 verbose=0):
+        self.stdin = stdin
+        self.stdout = stdout
+        self.stderr = stderr
+        self.home_dir = home_dir
+        self.verbose = verbose
+        self.pidfile = pid_file
+        self.logfile = log_file
+        self.errfile = err_file
+        self.gpufile = gpu_log
+        self.logpath = log_path
+        self.rate = rate
+        self.umask = umask
+        self.verbose = verbose
+        self.daemon_alive = True
+
+    def get_pid(self):
+        try:
+            with open(self.pidfile, 'r') as pf:
+                pid = int(pf.read().strip())
+        except IOError:
+            pid = None
+        except SystemExit:
+            pid = None
+        return pid
+
+    def del_pid(self):
+        if os.path.exists(self.pidfile):
+            os.remove(self.pidfile)
+
+    def run(self):
+        '''
+        NOTE: override the method in subclass
+        '''
+
+        def gpu_mon(file):
+            TIMESTAMP = datetime.datetime.now().strftime('%Y-%m-%d-%H:%M:%S')
+            cmd = "mx-smi"  # |grep 'Default'|awk '{print $3,$5,$9,$11,$13}'
+            process = subprocess.Popen(cmd,
+                                       shell=True,
+                                       stdout=subprocess.PIPE,
+                                       stderr=subprocess.STDOUT,
+                                       encoding='utf-8')
+            try:
+                out = process.communicate(timeout=10)
+            except subprocess.TimeoutExpired:
+                process.kill()
+                out = process.communicate()
+
+            if process.returncode != 0:
+                result = TIMESTAMP + "\n" + "error\n" + out[0] + "\n"
+            else:
+                result = TIMESTAMP + "\n" + out[0] + "\n"
+            with open(file, 'a') as f:
+                f.write(result)
+
+        def timer_gpu_mon():
+            gpu_process = Process(target=gpu_mon, args=(self.gpufile, ))
+            gpu_process.start()
+
+        schedule.every(self.rate).seconds.do(timer_gpu_mon)
+        while True:
+            schedule.run_pending()
+            time.sleep(5)
+
+    def daemonize(self):
+        if self.verbose >= 1:
+            print('daemon process starting ...')
+        try:
+            pid = os.fork()
+            if pid > 0:
+                sys.exit(0)
+        except OSError as e:
+            sys.stderr.write('fork #1 failed: %d (%s)\n' %
+                             (e.errno, e.strerror))
+            sys.exit(1)
+        os.chdir(self.home_dir)
+        os.setsid()
+        os.umask(self.umask)
+        try:
+            pid = os.fork()
+            if pid > 0:
+                sys.exit(0)
+        except OSError as e:
+            sys.stderr.write('fork #2 failed: %d (%s)\n' %
+                             (e.errno, e.strerror))
+            sys.exit(1)
+        sys.stdout.flush()
+        sys.stderr.flush()
+        si = open(self.stdin, 'r')
+        so = open(self.stdout, 'a+')
+        if self.stderr:
+            se = open(self.stderr, 'a+')
+        else:
+            se = so
+        os.dup2(si.fileno(), sys.stdin.fileno())
+        os.dup2(so.fileno(), sys.stdout.fileno())
+        os.dup2(se.fileno(), sys.stderr.fileno())
+        atexit.register(self.del_pid)
+        pid = str(os.getpid())
+        with open(self.pidfile, 'w+') as f:
+            f.write('%s\n' % pid)
+
+    def start(self):
+        if not os.path.exists(self.logpath):
+            os.makedirs(self.logpath)
+        elif os.path.exists(self.gpufile):
+            os.remove(self.gpufile)
+        if self.verbose >= 1:
+            print('ready to start ......')
+        # check for a pid file to see if the daemon already runs
+        pid = self.get_pid()
+        if pid:
+            msg = 'pid file %s already exists, is it already running?\n'
+            sys.stderr.write(msg % self.pidfile)
+            sys.exit(1)
+        # start the daemon
+        self.daemonize()
+        self.run()
+
+    def stop(self):
+        if self.verbose >= 1:
+            print('stopping ...')
+        pid = self.get_pid()
+        if not pid:
+            msg = 'pid file [%s] does not exist. Not running?\n' % self.pidfile
+            sys.stderr.write(msg)
+            if os.path.exists(self.pidfile):
+                os.remove(self.pidfile)
+            return
+        # try to kill the daemon process
+        try:
+            i = 0
+            while 1:
+                os.kill(pid, signal.SIGTERM)
+                time.sleep(1)
+                i = i + 1
+                if i % 10 == 0:
+                    os.kill(pid, signal.SIGHUP)
+        except OSError as err:
+            err = str(err)
+            if err.find('No such process') > 0:
+                if os.path.exists(self.pidfile):
+                    os.remove(self.pidfile)
+            else:
+                print(str(err))
+                sys.exit(1)
+        if self.verbose >= 1:
+            print('Stopped!')
+
+    def restart(self):
+        self.stop()
+        self.start()
+
+    def status(self):
+        pid = self.get_pid()
+        if pid:
+            if os.path.exists('/proc/%d' % pid):
+                return pid
+        return False
+
+
+def parse_args():
+    ''' Check script input parameter. '''
+    parse = argparse.ArgumentParser(description='Sys monitor script')
+    parse.add_argument('-o',
+                       type=str,
+                       metavar='[operation]',
+                       required=True,
+                       help='start|stop|restart|status')
+    parse.add_argument('-l',
+                       type=str,
+                       metavar='[log_path]',
+                       required=False,
+                       default='./logs/',
+                       help='log path')
+    args = parse.parse_args()
+    return args
+
+
+def get_system_info():
+    cmd = r"echo OS version:;"
+    cmd = cmd + r"cat /etc/issue | head -n1 | awk '{print $1, $2, $3}';"
+    cmd = cmd + r"echo ;"
+
+    cmd = cmd + r"echo OS Kernel version:;"
+    cmd = cmd + r"uname -r;"
+    cmd = cmd + r"echo ;"
+
+    cmd = cmd + r"echo Hardware Model:;"
+    cmd = cmd + r"sudo dmidecode | grep -A9 'System Information' | tail -n +2 | sed 's/^[ \t]*//';"
+    cmd = cmd + r"echo ;"
+
+    cmd = cmd + r"echo Accelerator Model:;"
+    cmd = cmd + r"mx-smi -L;"
+    cmd = cmd + r"echo ;"
+
+    cmd = cmd + r"echo Accelerator Driver version:;"
+    cmd = cmd + r"mx-smi | grep 'Driver Version' | awk '{print $3}';"
+    cmd = cmd + r"echo ;"
+
+    cmd = cmd + r"echo Docker version:;"
+    cmd = cmd + r"docker -v"
+
+    return cmd
+
+
+def main():
+    sample_rate1 = 5
+    args = parse_args()
+    operation = args.o
+    log_path = args.l
+    pid_fn = str('/tmp/gpu_monitor.pid')
+    log_fn = str(log_path + '/mx-smi_monitor.log')
+    err_fn = str(log_path + '/mx-smi_monitor.err')
+    # result for gpu
+    gpu_fn = str(log_path + '/mx-smi_monitor.log')
+
+    subdaemon = Daemon(pid_fn,
+                       log_fn,
+                       err_fn,
+                       gpu_fn,
+                       log_path,
+                       verbose=1,
+                       rate=sample_rate1)
+    if operation == 'start':
+        sys_fn = os.path.join(log_path, 'sys_info.log')
+        cmd = get_system_info()
+        with open(sys_fn, "w") as f:
+            p = subprocess.Popen(cmd, shell=True, stdout=f, stderr=subprocess.STDOUT)
+            p.wait()
+        subdaemon.start()
+    elif operation == 'stop':
+        subdaemon.stop()
+    elif operation == 'restart':
+        subdaemon.restart()
+    elif operation == 'status':
+        pid = subdaemon.status()
+        if pid:
+            print('process [%s] is running ......' % pid)
+        else:
+            print('daemon process [%s] stopped' % pid)
+    else:
+        print("invalid argument!")
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/training/run_benchmarks/config/test_conf.py b/training/run_benchmarks/config/test_conf.py
index a2fca20d3..400119015 100644
--- a/training/run_benchmarks/config/test_conf.py
+++ b/training/run_benchmarks/config/test_conf.py
@@ -1,7 +1,7 @@
 '''Test Configs, including'''
 # -*-coding:utf-8 -*-
 
-# Set accelerator's vendor name, e.g. iluvatar, cambricon, kunlunxin, ascend and mthreads.
+# Set accelerator's vendor name, e.g. iluvatar, cambricon, kunlunxin, ascend, mthreads and metax.
 # We will run benchmarks in training/
 VENDOR = "nvidia"
 
@@ -21,6 +21,8 @@
 #     -v /usr/local/Ascend/driver -v /usr/local/dcmi -v /usr/local/bin/npu-smi"
 # mthreads:
 #     " --env MTHREADS_VISIBLE_DEVICES=all"
+# metax:
+#     " --device=/dev/dri --device=/dev/mxcd --group-add video"
 ACCE_CONTAINER_OPT = " --gpus all"
 # XXX_VISIBLE_DEVICE item name in env
 # possible value of ACCE_VISIBLE_DEVICE_ENV_NAME are:
@@ -148,4 +150,6 @@
     # "transformer:pytorch:BI-V100:1:8:1": "/raid/dataset/transformer/wmt14_en_de_joined_dict",
     # "bert_hf:pytorch:BI-V100:1:8:1": "/raid/dataset/bert_hf_train",
 
+    # metax cases
+    # "faster_rcnn:C500:pytorch_2.0:1:8:1": "/dataset/coco2017/",
 }
diff --git a/training/run_benchmarks/run.py b/training/run_benchmarks/run.py
index 96f1f508e..a846ce2b1 100644
--- a/training/run_benchmarks/run.py
+++ b/training/run_benchmarks/run.py
@@ -288,9 +288,12 @@ def start_tasks_in_cluster(dp_path, container_name, case_config,
                            base_args, count, curr_log_path):
     '''Start tasks in cluster, and NOT wait.'''
     nnodes = case_config["nnodes"]
+    framework_sub_path = case_config["framework"]
+    if "_" in framework_sub_path:
+        framework_sub_path = framework_sub_path.split("_")[0]
     env_file = os.path.join(
         tc.FLAGPERF_PATH, tc.VENDOR,
-        case_config["model"] + "-" + case_config["framework"],
+        case_config["model"] + "-" + framework_sub_path,
         "config/environment_variables.sh")
     framework = case_config["framework"].split("_")[0]
     if (os.path.isfile(env_file)):
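To make the run.py change above concrete, here is a small sketch of the new environment-file path derivation, assuming VENDOR = "metax" and an illustrative checkout path; with framework "pytorch_2.0" the lookup now lands in the model's `-pytorch` directory rather than a non-existent `-pytorch_2.0` one.

```python
import os

# Illustrative values only; FLAGPERF_PATH and the case definition come from
# test_conf.py in a real run.
FLAGPERF_PATH = "/path/to/FlagPerf/training"
VENDOR = "metax"
case_config = {"model": "faster_rcnn", "framework": "pytorch_2.0"}

# Same logic as the added lines in run.py: strip the version suffix so the
# framework maps onto the vendor's "<model>-pytorch" directory.
framework_sub_path = case_config["framework"]
if "_" in framework_sub_path:
    framework_sub_path = framework_sub_path.split("_")[0]  # "pytorch_2.0" -> "pytorch"

env_file = os.path.join(FLAGPERF_PATH, VENDOR,
                        case_config["model"] + "-" + framework_sub_path,
                        "config/environment_variables.sh")
print(env_file)  # .../metax/faster_rcnn-pytorch/config/environment_variables.sh
```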