[aarch64] Add CUDA 12.4 build script for ARM wheel #1775

Merged 5 commits on Apr 19, 2024
Changes from all commits
21 changes: 21 additions & 0 deletions .github/workflows/build-manywheel-images.yml
@@ -13,6 +13,7 @@ on:
       - .github/workflows/build-manywheel-images.yml
       - manywheel/Dockerfile
       - manywheel/Dockerfile_aarch64
+      - manywheel/Dockerfile_cuda_aarch64
       - manywheel/Dockerfile_cxx11-abi
       - manywheel/build_docker.sh
       - 'common/*'
@@ -21,6 +22,7 @@ on:
       - .github/workflows/build-manywheel-images.yml
       - manywheel/Dockerfile
       - manywheel/Dockerfile_aarch64
+      - manywheel/Dockerfile_cuda_aarch64
       - manywheel/Dockerfile_cxx11-abi
       - 'common/*'
       - manywheel/build_docker.sh
@@ -54,6 +56,25 @@ jobs:
       - name: Build Docker Image
         run: |
           manywheel/build_docker.sh
+  build-docker-cuda-aarch64:
+    runs-on: linux.arm64.2xlarge
+    strategy:
+      matrix:
+        cuda_version: ["12.4"]
+    env:
+      GPU_ARCH_TYPE: cuda-aarch64
+      GPU_ARCH_VERSION: ${{ matrix.cuda_version }}
+    steps:
+      - name: Checkout PyTorch
+        uses: actions/checkout@v3
+      - name: Authenticate if WITH_PUSH
+        run: |
+          if [[ "${WITH_PUSH}" == true ]]; then
+            echo "${DOCKER_TOKEN}" | docker login -u "${DOCKER_ID}" --password-stdin
+          fi
+      - name: Build Docker Image
+        run: |
+          manywheel/build_docker.sh
   build-docker-rocm:
     runs-on: linux.12xlarge
     strategy:
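For reference, the new job drives the same entry point as the existing manywheel jobs. A local dry run of the image build might look like the sketch below (an assumption-laden example: it presumes an arm64 Docker host and that manywheel/build_docker.sh reads GPU_ARCH_TYPE and GPU_ARCH_VERSION from the environment, as the job's env block suggests):

    # Build the cuda-aarch64 manylinux image locally, without pushing
    export GPU_ARCH_TYPE=cuda-aarch64
    export GPU_ARCH_VERSION=12.4   # matches the workflow's matrix entry
    manywheel/build_docker.sh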
8 changes: 7 additions & 1 deletion aarch64_linux/aarch64_ci_build.sh
@@ -26,4 +26,10 @@ cd /
 git config --global --add safe.directory /pytorch
 pip install -r /pytorch/requirements.txt
 pip install auditwheel
-python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
+if [ -n "$GPU_ARCH_VERSION" ]; then
+    echo "BASE_CUDA_VERSION is set to: $GPU_ARCH_VERSION"
+    python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
+else
+    echo "BASE_CUDA_VERSION is not set."
+    python /builder/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
+fi
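The CUDA path is gated purely on GPU_ARCH_VERSION being non-empty. A minimal sketch of exercising both branches (assuming the /builder checkout layout the CI container uses):

    # CUDA build: any non-empty value flips the script to --enable-cuda
    GPU_ARCH_VERSION=12.4 bash aarch64_linux/aarch64_ci_build.sh

    # CPU-only build
    unset GPU_ARCH_VERSION
    bash aarch64_linux/aarch64_ci_build.sh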
180 changes: 139 additions & 41 deletions aarch64_linux/aarch64_wheel_ci_build.py
@@ -9,103 +9,201 @@


 def list_dir(path: str) -> List[str]:
-    ''''
+    """'
     Helper for getting paths for Python
-    '''
+    """
     return check_output(["ls", "-1", path]).decode().split("\n")


 def build_ArmComputeLibrary() -> None:
-    '''
+    """
     Using ArmComputeLibrary for aarch64 PyTorch
-    '''
-    print('Building Arm Compute Library')
-    acl_build_flags=["debug=0", "neon=1", "opencl=0", "os=linux", "openmp=1", "cppthreads=0",
-                     "arch=armv8a", "multi_isa=1", "fixed_format_kernels=1", "build=native"]
-    acl_install_dir="/acl"
-    acl_checkout_dir="ComputeLibrary"
+    """
+    print("Building Arm Compute Library")
+    acl_build_flags = [
+        "debug=0",
+        "neon=1",
+        "opencl=0",
+        "os=linux",
+        "openmp=1",
+        "cppthreads=0",
+        "arch=armv8a",
+        "multi_isa=1",
+        "fixed_format_kernels=1",
+        "build=native",
+    ]
+    acl_install_dir = "/acl"
+    acl_checkout_dir = "ComputeLibrary"
     os.makedirs(acl_install_dir)
-    check_call(["git", "clone", "https://github.com/ARM-software/ComputeLibrary.git", "-b", "v23.08",
-                "--depth", "1", "--shallow-submodules"])
-    check_call(["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"] + acl_build_flags,
-               cwd=acl_checkout_dir)
+    check_call(
+        [
+            "git",
+            "clone",
+            "https://github.com/ARM-software/ComputeLibrary.git",
+            "-b",
+            "v23.08",
+            "--depth",
+            "1",
+            "--shallow-submodules",
+        ]
+    )
+    check_call(
+        ["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"]
+        + acl_build_flags,
+        cwd=acl_checkout_dir,
+    )
     for d in ["arm_compute", "include", "utils", "support", "src"]:
         shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}")


+def update_wheel(wheel_path) -> None:
+    """
+    Update the cuda wheel libraries
+    """
+    folder = os.path.dirname(wheel_path)
+    wheelname = os.path.basename(wheel_path)
+    os.mkdir(f"{folder}/tmp")
+    os.system(f"unzip {wheel_path} -d {folder}/tmp")
+    libs_to_copy = [
+        "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
+        "/usr/local/cuda/lib64/libcudnn.so.8",
+        "/usr/local/cuda/lib64/libcublas.so.12",
+        "/usr/local/cuda/lib64/libcublasLt.so.12",
+        "/usr/local/cuda/lib64/libcudart.so.12",
+        "/usr/local/cuda/lib64/libcufft.so.11",
+        "/usr/local/cuda/lib64/libcusparse.so.12",
+        "/usr/local/cuda/lib64/libcusparseLt.so.0",
+        "/usr/local/cuda/lib64/libcusolver.so.11",
+        "/usr/local/cuda/lib64/libcurand.so.10",
+        "/usr/local/cuda/lib64/libnvToolsExt.so.1",
+        "/usr/local/cuda/lib64/libnvJitLink.so.12",
+        "/usr/local/cuda/lib64/libnvrtc.so.12",
+        "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.4",
+        "/usr/local/cuda/lib64/libcudnn_adv_infer.so.8",
+        "/usr/local/cuda/lib64/libcudnn_adv_train.so.8",
+        "/usr/local/cuda/lib64/libcudnn_cnn_infer.so.8",
+        "/usr/local/cuda/lib64/libcudnn_cnn_train.so.8",
+        "/usr/local/cuda/lib64/libcudnn_ops_infer.so.8",
+        "/usr/local/cuda/lib64/libcudnn_ops_train.so.8",
+        "/opt/conda/envs/aarch64_env/lib/libopenblas.so.0",
"/opt/conda/envs/aarch64_env/lib/libgfortran.so.5",
"/opt/conda/envs/aarch64_env/lib/libgomp.so.1",
Review thread on the libgomp.so.1 entry:

Contributor: Currently the scripts are packaging libomp.so. Did you check the inference performance for any models? Is there any performance difference observed with libgomp vs libomp in the current wheels?

Contributor: I'm observing around a 10% performance drop for eager-mode inference with libgomp compared to libomp. If you don't have a strong preference, I suggest keeping libomp till we know better. For more details, check my comment here: #1774 (comment)

@Aidyn-A (Contributor, Apr 12, 2024): Can you please do a follow-up on the libgomp to libomp migration in your PR #1781? It is not a trivial change and certainly out of scope for this PR. I would like to underline that this PR is for enabling CUDA. libgomp has been used with PyTorch for a long time: it is reliable and nothing is wrong with it functionality-wise. Moreover, I do not want Ting to waste her time debugging dependencies due to libomp in this PR.

@snadampal (Contributor, Apr 12, 2024): Hi @Aidyn-A, currently the aarch64 wheels are linked to libomp, not libgomp: https://pypi.org/project/torch/#files. My point was: why change it now, without a strong reason? I have another PR to switch the wheels from libomp to libgomp, but it is currently blocked due to the 10% regression.

Contributor: From what I am seeing, it comes with:

    /usr/local/lib/python3.8/dist-packages/torch.libs/libgomp-0f9e2209.so.1.0.0

Contributor: I hope you are checking either the torch 2.2 or the nightly aarch64-linux wheel, because I am seeing:

    libomp-b8e5bcfb.so => /home/ubuntu/.local/lib/python3.10/site-packages/torch/lib/./../../torch.libs/libomp-b8e5bcfb.so (0x0000ffffa8c30000)

Complete list:

    linux-vdso.so.1 (0x0000ffffb298d000)
    libtorch_cpu.so => /home/ubuntu/.local/lib/python3.10/site-packages/torch/lib/./libtorch_cpu.so (0x0000ffffaa960000)
    libgcc_s.so.1 => /lib/aarch64-linux-gnu/libgcc_s.so.1 (0x0000ffffaa930000)
    libc.so.6 => /lib/aarch64-linux-gnu/libc.so.6 (0x0000ffffaa780000)
    /lib/ld-linux-aarch64.so.1 (0x0000ffffb2954000)
    libc10.so => /home/ubuntu/.local/lib/python3.10/site-packages/torch/lib/./libc10.so (0x0000ffffaa680000)
    librt.so.1 => /lib/aarch64-linux-gnu/librt.so.1 (0x0000ffffaa660000)
    libdl.so.2 => /lib/aarch64-linux-gnu/libdl.so.2 (0x0000ffffaa640000)
    libopenblasp-r0-f658af2e.3.25.so => /home/ubuntu/.local/lib/python3.10/site-packages/torch/lib/./../../torch.libs/libopenblasp-r0-f658af2e.3.25.so (0x0000ffffa8e10000)
    libm.so.6 => /lib/aarch64-linux-gnu/libm.so.6 (0x0000ffffa8d70000)
    libomp-b8e5bcfb.so => /home/ubuntu/.local/lib/python3.10/site-packages/torch/lib/./../../torch.libs/libomp-b8e5bcfb.so (0x0000ffffa8c30000)
    libpthread.so.0 => /lib/aarch64-linux-gnu/libpthread.so.0 (0x0000ffffa8c10000)
    libarm_compute-7362313d.so => /home/ubuntu/.local/lib/python3.10/site-packages/torch/lib/./../../torch.libs/libarm_compute-7362313d.so (0x0000ffffa8170000)
    libarm_compute_graph-15f701fb.so => /home/ubuntu/.local/lib/python3.10/site-packages/torch/lib/./../../torch.libs/libarm_compute_graph-15f701fb.so (0x0000ffffa8030000)
    libarm_compute_core-0793f69d.so => /home/ubuntu/.local/lib/python3.10/site-packages/torch/lib/./../../torch.libs/libarm_compute_core-0793f69d.so (0x0000ffffa7fe0000)
    libstdc++.so.6 => /lib/aarch64-linux-gnu/libstdc++.so.6 (0x0000ffffa7db0000)
    libgfortran-105e6576.so.5.0.0 => /home/ubuntu/.local/lib/python3.10/site-packages/torch/lib/./../../torch.libs/libgfortran-105e6576.so.5.0.0 (0x0000ffffa7c50000)

Contributor: By the way, libomp wasn't intentionally chosen for the aarch64-linux wheels; I think it was picked up from the conda environment. If we all agree that libgomp is what is recommended for PyTorch, then I'm fine with switching to it now. In fact, I've already suggested moving to GNU OpenMP (#1774) but am waiting mainly because of the 10% regression with it in eager mode.

Contributor: Now we have more data on libgomp vs libomp (please check #1774 (comment)), and I'm fine with switching to libgomp for aarch64 linux.

"/acl/build/libarm_compute.so",
"/acl/build/libarm_compute_graph.so",
"/acl/build/libarm_compute_core.so",
]
# Copy libraries to unzipped_folder/a/lib
for lib_path in libs_to_copy:
lib_name = os.path.basename(lib_path)
shutil.copy2(lib_path, f"{folder}/tmp/torch/lib/{lib_name}")
os.system(
f"cd {folder}/tmp/torch/lib/; patchelf --set-rpath '$ORIGIN' {folder}/tmp/torch/lib/libtorch_cuda.so"
)
os.mkdir(f"{folder}/cuda_wheel")
os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *")
shutil.move(
f"{folder}/cuda_wheel/{wheelname}",
f"/dist/{wheelname}",
copy_function=shutil.copy2,
)
os.system(f"rm -rf {folder}/tmp {folder}/dist/cuda_wheel/")


def complete_wheel(folder: str) -> str:
'''
"""
Complete wheel build and put in artifact location
'''
"""
wheel_name = list_dir(f"/{folder}/dist")[0]

if "pytorch" in folder:
if "pytorch" in folder and not enable_cuda:
print("Repairing Wheel with AuditWheel")
check_call(["auditwheel","repair", f"dist/{wheel_name}"], cwd=folder)
check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder)
repaired_wheel_name = list_dir(f"/{folder}/wheelhouse")[0]

print(f"Moving {repaired_wheel_name} wheel to /{folder}/dist")
os.rename(f"/{folder}/wheelhouse/{repaired_wheel_name}", f"/{folder}/dist/{repaired_wheel_name}")
os.rename(
f"/{folder}/wheelhouse/{repaired_wheel_name}",
f"/{folder}/dist/{repaired_wheel_name}",
)
else:
repaired_wheel_name = wheel_name

print(f"Copying {repaired_wheel_name} to artfacts")
shutil.copy2(f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}")
print(f"Copying {repaired_wheel_name} to artifacts")
shutil.copy2(
f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}"
)

return repaired_wheel_name


 def parse_arguments():
-    '''
+    """
     Parse inline arguments
-    '''
+    """
     from argparse import ArgumentParser

     parser = ArgumentParser("AARCH64 wheels python CD")
     parser.add_argument("--debug", action="store_true")
     parser.add_argument("--build-only", action="store_true")
     parser.add_argument("--test-only", type=str)
     parser.add_argument("--enable-mkldnn", action="store_true")
+    parser.add_argument("--enable-cuda", action="store_true")
     return parser.parse_args()


-if __name__ == '__main__':
-    '''
+if __name__ == "__main__":
+    """
     Entry Point
-    '''
+    """
     args = parse_arguments()
     enable_mkldnn = args.enable_mkldnn
-    repo = Repository('/pytorch')
+    enable_cuda = args.enable_cuda
+    repo = Repository("/pytorch")
     branch = repo.head.name
-    if branch == 'HEAD':
-        branch = 'master'
-
+    if branch == "HEAD":
+        branch = "master"

-    print('Building PyTorch wheel')
+    print("Building PyTorch wheel")
     build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
     os.system("python setup.py clean")

     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
     if override_package_version is not None:
         version = override_package_version
-        build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 "
-    elif branch in ['nightly', 'master']:
-        build_date = check_output(['git', 'log', '--pretty=format:%cs', '-1'], cwd='/pytorch').decode().replace('-', '')
-        version = check_output(['cat', 'version.txt'], cwd='/pytorch').decode().strip()[:-2]
+        build_vars += (
+            f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 "
+        )
+    elif branch in ["nightly", "master"]:
+        build_date = (
+            check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch")
+            .decode()
+            .replace("-", "")
+        )
+        version = (
+            check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2]
+        )
         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
     elif branch.startswith(("v1.", "v2.")):
         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "

     if enable_mkldnn:
         build_ArmComputeLibrary()
         print("build pytorch with mkldnn+acl backend")
-        build_vars += "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " \
-                      "ACL_ROOT_DIR=/acl " \
-                      "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " \
-                      "ACL_INCLUDE_DIR=/acl/build " \
-                      "ACL_LIBRARY=/acl/build "
+        build_vars += (
+            "USE_MKLDNN=ON USE_MKLDNN_ACL=ON "
+            "ACL_ROOT_DIR=/acl "
+            "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH "
+            "ACL_INCLUDE_DIR=/acl/build "
+            "ACL_LIBRARY=/acl/build "
+        )
     else:
         print("build pytorch without mkldnn backend")

     os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel")
pytorch_wheel_name = complete_wheel("pytorch")
print(f"Build Compelete. Created {pytorch_wheel_name}..")
if enable_cuda:
print("Updating Cuda Dependency")
filename = os.listdir("/pytorch/dist/")
wheel_path = f"/pytorch/dist/{filename[0]}"
update_wheel(wheel_path)
pytorch_wheel_name = complete_wheel("/pytorch/")
print(f"Build Complete. Created {pytorch_wheel_name}..")
90 changes: 90 additions & 0 deletions common/install_cuda_aarch64.sh
@@ -0,0 +1,90 @@
#!/bin/bash

set -ex

function install_cusparselt_052 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
tar xf libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz
cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}

function install_124 {
echo "Installing CUDA 12.4 and cuDNN 8.9 and NCCL 2.20.5 and cuSparseLt-0.5.2"
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
# install CUDA 12.4.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux_sbsa.run
chmod +x cuda_12.4.0_550.54.14_linux_sbsa.run
./cuda_12.4.0_550.54.14_linux_sbsa.run --toolkit --silent
rm -f cuda_12.4.0_550.54.14_linux_sbsa.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda

# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz -O cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz
tar xf cudnn-linux-sbsa-8.9.2.26_cuda12-archive.tar.xz
cp -a cudnn-linux-sbsa-8.9.2.26_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-sbsa-8.9.2.26_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn

# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
git clone -b v2.20.5-1 --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf nccl

install_cusparselt_052

ldconfig
}

function prune_124 {
Review thread on prune_124:

@nWEIdia (Collaborator): What does NVPrune do?

Collaborator: @nWEIdia it removes unused GPU architectures from libraries to lower the binary size. This workflow can be dangerous if libraries depend on heuristics and select kernels from the same GPU family (we've seen issues before where sm_61 was dropped, causing all kinds of issues on GTX cards). I don't think the pruning is useful anymore, as we are using the CUDA dependencies from PyPI now. However, we might want to keep it here and follow up with a cleanup in a separate PR.

echo "Pruning CUDA 12.4"
#####################################################################################
# CUDA 12.4 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"

export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"

if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi

# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"

# prune CuDNN and CuBLAS
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a

#####################################################################################
# CUDA 12.4 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.4/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
}

# idiomatic parameter and option handling in sh
while test $# -gt 0
do
case "$1" in
12.4) install_124; prune_124
;;
*) echo "bad argument $1"; exit 1
;;
esac
shift
done
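The script is driven by a single positional version argument. A sketch of running it and then spot-checking the pruning (an assumption: cuobjdump from the same toolkit is on PATH; its --list-elf option lists the embedded cubins per GPU architecture, one way to confirm that only the GENCODE targets survive, as discussed in the nvprune thread above):

    bash common/install_cuda_aarch64.sh 12.4
    # after prune_124, only the sm_50..sm_90 targets listed in GENCODE should remain
    /usr/local/cuda-12.4/bin/cuobjdump --list-elf /usr/local/cuda-12.4/lib64/libcufft_static.a | head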