
[SYNC] Sync CentML -> hidet-org #455

Merged · 98 commits · Jul 23, 2024
5ce601d
[FFI] Refactor CompiledFunction interface with ctypes (#79)
jacklee1792 Apr 3, 2024
396ad09
[App] Complete UNet Definition (#99)
KTong821 Apr 5, 2024
08eef12
[Graph] Stable Diffusion Rope Module (#95)
KTong821 Apr 5, 2024
860fc15
[CI] Add clang-format script/action (#120)
jacklee1792 Apr 5, 2024
68a3fe4
[Graph] Add major UNet building components (#97)
KTong821 Apr 5, 2024
9033c10
[Models] Support for tokenizers in C++ runtime (#69)
jacklee1792 Apr 6, 2024
0067ce6
[LLM App] LLM Application initial support (#121)
yaoyaoding Apr 8, 2024
b63caf9
Stable Diffusion App Infra (#103)
KTong821 Apr 11, 2024
e57a9bb
[Graph] Enhance forward debug instrument (#130)
jacklee1792 Apr 15, 2024
065a82d
[COMPTIME] Parallelize `apply_prologue_epilog`(fusion) and IR generat…
vadiklyutiy Apr 18, 2024
a3b7dab
[Ir][Primitives] fix __shfl_xor_sync (#155)
xiaocenxiaocen Apr 19, 2024
7d43780
[Operator] Register missing math primitives (#134)
jacklee1792 Apr 19, 2024
bbf0530
[Operator] Fix symbolic broadcasting (#131)
jacklee1792 Apr 19, 2024
aef8220
[COMPTIME] Specialize `Constant._binary()` for compilation speedup (#…
vadiklyutiy Apr 20, 2024
fd63e52
[App] Minor bugfixes for LLM app (#157)
jacklee1792 Apr 22, 2024
527653a
[IR] [Primitives] Add thread cluster on sm_90 (#145)
KTong821 Apr 22, 2024
4f80460
Gemma+torch.compile fixes(autocast, rtruediv) (#159)
vadiklyutiy Apr 22, 2024
096bfcb
[Operator] triu + tril operators (#146)
jacklee1792 Apr 22, 2024
e4a0386
[App] Fix LLM app tracing (#158)
jacklee1792 Apr 23, 2024
43ec055
[Fixbug] Set _is_exiting correctly (#163)
jacklee1792 Apr 24, 2024
76bf2f6
[App] Cleanup SD Implementation (#143)
KTong821 Apr 24, 2024
39cc879
Support Transpose2D (#77)
zhiwei-fang Apr 25, 2024
982b552
[Models] Gemma implementation (#132)
jacklee1792 Apr 26, 2024
b75e5d8
Revive dynamic shape support with `torch.compile` (#162)
vadiklyutiy Apr 26, 2024
742a6b6
[App] ResNet Compiled App (2/2) - Pipeline (#165)
KTong821 Apr 27, 2024
cafaeed
[OPS] Add `torch.Tensor.sin`, `torch.Tensor.cos` and `torch._C._nn.pa…
vadiklyutiy Apr 30, 2024
3131abe
[Ir][Primitives] add hopper instructions (#83)
xiaocenxiaocen May 3, 2024
b938f74
[App] SyncLLM + AsyncLLM interface (#166)
jacklee1792 May 6, 2024
bee8c2a
optimize grouping method (#174)
maxyanghu May 7, 2024
f1e5162
[COMPTIME] Add `chunksize` arg to `pool.imap` (#178)
vadiklyutiy May 8, 2024
dbc130a
[COMPTIME] Reduce the number of `fork` in `multithreading.Pool` (#180)
vadiklyutiy May 9, 2024
459cab4
[Bug] Fix number of groups under certain case (#181)
maxyanghu May 10, 2024
067b155
[Operator] Adding meshgrid operator support (#183)
BolinSNLHM May 12, 2024
c7d827a
[Fix] Remove YOLOv7 from tests/benchmarks/run_configs.json (#187)
BolinSNLHM May 12, 2024
70c15b2
__getitem__ with N dimensional index tensor (#185)
zhumakhan May 14, 2024
8c17b1c
feat: parallel job execution for tests (#147)
c-fteixeira May 14, 2024
ebd3d85
[OPTIONS] Don't create hidet config if it's not exist (#203)
vadiklyutiy May 21, 2024
ec93d68
[BUG] Clear `_job_queue` in `parallel_imap` for tests (#204)
vadiklyutiy May 21, 2024
7f11d07
[Ir] add utilities for CuTe (#107)
xiaocenxiaocen May 24, 2024
6adc3d1
[BUGFIX] Init cuda info before run forks for IR generation (#208)
vadiklyutiy May 24, 2024
0881032
[BENCHs] Refactor transformers tests. Add llama2, mistral, gemma, gpt…
vadiklyutiy May 27, 2024
f8bb7dc
Fix issues related to mistral model (#213)
zhumakhan May 28, 2024
156268b
steal_weight option fixes && fixes for mistral model (#209)
zhumakhan May 28, 2024
eda4e61
Xiaocenxiaocen/expose more ldst instructions (#216)
xiaocenxiaocen May 28, 2024
5c7bfe2
[Ir][CuTE] lower cute dialect (#109) (#230)
xiaocenxiaocen May 30, 2024
9d14cc1
[Operator] Registering `torch.Tensor.argmax` (#234)
BolinSNLHM May 31, 2024
4add4b9
[Operators] Registering `torch.as_tensor` (#235)
BolinSNLHM Jun 4, 2024
4d95978
Inherit `mode` argument from `torch.compile` and set corresponding op…
vadiklyutiy Jun 5, 2024
d91446b
Delete options `use_fp16` and `use_fp16_reduction` (#239)
vadiklyutiy Jun 5, 2024
99b12b3
[Operator] Adding support to operators `torch.Tensor.max` and `torch.…
BolinSNLHM Jun 5, 2024
66bf235
[Operator] Adding `torch.Tensor.expand_as` support (#250)
BolinSNLHM Jun 6, 2024
16eb4a2
[Operator] Adding support for `torch.Tensor.div` (#249)
BolinSNLHM Jun 6, 2024
4be2fd9
[Operator] Registering torch.sigmoid_ (#258)
BolinSNLHM Jun 7, 2024
836e3ac
[OPTIONS] Use Attention by default (#261)
vadiklyutiy Jun 10, 2024
d0877d5
[Operator] Registering `torch.Tensor.copy_` (#259)
BolinSNLHM Jun 10, 2024
8add3b7
[PERF] Increase accuracy of pick up the best candidate (#269)
vadiklyutiy Jun 15, 2024
a94c0c6
[Operator] Adding support to `repeat_interleave` and more (#270)
BolinSNLHM Jun 19, 2024
4706425
[Operator] Added advanced tensor indexing (#251)
zhumakhan Jun 19, 2024
58c351c
[Fix] Handling `getitem` special case (#281)
BolinSNLHM Jun 20, 2024
6ddca7f
[SCRIPTS] Adopt our scripts to use `mode` from `torch.compile` (#274)
vadiklyutiy Jun 20, 2024
33bb7a3
fix: handles race condition on parallel config directory creation (#285)
c-fteixeira Jun 20, 2024
a446ba8
adding support for torch.any (#277)
zhumakhan Jun 20, 2024
39b44ab
[Graph][Ops] fp32 accumulation for matmul_f16 (#268)
xiaocenxiaocen Jun 20, 2024
a87de77
[Operator] torch.any (#287)
zhumakhan Jun 21, 2024
ed0c5e6
[Fix] Handling `Tensor.to(..., device=....)` on symbolic tensors (#284)
BolinSNLHM Jun 24, 2024
6793a09
Removing constant tensors that are not needed after subgraph rewrite …
zhumakhan Jun 24, 2024
0e76d41
[Perf] support vectorized epilogue fusion (#220)
xiaocenxiaocen Jun 24, 2024
2e5b827
[Graph][Ops] fp32 accumulation for cute matmul (#292)
xiaocenxiaocen Jun 25, 2024
d3aa812
[BUG] when device is None, device_from_torch returns 'cpu' by default…
zhumakhan Jun 27, 2024
e1fb8b0
Setitem with tensor values. And Boolean type promotion (#290)
zhumakhan Jun 28, 2024
3a6b8f1
Increase batch size for bert to decrease fluctuations (#236)
vadiklyutiy Jun 28, 2024
bc868f6
[PERF] Reduce fixed overhead for model run (#310)
vadiklyutiy Jun 28, 2024
a6405ac
Handle dtype and device in hidet.ones_like op (#316)
zhumakhan Jul 2, 2024
3f413bf
[Operator] Extending the functionality support for `einsum` (#312)
BolinSNLHM Jul 2, 2024
61fe266
[Chore] replace copyrights with citations (#315)
xiaocenxiaocen Jul 2, 2024
aa2dccc
[Fix] Fix the bug in `tensor_expand` caused by attempting to modify `…
BolinSNLHM Jul 3, 2024
ff1bfc6
[Operators] Adding PyTorch operators encountered while compiling `DAL…
BolinSNLHM Jul 4, 2024
dd134d1
[Fix] Fixing a RuntimeError triggered by `tensor_reshape` function in…
BolinSNLHM Jul 8, 2024
afee434
[Fix] Handling hidet errors caused by device difference in `getitem` …
BolinSNLHM Jul 8, 2024
c7f3f61
[Fix] Fixing an error triggered by `ClampOp` (#329)
BolinSNLHM Jul 8, 2024
f839be6
[Operator] Adding `__ge__` method for the `Tensor` class (#330)
BolinSNLHM Jul 8, 2024
44e5162
[OPTIONS] Inherit `options` from `torch.compile()` (#260)
vadiklyutiy Jul 9, 2024
70d1bb2
[Operators] Adding support for `torch.nn.TransformerEncoder` (#327)
BolinSNLHM Jul 10, 2024
06ab1a0
[Operator] Adding support for `torch.Tensor.view_as` (#334)
BolinSNLHM Jul 10, 2024
4f738b7
[OPTIONS] Remove dynamo_config['search_space'] (#342)
vadiklyutiy Jul 11, 2024
7a1466a
[OPS] Dissallow in fxgraph not supported functions (#317)
vadiklyutiy Jul 11, 2024
ec3af9d
[BUG] Fixed search_space bug in `bench_op.py` (#348)
vadiklyutiy Jul 14, 2024
d2266d7
[Fix] Handling special cases in `setitem` regarding dtype and device …
BolinSNLHM Jul 15, 2024
7fbaa8d
[Fix] Fixing a bug in `register_methods` (#331)
BolinSNLHM Jul 15, 2024
97a0c98
[Fix] Added missing torch.multiply and torch.nn.functional.unfold ops…
zhumakhan Jul 16, 2024
32a2255
[Fix] type casting for attention mask from fp32 -> f16 (#323)
zhumakhan Jul 16, 2024
c6ce9fb
[CI] Promote nvidia docker container to version 24.4 (#354)
vadiklyutiy Jul 17, 2024
a5a5373
[PERF] Introduce add_hint_pass (#355)
vadiklyutiy Jul 17, 2024
3ab84b4
[Operators] Registering tensor methods whose PyTorch function equival…
BolinSNLHM Jul 17, 2024
eb67b6a
[PERF] Remote workaround for loops in `add_hints_pass` (#356)
vadiklyutiy Jul 18, 2024
7db8629
[Fix] Fixing an error triggered while compiling the `torch.nn.Upsampl…
BolinSNLHM Jul 18, 2024
b9551c4
[Operators] Adding `leaky_relu` support (#360)
BolinSNLHM Jul 19, 2024
f5f528f
[CI] Repeat start_instance (#361)
vadiklyutiy Jul 20, 2024
4 changes: 2 additions & 2 deletions .github/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvcr.io/nvidia/pytorch:23.10-py3
+FROM nvcr.io/nvidia/pytorch:24.04-py3
ADD ./hidet /workspace/hidet
ADD ./models /workspace/models
WORKDIR /workspace
@@ -9,4 +9,4 @@ RUN pip install -r hidet/requirements.txt && \
     WHEEL=$(find hidet/scripts/wheel/built_wheel -maxdepth 1 -name '*.whl') && \
     pip install --no-deps --force-reinstall $WHEEL && \
     pip install -e models && \
-    hidet cache clear --all
\ No newline at end of file
+    hidet cache clear --all
27 changes: 27 additions & 0 deletions .github/actions/setup-hidet/action.yaml
@@ -0,0 +1,27 @@
name: 'Setup Hidet'
description: 'Install dependencies, build and install wheel'
runs:
  using: "composite"
  steps:
    - name: Install dependencies via pip
      shell: bash
      run: |
        python -m pip install --upgrade pip
        pip install torch torchvision torchaudio
        pip install -r requirements.txt
        pip install -r requirements-dev.txt

    - name: Build hidet
      shell: bash
      run: |
        bash scripts/wheel/build_wheel.sh
        WHEEL=$(find ./scripts/wheel/built_wheel -maxdepth 1 -name '*.whl')
        echo $WHEEL
        echo "WHEEL_NAME=$WHEEL" >> $GITHUB_ENV

    - name: Install hidet
      shell: bash
      env:
        WHEEL_NAME: ${{ env.WHEEL_NAME }}
      run: |
        pip install --no-deps --force-reinstall $WHEEL_NAME
38 changes: 38 additions & 0 deletions .github/scripts/set_test_matrix.py
@@ -0,0 +1,38 @@
"""
Sets the strategy matrix for the functional ci tests.
This mimics the discovery strategy used by pytest for files inside the tests/ folder
and shards them based on the top level parent folders.

Expects to be executed in a GHA environment, with GITHUB_OUTPUT context available.
"""
import glob
import json
import os
from pathlib import Path

patterns = ('test_*.py', '*_test.py')  # the tuple of file types
files_matched = []
for pattern in patterns:
    files_matched.extend(glob.glob(f"tests/**/{pattern}", recursive=True))

testing_paths = []
for path in files_matched:
    current_path = Path(path)
    testing_paths.append("/".join(current_path.parts[:2]))

include = []

for path in list(set(testing_paths)):
    include.append({
        "path": path
    })

matrix = {
    "include": include
}

matrix_str = json.dumps(matrix)
name = 'matrix'
value = matrix_str
with open(os.environ['GITHUB_OUTPUT'], 'a') as fh:
    print(f'{name}={value}', file=fh)
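The sharding logic of this script can be sanity-checked outside of GitHub Actions. A minimal sketch — the sample file paths below are hypothetical, not from the repository:

```python
import json
from pathlib import Path

def build_matrix(files):
    # Group matched test files by their top-level folder under tests/,
    # mirroring the script above; each shard becomes one matrix entry.
    shards = sorted({"/".join(Path(f).parts[:2]) for f in files})
    return {"include": [{"path": p} for p in shards]}

# hypothetical discovery results
files = [
    "tests/operators/test_matmul.py",
    "tests/operators/reduce_test.py",
    "tests/frontends/torch/test_register.py",
]
print(json.dumps(build_matrix(files)))
# {"include": [{"path": "tests/frontends"}, {"path": "tests/operators"}]}
```

The real script appends this JSON to `GITHUB_OUTPUT`, where the `run-test` job picks it up via `fromJSON`.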
22 changes: 14 additions & 8 deletions .github/scripts/start_instances.py
@@ -18,6 +18,7 @@ def run_command(cmd):
 
 # e.g., ' 1, 2, ,3,,' -> ['1', '2', '3']
 hw_config_ids = os.environ.get('HW_CONFIG').replace(' ', '')
+hw_config_ids = '2'
 repo_org = os.environ.get('REPO_NAME').split('/')[0]
 if hw_config_ids == 'all':
     query = (
@@ -96,14 +97,19 @@ def run_command(cmd):
 
 # Start all instances
 for instance in instances:
-    cloud_provider_id, instance_id, _ = instance
-    if cloud_provider_id == 1:  # AWS
-        cmd = ['aws', 'ec2', 'start-instances', '--instance-ids', instance_id]
-    elif cloud_provider_id == 2:  # Always on, no need to launch. Do Nothing.
-        cmd = ['true']
-    else:
-        raise ValueError(f'Unknown cloud provider id: {cloud_provider_id}')
-    output = run_command(cmd)
+    for i in range(300):
+        cloud_provider_id, instance_id, _ = instance
+        if cloud_provider_id == 1:  # AWS
+            cmd = ['aws', 'ec2', 'start-instances', '--instance-ids', instance_id]
+        elif cloud_provider_id == 2:  # Always on, no need to launch. Do Nothing.
+            cmd = ['true']
+        else:
+            raise ValueError(f'Unknown cloud provider id: {cloud_provider_id}')
+        output = run_command(cmd)
+        if output.returncode == 0:
+            break
+        time.sleep(60)
+
+    if output.returncode != 0:
+        raise RuntimeError(f'Failed to start instance {instance_id} on cloud provider {cloud_provider_id}.')

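The retry loop added in this hunk is an instance of the common poll-until-success pattern. A generic sketch of it — the helper name and parameters are illustrative, not from the PR:

```python
import time

def start_with_retry(start_fn, attempts=300, delay_s=60, sleep=time.sleep):
    # Call start_fn until it reports success (returncode 0), sleeping
    # between attempts; raise if every attempt fails.
    for _ in range(attempts):
        returncode = start_fn()
        if returncode == 0:
            return returncode
        sleep(delay_s)
    raise RuntimeError('Failed to start instance after retries.')

# usage: succeed on the third attempt, with sleeping stubbed out for the demo
codes = iter([1, 1, 0])
print(start_with_retry(lambda: next(codes), sleep=lambda s: None))  # 0
```

Injecting the sleep function keeps the helper testable; the workflow-level `timeout` (see regression.yaml below in spirit) still bounds total wait time.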
7 changes: 7 additions & 0 deletions .github/workflows/lint.yaml
@@ -27,6 +27,8 @@ jobs:
           pip install torch torchvision torchaudio
           pip install -r requirements.txt
           pip install -r requirements-dev.txt
+          sudo apt-get update
+          sudo apt-get install clang-format
       - name: Format with black
         run: |
           # stop the build if format is not correct
@@ -36,3 +38,8 @@
         run: |
           echo "Running with" $(pip freeze | grep "pylint")
           python -m pylint --rcfile ./scripts/lint/pylintrc -j $(nproc) ./python/hidet
+      - name: Format with clang-format
+        run: |
+          echo "Running with" $(clang-format --version)
+          find ./src ./include -iname '*.h' -o -iname '*.cpp' \
+            | xargs clang-format -style=file:scripts/lint/.clang-format --dry-run -Werror
2 changes: 1 addition & 1 deletion .github/workflows/regression.yaml
@@ -34,7 +34,7 @@ jobs:
 
       - name: Run main Python script
        id: run_py_script
-        run: timeout 900 python ./.github/scripts/start_instances.py
+        run: timeout 36000 python ./.github/scripts/start_instances.py
        env:
          # TODO: Allow launching only specified GPU instances
          HW_CONFIG: all
110 changes: 80 additions & 30 deletions .github/workflows/tests.yaml
@@ -7,13 +7,15 @@ on:
   pull_request:
   workflow_call:
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.sha }}
+  cancel-in-progress: true
+
 jobs:
-  tests:
-
+  build-docs:
     if: github.repository == 'CentML/hidet' || github.repository == 'hidet-org/hidet'
-    concurrency:
-      group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
-      cancel-in-progress: true
-    runs-on: [self-hosted, Linux, X64, gpu]
+    runs-on: arc-runner-set
     container:
       image: nvidia/cuda:11.8.0-devel-ubuntu20.04
       options: --gpus all
@@ -22,10 +24,10 @@
       run: |
         apt update && DEBIAN_FRONTEND=noninteractive apt install -y ccache git graphviz
 
-    - uses: actions/checkout@v3
+    - uses: actions/checkout@v4
 
     - name: Set up Python
-      uses: actions/setup-python@v4
+      uses: actions/setup-python@v5
       with:
         python-version: "3.8"
 
@@ -34,41 +36,89 @@
       with:
         cmake-version: '3.19.x'
 
-    - name: Install dependencies via pip
+    - name: Setup Hidet
+      uses: ./.github/actions/setup-hidet
+
+    - name: List installed packages
       run: |
-        python -m pip install --upgrade pip
-        pip install torch torchvision torchaudio
-        pip install -r requirements.txt
-        pip install -r requirements-dev.txt
+        pip list
 
-    - name: Build hidet
+    - name: Install docs dependencies
       run: |
-        bash scripts/wheel/build_wheel.sh
-        WHEEL=$(find ./scripts/wheel/built_wheel -maxdepth 1 -name '*.whl')
-        echo "WHEEL_NAME=$WHEEL" >> $GITHUB_ENV
-        echo "Built wheel: ${{ env.WHEEL_NAME }}"
+        pip install -r docs/requirements.txt
+
+    - name: Build docs
+      run: |
+        cd docs; make clean; make html
+
+  list-test-dirs:
+    if: github.repository == 'CentML/hidet' || github.repository == 'hidet-org/hidet'
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    steps:
+
+      - name: Checkout Hidet
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.8"
 
-    - name: Install hidet
-      run: |
-        pip install --no-deps --force-reinstall ${{ env.WHEEL_NAME }}
+      - id: set-matrix
+        run: |
+          python .github/scripts/set_test_matrix.py
+
+  run-test:
+    needs: list-test-dirs
+    strategy:
+      fail-fast: false
+      matrix: ${{ fromJSON(needs.list-test-dirs.outputs.matrix) }}
+    runs-on: arc-runner-set
+    container:
+      image: nvidia/cuda:11.8.0-devel-ubuntu20.04
+      options: --gpus all
+    steps:
+      - name: Install dependencies via apt
+        run: |
+          apt update && DEBIAN_FRONTEND=noninteractive apt install -y ccache git graphviz
+
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.8"
+
+      - name: Setup cmake
+        uses: jwlawson/actions-setup-cmake@v2
+        with:
+          cmake-version: '3.19.x'
+
+      - name: Setup Hidet
+        uses: ./.github/actions/setup-hidet
+
+      - name: List installed packages
+        run: |
+          pip list
 
-    # Run tests
-
-    - name: Run tests
-      run: |
-        rm -rf ~/.config/hidet
-        python -m pytest -v --durations=20 --clear-cache ./tests
-
-    # Build the docs
-
-    - name: Install docs dependencies
-      run: |
-        pip install -r docs/requirements.txt
-
-    - name: Build docs
-      run: |
-        cd docs; make clean; make html
+      # Run tests
+
+      - name: Run tests
+        run: |
+          rm -rf ~/.config/hidet
+          python -m pytest -v --durations=20 --clear-cache ${{ matrix.path }}
+
+  final-status-indicator:
+    if: ${{ always() }}
+    runs-on: ubuntu-latest
+    name: Pass All Functional Tests
+    needs: [run-test]
+    steps:
+      - run: exit 1
+        if: >-
+          ${{
+            contains(needs.*.result, 'failure')
+            || contains(needs.*.result, 'cancelled')
+            || contains(needs.*.result, 'skipped')
+          }}
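The `final-status-indicator` gate can be modeled in a few lines. This sketch only mirrors the `contains(needs.*.result, ...)` expression in the workflow; the function name and result strings follow GitHub's job-result vocabulary:

```python
def final_status(shard_results):
    # The aggregate job fails if any sharded test job failed, was
    # cancelled, or was skipped; otherwise it passes.
    bad = {"failure", "cancelled", "skipped"}
    return "failure" if any(r in bad for r in shard_results) else "success"

print(final_status(["success", "success"]))  # success
print(final_status(["success", "skipped"]))  # failure
```

Treating `skipped` as a failure matters here because a required status check would otherwise pass vacuously when the matrix jobs never ran.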
8 changes: 8 additions & 0 deletions CMakeLists.txt
@@ -25,6 +25,14 @@ add_library(hidet_runtime SHARED
         src/hidet/runtime/callbacks.cpp
         src/hidet/runtime/logging.cpp
         src/hidet/runtime/symbols.cpp
+        src/hidet/runtime/llm/tokenizer/decoders.cpp
+        src/hidet/runtime/llm/tokenizer/models.cpp
+        src/hidet/runtime/llm/tokenizer/normalizers.cpp
+        src/hidet/runtime/llm/tokenizer/pattern.cpp
+        src/hidet/runtime/llm/tokenizer/postprocessors.cpp
+        src/hidet/runtime/llm/tokenizer/pretokenizers.cpp
+        src/hidet/runtime/llm/tokenizer/tokenizer.cpp
+        src/hidet/runtime/llm/tokenizer/utf8.cpp
 )
 target_include_directories(hidet_runtime PRIVATE ${CMAKE_SOURCE_DIR}/include /usr/include)
 set_target_properties(hidet_runtime PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
1 change: 0 additions & 1 deletion README.md
@@ -39,7 +39,6 @@ x = torch.rand(1, 3, 224, 224).cuda()
 
 # Optional: set optimization options (see our documentation for more details)
 # import hidet
 # hidet.torch.dynamo_config.search_space(2)  # tune each tunable operator
-# hidet.torch.dynamo_config.use_fp16()  # use float16 for acceleration
 model_opt = torch.compile(model, backend='hidet')
 
 # Run the optimized model
2 changes: 1 addition & 1 deletion apps/compile_server/README.md
@@ -30,5 +30,5 @@ hidet.option.compile_server.password('password')
 hidet.option.compile_server.repo('https://github.com/hidet-org/hidet', 'main')
 
 # enable the compile server
-hidet.option.compile_server.enable()
\ No newline at end of file
+hidet.option.compile_server.enable()
```
6 changes: 3 additions & 3 deletions gallery/developer-guides/add-torch-operator-mapping.py
@@ -53,7 +53,7 @@
 from torch import nn
 
 # hidet employs an interpreter to convert a fx.Graph to FlowGraph
-from hidet.graph.frontend.torch.interpreter import Registry
+from hidet.graph.frontend.torch.registry import Registry
 
 # the following three modules register the conversion rules
 import hidet.graph.frontend.torch.register_functions
@@ -91,7 +91,7 @@ def forward(self, x):
 
 def run_model():
     model = Model().cuda()
-    model_opt = torch.compile(model, backend='hidet')
+    model_opt = torch.compile(model, backend='hidet', mode='max-autotune')
 
     x = torch.randn(10, 10, device='cuda')
     y1 = model_opt(x)
@@ -112,7 +112,7 @@ def run_model():
 from typing import Optional
 from hidet import ops
 from hidet import Tensor
-from hidet.graph.frontend.torch.interpreter import (
+from hidet.graph.frontend.torch.registry import (
     register_function,
     register_module,
     register_method,
5 changes: 1 addition & 4 deletions gallery/getting-started/quick-start.py
@@ -37,11 +37,8 @@
 model = torch.hub.load('pytorch/vision:v0.9.0', 'resnet18', pretrained=True, verbose=False)
 model = model.cuda().eval()
 
-# uncomment the following line to enable kernel tuning
-# hidet.torch.dynamo_config.search_space(2)
-
 # optimize the model with 'hidet' backend
-model_opt = torch.compile(model, backend='hidet')
+model_opt = torch.compile(model, backend='hidet', mode='max-autotune')
 
 # run the optimized model
 y1 = model_opt(x)
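Several commits in this sync route the `torch.compile(mode=...)` argument through to hidet's own options, which is why the galleries now pass `mode='max-autotune'` instead of setting `dynamo_config.search_space(2)` by hand. As a rough illustration only — the actual mapping lives inside hidet's dynamo backend and may differ — a mode string could translate to a tuning search space like this:

```python
def options_for_mode(mode):
    # Hypothetical mode -> option mapping; the values here are
    # assumptions for illustration, not hidet's actual table.
    table = {
        "default": {"search_space": 0},        # no kernel tuning
        "reduce-overhead": {"search_space": 1},
        "max-autotune": {"search_space": 2},   # tune each tunable operator
    }
    if mode not in table:
        raise ValueError(f"unsupported mode: {mode}")
    return table[mode]

print(options_for_mode("max-autotune"))  # {'search_space': 2}
```

Under such a scheme, the user-facing knob stays on the `torch.compile` call and the backend decides how aggressively to tune.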