diff --git a/.github/workflows/canary.yml b/.github/workflows/canary.yml
index 5b053ee21fd..f39b16d050e 100644
--- a/.github/workflows/canary.yml
+++ b/.github/workflows/canary.yml
@@ -55,7 +55,7 @@ jobs:
- name: Checkout Oneflow-Inc/oneflow
if: ${{ github.event.inputs.oneflow-ref == '' }}
uses: actions/checkout@v2
- - uses: Oneflow-Inc/get-oneflow@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow@support-iree-ci
name: Build manylinux
id: build-cuda
with:
diff --git a/.github/workflows/on_merge.yml b/.github/workflows/on_merge.yml
index f327d68d0d3..6085a59da77 100644
--- a/.github/workflows/on_merge.yml
+++ b/.github/workflows/on_merge.yml
@@ -15,6 +15,6 @@ jobs:
if: github.event.pull_request.merged == true
runs-on: ubuntu-latest
steps:
- - uses: Oneflow-Inc/get-oneflow/update-benchmark-history@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow/update-benchmark-history@support-iree-ci
name: Update benchmark history
timeout-minutes: 10
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index a97e72de34d..1e4112a28ba 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -33,7 +33,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@support-iree-ci
name: find cache
id: find-cache
timeout-minutes: 5
@@ -74,7 +74,7 @@ jobs:
python3 -m pip install -U pip setuptools wheel --user
python3 -m pip install oss2 --user
- uses: actions/checkout@v2
- - uses: Oneflow-Inc/get-oneflow@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow@support-iree-ci
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry !='cpu' }}
with:
@@ -98,7 +98,7 @@ jobs:
3.8
3.9
3.10
- - uses: Oneflow-Inc/get-oneflow@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow@support-iree-ci
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry =='cpu' }}
with:
diff --git a/.github/workflows/simple.yml b/.github/workflows/simple.yml
index 2f22f7b74d5..1b2064f1a61 100644
--- a/.github/workflows/simple.yml
+++ b/.github/workflows/simple.yml
@@ -245,7 +245,7 @@ jobs:
repository: Oneflow-Inc/conda-env
ref: 30a7f00eb48ee9009d85a848e720823e5054c66b
path: conda-env
- - uses: Oneflow-Inc/get-oneflow@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow@support-iree-ci
name: Build with gcc7
if: ${{ matrix.build-type == 'gcc7'}}
with:
@@ -254,7 +254,7 @@ jobs:
oneflow-build-env: conda
conda-env-file: conda-env/dev/gcc7/environment-v2.yml
conda-env-name: oneflow-dev-gcc7-v2
- - uses: Oneflow-Inc/get-oneflow@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow@support-iree-ci
name: Build with clang10
if: ${{ matrix.build-type == 'clang10'}}
with:
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9d465fa372b..c0f79e273ab 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -16,6 +16,8 @@ env:
FLOW_VISION_COMMIT: ca8ebc663b58667cf8cd1b6ef0c861522780b7bb
LIBAI_SRC: libai
LIBAI_COMMIT: 7d31d9781e5f2d559dc0820f599e0bed798488ca
+ ONEFLOW_IREE_SRC: oneflow_iree
+ ONEFLOW_IREE_COMMIT: 4322cbad2545877b1664aa8e0f17a17f6b5f687c
TEST_WITH_TORCH_IMG_TAG: registry.cn-beijing.aliyuncs.com/oneflow/test-with-pytorch-1.10.0-cuda11.3-cudnn8-runtime:afaf913e02a4ba02db92260daee22f99121cef62
MLIR_DOCKER_ARGS: "-e ONEFLOW_MLIR_ENABLE_ROUND_TRIP=1 -e ONEFLOW_MLIR_PREFER_NHWC=0 -e ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION=1"
@@ -25,7 +27,7 @@ jobs:
runs-on: ubuntu-latest
if: github.event.pull_request.draft == false && github.base_ref == 'master' && contains(github.event.pull_request.requested_reviewers.*.login, 'oneflow-ci-bot')
steps:
- - uses: Oneflow-Inc/get-oneflow/priority-pr@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow/priority-pr@support-iree-ci
name: Check priority PR closed
id: save-cache
timeout-minutes: 5
@@ -159,7 +161,7 @@ jobs:
fi
echo "is_secrets_accessible=1" >> $GITHUB_ENV
- name: Wait for GPU slot
- uses: Oneflow-Inc/get-oneflow/wait-for-gpu@support-cuda-1106
+ uses: Oneflow-Inc/get-oneflow/wait-for-gpu@support-iree-ci
if: env.is_secrets_accessible == '1'
timeout-minutes: 90
continue-on-error: true
@@ -183,7 +185,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/build@support-iree-ci
name: find cache
id: find-cache
timeout-minutes: 5
@@ -230,7 +232,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- - uses: Oneflow-Inc/get-oneflow/cache-complete@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow/cache-complete@support-iree-ci
name: Save cache if successful
id: save-cache
timeout-minutes: 5
@@ -266,7 +268,7 @@ jobs:
python-versions: |
3.6
3.7
- - uses: Oneflow-Inc/get-oneflow@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow@support-iree-ci
name: Build manylinux ${{ matrix.entry }}
id: build-cuda
if: ${{ matrix.entry =='cu102' && !matrix.cache-hit }}
@@ -286,7 +288,7 @@ jobs:
clean-ccache: ${{ contains(github.event.pull_request.labels.*.name, 'need-clean-ccache') }}
python-versions: |
3.7
- - uses: Oneflow-Inc/get-oneflow@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow@support-iree-ci
name: Build ${{ matrix.entry }}
if: ${{ matrix.entry == 'llvm13' && !matrix.cache-hit }}
with:
@@ -325,7 +327,7 @@ jobs:
})
- name: Upload packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm13' && matrix.entry != 'cu102_xla' }}
- uses: Oneflow-Inc/get-oneflow/digest/upload@support-cuda-1106
+ uses: Oneflow-Inc/get-oneflow/digest/upload@support-iree-ci
timeout-minutes: 10
with:
digest: ${{ steps.save-cache.outputs.build-digest }}
@@ -336,7 +338,7 @@ jobs:
dst-dir: cpack
- name: Upload whl
if: ${{ !fromJson(matrix.cache-hit) && matrix.entry != 'llvm13' && matrix.entry != 'cu102_xla' }}
- uses: Oneflow-Inc/get-oneflow/digest/upload@support-cuda-1106
+ uses: Oneflow-Inc/get-oneflow/digest/upload@support-iree-ci
timeout-minutes: 10
with:
digest: ${{ steps.save-cache.outputs.build-digest }}
@@ -361,7 +363,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@support-iree-ci
name: find cache
id: find-cache
timeout-minutes: 5
@@ -392,7 +394,7 @@ jobs:
with:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
- - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow/cache-complete/matrix/test@support-iree-ci
name: find cache
id: find-cache
timeout-minutes: 5
@@ -456,12 +458,20 @@ jobs:
# please use a commit here
ref: ${{ env.LIBAI_COMMIT}}
path: ${{ env.LIBAI_SRC}}
+ - name: Checkout Oneflow-Inc/oneflow_iree
+ if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
+ uses: actions/checkout@v2
+ with:
+ repository: Oneflow-Inc/oneflow_iree
+ # please use a commit here
+ ref: ${{ env.ONEFLOW_IREE_COMMIT}}
+ path: ${{ env.ONEFLOW_IREE_SRC}}
- name: Remove container
timeout-minutes: 45
if: ${{ contains(matrix.runs-on, 'self-hosted') }}
run: |
docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true
- - uses: Oneflow-Inc/get-oneflow/cache-complete@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow/cache-complete@support-iree-ci
name: Save cache if successful
id: save-cache
timeout-minutes: 5
@@ -477,7 +487,7 @@ jobs:
exit 1
- name: Download wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
- uses: Oneflow-Inc/get-oneflow/digest/download@support-cuda-1106
+ uses: Oneflow-Inc/get-oneflow/digest/download@support-iree-ci
id: download-digest
timeout-minutes: 10
with:
@@ -487,7 +497,7 @@ jobs:
ssh-tank-path: ${{ env.SSH_TANK_PATH }}
- name: Get primary node
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
- uses: Oneflow-Inc/get-oneflow/master-address@support-cuda-1106
+ uses: Oneflow-Inc/get-oneflow/master-address@support-iree-ci
id: get-primary-node
with:
rank: ${{ matrix.rank }}
@@ -560,6 +570,7 @@ jobs:
docker exec ${TEST_CONTAINER_NAME} python3 -m pip install -e ${{ env.FLOW_VISION_SRC}}
docker exec ${TEST_CONTAINER_NAME} python3 -m pip install pybind11 --user
docker exec ${TEST_CONTAINER_NAME} python3 -m pip install -e ${{ env.LIBAI_SRC}}
+ docker exec ${TEST_CONTAINER_NAME} python3 -m pip install -e ${{ env.ONEFLOW_IREE_SRC}}
- name: Module API test (distributed)
timeout-minutes: 90
if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'module' && matrix.device == 'cuda' && fromJson(matrix.is-distributed) }}
@@ -649,12 +660,20 @@ jobs:
# please use a commit here
ref: ${{ env.LIBAI_COMMIT}}
path: ${{ env.LIBAI_SRC}}
+ - name: Checkout Oneflow-Inc/oneflow_iree
+ if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
+ uses: actions/checkout@v2
+ with:
+ repository: Oneflow-Inc/oneflow_iree
+ # please use a commit here
+ ref: ${{ env.ONEFLOW_IREE_COMMIT}}
+ path: ${{ env.ONEFLOW_IREE_SRC}}
- name: Remove container
timeout-minutes: 45
if: ${{ contains(matrix.runs-on, 'self-hosted') }}
run: |
docker rm -f ${{ env.TEST_CONTAINER_NAME }} || true
- - uses: Oneflow-Inc/get-oneflow/cache-complete@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow/cache-complete@support-iree-ci
name: Save cache if successful
id: save-cache
timeout-minutes: 5
@@ -670,7 +689,7 @@ jobs:
exit 1
- name: Download wheel and packed liboneflow
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
- uses: Oneflow-Inc/get-oneflow/digest/download@support-cuda-1106
+ uses: Oneflow-Inc/get-oneflow/digest/download@support-iree-ci
id: download-digest
timeout-minutes: 10
with:
@@ -782,6 +801,7 @@ jobs:
docker exec ${TEST_CONTAINER_NAME} python3 -m pip install -e ${{ env.FLOW_VISION_SRC}}
docker exec ${TEST_CONTAINER_NAME} python3 -m pip install pybind11 --user
docker exec ${TEST_CONTAINER_NAME} python3 -m pip install -e ${{ env.LIBAI_SRC}}
+ docker exec ${TEST_CONTAINER_NAME} python3 -m pip install -e ${{ env.ONEFLOW_IREE_SRC}}
- name: Run OneFlow doctor
if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') }}
run: |
@@ -866,7 +886,7 @@ jobs:
body: "\n Speed stats:
\n\n ``` \n${{ steps.speed.outputs.stats }}\n ``` \n\n ".replace(/\\n/g, '\n')
})
- name: Module API test
- timeout-minutes: 45
+ timeout-minutes: 50
if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'module' && !fromJson(matrix.is-distributed) }}
run: |
docker exec -e ONEFLOW_TEST_DIR=$PWD/python/oneflow/test/modules ${{ env.TEST_CONTAINER_NAME }} bash ci/test/generic_test_multi_client.sh
@@ -884,6 +904,11 @@ jobs:
docker exec -e ONEFLOW_TEST_DEVICE_NUM=4 -w $PWD/${{ env.LIBAI_SRC }} ${{ env.TEST_CONTAINER_NAME }} python3 -m oneflow.distributed.launch --nproc_per_node 4 -m unittest -f tests/models/test_gpt.py
docker exec -e ONEFLOW_TEST_DEVICE_NUM=4 -w $PWD/${{ env.LIBAI_SRC }} ${{ env.TEST_CONTAINER_NAME }} python3 -m oneflow.distributed.launch --nproc_per_node 4 -m unittest -f tests/models/test_t5.py
docker exec -e ONEFLOW_TEST_DEVICE_NUM=4 -w $PWD/${{ env.LIBAI_SRC }} ${{ env.TEST_CONTAINER_NAME }} python3 -m oneflow.distributed.launch --nproc_per_node 4 -m unittest -f tests/models/test_vit.py
+ - name: oneflow_iree test
+ timeout-minutes: 45
+ if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' }}
+ run: |
+ docker exec -w $PWD/${{ env.ONEFLOW_IREE_SRC }} ${{ env.TEST_CONTAINER_NAME }} python3 -m pytest examples
- name: Expensive tests (models, cases require exclusive access to GPU)
timeout-minutes: 45
if: ${{ !fromJson(matrix.cache-hit) && (matrix.test-type == 'speed-test' || (matrix.test-type == 'misc' && matrix.device == 'cpu')) && !fromJson(matrix.is-distributed) }}
@@ -909,7 +934,7 @@ jobs:
- name: Benchmark Test
timeout-minutes: 100
if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'benchmark' && matrix.device == 'cuda' }}
- uses: Oneflow-Inc/get-oneflow/pytest-benchmark@support-cuda-1106
+ uses: Oneflow-Inc/get-oneflow/pytest-benchmark@support-iree-ci
with:
collect-path: ${{ env.FLOW_VISION_SRC }}/benchmark
container-name: ${{ env.TEST_CONTAINER_NAME }}
@@ -962,7 +987,7 @@ jobs:
ref: ${{ github.event.pull_request.head.sha }}
repository: ${{github.event.pull_request.head.repo.full_name}}
fetch-depth: 0
- - uses: Oneflow-Inc/get-oneflow/cache-complete@support-cuda-1106
+ - uses: Oneflow-Inc/get-oneflow/cache-complete@support-iree-ci
name: Save cache if successful
id: save-cache
timeout-minutes: 5
diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake
index b93a12e55fe..0176468ccd6 100644
--- a/cmake/oneflow.cmake
+++ b/cmake/oneflow.cmake
@@ -250,18 +250,21 @@ if("${LLVM_MONO_REPO_URL}" STREQUAL
"https://github.com/llvm/llvm-project/archive/7eaa84eac3ba935d13f4267d3d533a6c3e1283ed.zip"
OR "${LLVM_MONO_REPO_URL}" STREQUAL
"https://github.com/llvm/llvm-project/archive/35e60f5de180aea55ed478298f4b40f04dcc57d1.zip"
+ OR "${LLVM_MONO_REPO_URL}" STREQUAL
+ "https://github.com/llvm/llvm-project/archive/6a9bbd9f20dcd700e28738788bb63a160c6c088c.zip"
OR "${LLVM_MONO_REPO_MD5}" STREQUAL "f2f17229cf21049663b8ef4f2b6b8062"
OR "${LLVM_MONO_REPO_MD5}" STREQUAL "6b7c6506d5922de9632c8ff012b2f945"
OR "${LLVM_MONO_REPO_MD5}" STREQUAL "e0ea669a9f0872d35bffda5ec6c5ac6f"
+ OR "${LLVM_MONO_REPO_MD5}" STREQUAL "241a333828bba1efa35aff4c4fc2ce87"
OR "${LLVM_MONO_REPO_MD5}" STREQUAL "075fbfdf06cb3f02373ea44971af7b03")
unset(LLVM_MONO_REPO_URL CACHE)
unset(LLVM_MONO_REPO_MD5 CACHE)
endif()
set(LLVM_MONO_REPO_URL
- "https://github.com/llvm/llvm-project/archive/6a9bbd9f20dcd700e28738788bb63a160c6c088c.zip"
+ "https://github.com/llvm/llvm-project/archive/32805e60c9de1f82887cd2af30d247dcabd2e1d3.zip"
CACHE STRING "")
use_mirror(VARIABLE LLVM_MONO_REPO_URL URL ${LLVM_MONO_REPO_URL})
-set(LLVM_MONO_REPO_MD5 "241a333828bba1efa35aff4c4fc2ce87" CACHE STRING "")
+set(LLVM_MONO_REPO_MD5 "e412dc61159b5e929b0c94e44b11feb2" CACHE STRING "")
set(ONEFLOW_BUILD_ROOT_DIR "${PROJECT_BINARY_DIR}")
add_subdirectory(${PROJECT_SOURCE_DIR}/oneflow/ir)
if(WITH_MLIR)
diff --git a/oneflow/ir/include/OneFlow/Conversion/SCFToGPU.h b/oneflow/ir/include/OneFlow/Conversion/SCFToGPU.h
deleted file mode 100644
index e6c70591035..00000000000
--- a/oneflow/ir/include/OneFlow/Conversion/SCFToGPU.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-#ifndef ONEFLOW_IR_INCLUDE_ONEFLOW_CONVERSION_SCFTOGPU_H_
-#define ONEFLOW_IR_INCLUDE_ONEFLOW_CONVERSION_SCFTOGPU_H_
-
-#include "mlir/Pass/Pass.h"
-
-namespace mlir {
-
-namespace oneflow {
-
-std::unique_ptr<mlir::Pass> createMapSCFToGPUPass();
-
-} // namespace oneflow
-
-} // namespace mlir
-
-#endif // ONEFLOW_IR_INCLUDE_ONEFLOW_CONVERSION_SCFTOGPU_H_
diff --git a/oneflow/ir/include/OneFlow/OneFlowDialect.td b/oneflow/ir/include/OneFlow/OneFlowDialect.td
index 10bfca306c0..94e4d31ac5b 100644
--- a/oneflow/ir/include/OneFlow/OneFlowDialect.td
+++ b/oneflow/ir/include/OneFlow/OneFlowDialect.td
@@ -14,6 +14,7 @@ def OneFlow_Dialect : Dialect {
"func::FuncDialect"
];
let hasConstantMaterializer = 1;
+ let useDefaultTypePrinterParser = 1;
}
#endif // ONEFLOW_DIALECT
diff --git a/oneflow/ir/include/OneFlow/OneFlowOps.td b/oneflow/ir/include/OneFlow/OneFlowOps.td
index 405ff4499e0..c22a87143b3 100644
--- a/oneflow/ir/include/OneFlow/OneFlowOps.td
+++ b/oneflow/ir/include/OneFlow/OneFlowOps.td
@@ -288,12 +288,6 @@ def LowerOneFlowToTosaPass : Pass<"lower-oneflow-to-tosa", "ModuleOp"> {
];
}
-def MapSCFToGPUPass : Pass<"gpu-greedy-parallel-loop-mapping", "ModuleOp"> {
- let summary = "Greedily maps all parallel loops to gpu hardware ids";
- let constructor = "mlir::oneflow::createMapSCFToGPUPass()";
- let dependentDialects = ["scf::SCFDialect"];
-}
-
def BufferHostRegisterPass : Pass<"buffer-host-register", "func::FuncOp"> {
let summary = "";
let constructor = "mlir::oneflow::createBufferHostRegisterPass()";
diff --git a/oneflow/ir/include/OneFlow/OneFlowPatterns.td b/oneflow/ir/include/OneFlow/OneFlowPatterns.td
index 5ea5d776f36..097d76c5fbb 100644
--- a/oneflow/ir/include/OneFlow/OneFlowPatterns.td
+++ b/oneflow/ir/include/OneFlow/OneFlowPatterns.td
@@ -5,7 +5,7 @@
include "mlir/IR/PatternBase.td"
include "OneFlow/OneFlowOps.td"
include "mlir/Dialect/MemRef/IR/MemRefOps.td"
-include "mlir/Dialect/GPU/GPUOps.td"
+include "mlir/Dialect/GPU/IR/GPUOps.td"
 def IsNotNestedInJit: Constraint<CPred<"(!$0.getDefiningOp()->getParentOfType<::mlir::oneflow::Job>())">, "">;
def IsScalarTensor: Constraint, "">;
diff --git a/oneflow/ir/include/OneFlow/Passes.h b/oneflow/ir/include/OneFlow/Passes.h
index 59c05c42d34..7c46d8f3e59 100644
--- a/oneflow/ir/include/OneFlow/Passes.h
+++ b/oneflow/ir/include/OneFlow/Passes.h
@@ -19,13 +19,12 @@ limitations under the License.
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/SCF/SCF.h"
-#include "mlir/Dialect/GPU/GPUDialect.h"
+#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "OneFlow/Conversion/OneFlowToTosa.h"
-#include "OneFlow/Conversion/SCFToGPU.h"
#include "OneFlow/Transform/BufferHostRegister.h"
#include "OneFlow/Transform/ConvertInferenceOp.h"
#include "OneFlow/Transform/OutlineAndFuse.h"
diff --git a/oneflow/ir/install-llvm.cmake b/oneflow/ir/install-llvm.cmake
index e01bba1b36d..d25b1911634 100644
--- a/oneflow/ir/install-llvm.cmake
+++ b/oneflow/ir/install-llvm.cmake
@@ -10,6 +10,7 @@ if(NOT llvm_monorepo_POPULATED)
execute_process(
COMMAND
"${CMAKE_COMMAND}" ${llvm_monorepo_SOURCE_DIR}/llvm
+ -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} # this is required in newer version of LLVM
-DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
-DCMAKE_CUDA_COMPILER_LAUNCHER=${CMAKE_CUDA_COMPILER_LAUNCHER}
diff --git a/oneflow/ir/lib/OneFlow/CMakeLists.txt b/oneflow/ir/lib/OneFlow/CMakeLists.txt
index cdc4ccbb55b..b8d0ce21d1f 100644
--- a/oneflow/ir/lib/OneFlow/CMakeLists.txt
+++ b/oneflow/ir/lib/OneFlow/CMakeLists.txt
@@ -1,7 +1,7 @@
get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS)
message(STATUS "MLIR_DIALECT_LIBS: ${dialect_libs}")
if(WITH_MLIR_CUDA_CODEGEN)
- set(MLIR_GPU_LIBS MLIRSCFToGPU MLIRGPUToNVVMTransforms MLIRNVVMToLLVMIRTranslation)
+ set(MLIR_GPU_LIBS MLIRGPUToNVVMTransforms MLIRNVVMToLLVMIRTranslation)
endif(WITH_MLIR_CUDA_CODEGEN)
set(ONEFLOW_OP_GROUPS
@@ -24,7 +24,6 @@ oneflow_add_mlir_dialect_library(
OneFlowSupport.cpp
OneFlowOpFolders.cpp
Conversion/OneFlowToTosa.cpp
- Conversion/SCFToGPU.cpp
Conversion/PTXToCubin.cpp
Transform/BufferHostRegister.cpp
Transform/OutlineAndFuse.cpp
@@ -43,6 +42,7 @@ oneflow_add_mlir_dialect_library(
MLIRTosaToLinalg
MLIRMemRefToLLVM
MLIRLinalgToLLVM
+ MLIRSCFToGPU
MLIRReconcileUnrealizedCasts
${MLIR_GPU_LIBS}
MLIRIR
diff --git a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp
index ec92bb352ec..912ac6c3e0b 100644
--- a/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp
+++ b/oneflow/ir/lib/OneFlow/Conversion/OneFlowToTosa.cpp
@@ -144,7 +144,7 @@ struct InputOpLowering final : public OpConversionPattern<InputOp> {
// TODO: more choices to passing data between tosa and oneflow
const auto newValues = op.input();
     const auto is_block_arg = newValues.dyn_cast<BlockArgument>() != nullptr;
- if (!is_block_arg) op->emitError("input is not block arg");
+ if (!is_block_arg) { return op->emitError("input is not block arg"); }
rewriter.replaceOp(op, newValues);
return success();
}
@@ -168,10 +168,10 @@ struct VariableOpLowering final : public OpConversionPattern<VariableOp> {
LogicalResult matchAndRewrite(VariableOp op, OpAdaptor adaptor,
ConversionPatternRewriter& rewriter) const override {
const auto mgr = ::oneflow::Global<::oneflow::VariableTensorMgr>::Get();
- if (!mgr) op->emitError("global variable tensor manager miss");
+ if (!mgr) { return op->emitError("global variable tensor manager miss"); }
const auto tensor = mgr->Get(op.op_name().str());
- if (!tensor) op->emitError("tensor is null");
+ if (!tensor) { return op->emitError("tensor is null"); }
const auto value = support::TensorToDenseElementsAttr(tensor, rewriter.getContext());
const auto output = op.output().getType();
@@ -204,7 +204,7 @@ struct VariableOpToConstLowering final : public OpConversionPattern<VariableOp>
       rewriter.replaceOpWithNewOp<tosa::ConstOp>(op, output, value);
} else {
- op->emitError(
+ return op->emitError(
"OneFlow variable op lower to TOSA const op only support integer and float value now");
}
@@ -327,7 +327,7 @@ struct MaxPool2DOpLowering final : public OpConversionPattern<MaxPool2DOp> {
return RankedTensorType::get(ranked_type, shape_type.getElementType());
};
// TODO: support return indice
- if (op.return_indices()) op->emitError("not support return indices now");
+ if (op.return_indices()) { return op->emitError("not support return indices now"); }
auto stride_pairs = get_pair_int64_from_array(op.stride());
auto kernel_pairs = get_pair_int64_from_array(op.kernel_size());
auto pad_pairs = get_pair_int64_from_array(op.padding());
diff --git a/oneflow/ir/lib/OneFlow/Conversion/PTXToCubin.cpp b/oneflow/ir/lib/OneFlow/Conversion/PTXToCubin.cpp
index 35ea2bd8b0e..8c22c3055de 100644
--- a/oneflow/ir/lib/OneFlow/Conversion/PTXToCubin.cpp
+++ b/oneflow/ir/lib/OneFlow/Conversion/PTXToCubin.cpp
@@ -17,7 +17,7 @@ limitations under the License.
This file is ported from mlir/lib/Dialect/GPU/Transforms/SerializeToCubin.cpp
*/
-#include "mlir/Dialect/GPU/Passes.h"
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
#ifdef WITH_MLIR_CUDA_CODEGEN
#include "mlir/Pass/Pass.h"
diff --git a/oneflow/ir/lib/OneFlow/Conversion/SCFToGPU.cpp b/oneflow/ir/lib/OneFlow/Conversion/SCFToGPU.cpp
deleted file mode 100644
index 18cb2b4bd74..00000000000
--- a/oneflow/ir/lib/OneFlow/Conversion/SCFToGPU.cpp
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-#include "OneFlow/OneFlowOps.h"
-#include
-#include
-#include "OneFlow/OneFlowDialect.h"
-#include "OneFlow/Passes.h"
-#include "llvm/ADT/STLExtras.h"
-#include "mlir/Conversion/LinalgToLLVM/LinalgToLLVM.h"
-#include "mlir/Conversion/MemRefToLLVM/MemRefToLLVM.h"
-#include "mlir/Conversion/FuncToLLVM/ConvertFuncToLLVMPass.h"
-#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h"
-#include "mlir/Dialect/Affine/IR/AffineOps.h"
-#include "mlir/Dialect/Linalg/Passes.h"
-#include "mlir/Dialect/MemRef/IR/MemRef.h"
-#include "mlir/Dialect/SCF/Passes.h"
-#include "mlir/Dialect/Func/IR/FuncOps.h"
-#include "mlir/Dialect/Func/Transforms/Passes.h"
-#include "mlir/Dialect/Tensor/Transforms/Passes.h"
-#include "mlir/Dialect/Tosa/IR/TosaOps.h"
-#include "mlir/IR/BuiltinAttributes.h"
-#include "mlir/IR/OpImplementation.h"
-
-#include "mlir/Pass/Pass.h"
-#include "mlir/Pass/PassManager.h"
-#include "mlir/Support/LogicalResult.h"
-#include "mlir/Transforms/DialectConversion.h"
-#include "mlir/Transforms/Passes.h"
-
-#include "mlir/Dialect/GPU/ParallelLoopMapper.h"
-#include "mlir/Pass/Pass.h"
-
-using namespace mlir;
-
-namespace {
-/// Simple pass for testing the mapping of parallel loops to hardware ids using
-/// a greedy mapping strategy.
-class GpuGreedyParallelLoopMappingPass
-    : public MapSCFToGPUPassBase<GpuGreedyParallelLoopMappingPass> {
- void runOnOperation() override {
- Operation* op = getOperation();
- for (Region& region : op->getRegions()) greedilyMapParallelSCFToGPU(region);
- }
-};
-} // namespace
-
-namespace mlir {
-
-namespace oneflow {
-
-std::unique_ptr<mlir::Pass> createMapSCFToGPUPass() {
-  return std::make_unique<GpuGreedyParallelLoopMappingPass>();
-}
-
-} // namespace oneflow
-
-} // namespace mlir
diff --git a/oneflow/ir/lib/OneFlow/Passes.cpp b/oneflow/ir/lib/OneFlow/Passes.cpp
index 612e0a79a9a..b0f8c71bf57 100644
--- a/oneflow/ir/lib/OneFlow/Passes.cpp
+++ b/oneflow/ir/lib/OneFlow/Passes.cpp
@@ -62,7 +62,7 @@ limitations under the License.
#include "mlir/Conversion/AffineToStandard/AffineToStandard.h"
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h"
-#include "mlir/Dialect/GPU/Passes.h"
+#include "mlir/Dialect/GPU/Transforms/Passes.h"
#include "mlir/Conversion/SCFToGPU/SCFToGPUPass.h"
#endif // WITH_MLIR_CUDA_CODEGEN
@@ -769,9 +769,10 @@ LogicalResult LowerModuleToCUDALLVM(mlir::MLIRContext* context, ModuleOp module)
AddLowerToLinalgMemRefPasses(pm);
   pm.addNestedPass<func::FuncOp>(
createConvertLinalgToParallelLoopsPass()); // convert-linalg-to-parallel-loops
- pm.addPass(createMapSCFToGPUPass()); // gpu-greedy-parallel-loop-mapping
- pm.addPass(createParallelLoopToGpuPass()); // convert-parallel-loops-to-gpu
- pm.addPass(createGpuKernelOutliningPass()); // gpu-kernel-outlining
+  pm.addNestedPass<func::FuncOp>(createGpuMapParallelLoopsPass()); // gpu-map-parallel-loops
+ pm.addPass(createParallelLoopToGpuPass()); // convert-parallel-loops-to-gpu
+ pm.addPass(createGpuLauchSinkIndexComputationsPass());
+ pm.addPass(createGpuKernelOutliningPass()); // gpu-kernel-outlining
   pm.addNestedPass<func::FuncOp>(createBufferHostRegisterPass()); // buffer-host-register
pm.addPass(createCanonicalizerPass()); // canonicalize
// -pass-pipeline='gpu.module([PASS1][PASS2]...)'
@@ -781,6 +782,7 @@ LogicalResult LowerModuleToCUDALLVM(mlir::MLIRContext* context, ModuleOp module)
   pm.addNestedPass<gpu::GPUModuleOp>(createSerializeToCubinPass()); // out-of-tree-gpu-to-cubin
   pm.addNestedPass<func::FuncOp>(createGpuCopyArgPass()); // buffer-host-register
pm.addPass(createGpuToLLVMConversionPass());
+ pm.addPass(createReconcileUnrealizedCastsPass()); // reconcile-unrealized-casts
if (enable_ir_printing) pm.enableIRPrinting();
return pm.run(module);
}
diff --git a/oneflow/ir/oneflow-extension/CMakeLists.txt b/oneflow/ir/oneflow-extension/CMakeLists.txt
index 8a0b21aa8f3..e7e2f1fbd18 100644
--- a/oneflow/ir/oneflow-extension/CMakeLists.txt
+++ b/oneflow/ir/oneflow-extension/CMakeLists.txt
@@ -11,7 +11,7 @@ oneflow_add_mlir_library(
MLIRIR
MLIRParser
MLIRPass
- MLIRSPIRV
+ MLIRSPIRVDialect
MLIRTranslateLib
MLIRSupport
MLIROneFlow
diff --git a/oneflow/ir/oneflow-opt/oneflow-opt.cpp b/oneflow/ir/oneflow-opt/oneflow-opt.cpp
index 0496d741603..f8b35f58d59 100644
--- a/oneflow/ir/oneflow-opt/oneflow-opt.cpp
+++ b/oneflow/ir/oneflow-opt/oneflow-opt.cpp
@@ -47,7 +47,7 @@ int32_t main(int32_t argc, char** argv) {
mlir::registerAllPasses();
mlir::registerTestOneFlowTraitsPass();
mlir::registerLowerOneFlowToTosaPassPass();
- mlir::registerMapSCFToGPUPassPass();
+ mlir::registerGpuMapParallelLoopsPassPass();
mlir::registerBufferHostRegisterPassPass();
mlir::registerGpuCopyArgPassPass();
#ifdef WITH_MLIR_CUDA_CODEGEN
diff --git a/oneflow/ir/oneflow-runner/CMakeLists.txt b/oneflow/ir/oneflow-runner/CMakeLists.txt
index d594362192b..9c5a601af5f 100644
--- a/oneflow/ir/oneflow-runner/CMakeLists.txt
+++ b/oneflow/ir/oneflow-runner/CMakeLists.txt
@@ -16,7 +16,7 @@ target_link_libraries(
MLIRExecutionEngine
MLIRIR
MLIRJitRunner
- MLIRLLVMIR
+ MLIRLLVMIRTransforms
MLIRLLVMToLLVMIRTranslation
MLIRToLLVMIRTranslationRegistration
MLIRParser
diff --git a/oneflow/ir/oneflow-translate/lib/OneFlow/CMakeLists.txt b/oneflow/ir/oneflow-translate/lib/OneFlow/CMakeLists.txt
index 5ce5c097953..539021f8f54 100644
--- a/oneflow/ir/oneflow-translate/lib/OneFlow/CMakeLists.txt
+++ b/oneflow/ir/oneflow-translate/lib/OneFlow/CMakeLists.txt
@@ -14,7 +14,7 @@ oneflow_add_mlir_library(
MLIRIR
MLIRParser
MLIRPass
- MLIRSPIRV
+ MLIRSPIRVDialect
MLIRTranslateLib
MLIRSupport
MLIROneFlow
diff --git a/oneflow/ir/test/Frontend/test_iree_resnet.py b/oneflow/ir/test/Frontend/test_iree_resnet.py
deleted file mode 100644
index c538a66b575..00000000000
--- a/oneflow/ir/test/Frontend/test_iree_resnet.py
+++ /dev/null
@@ -1,108 +0,0 @@
-"""
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-# RUN: python3 %s
-
-from oneflow_iree.compiler import Runner
-from flowvision.models import resnet50
-import oneflow as flow
-import oneflow.unittest
-import unittest
-import os
-import numpy as np
-import time
-
-os.environ["ONEFLOW_MLIR_ENABLE_ROUND_TRIP"] = "1"
-os.environ["ONEFLOW_MLIR_ENABLE_CODEGEN_FUSERS"] = "1"
-
-
-def _test_iree_resnet_cpu(test_case):
- model = resnet50(pretrained=True)
- model.eval()
-
- class GraphModuleForIree(flow.nn.Graph):
- def __init__(self):
- super().__init__()
- self.model = model
-
- def build(self, x):
- return self.model(x)
-
- class GraphModuleForOFMLIR(flow.nn.Graph):
- def __init__(self):
- super().__init__()
- self.model = model
-
- def build(self, x):
- return self.model(x)
-
- func = Runner(GraphModuleForIree, return_numpy=True)
- input = flow.ones([1, 3, 224, 224])
- f = GraphModuleForOFMLIR()
- for iter in range(2):
- iree_output = func(input)
- graph_output = f(input)
- graph_output = graph_output.cpu().detach().numpy()
- # the rtol accumulate layer by layer
- test_case.assertTrue(
- np.allclose(iree_output, graph_output, rtol=1.0e-1, atol=1e-3)
- )
-
-
-def _test_iree_resnet_cuda(test_case):
- model = resnet50(pretrained=True).cuda()
- model.eval()
-
- class GraphModuleForIree(flow.nn.Graph):
- def __init__(self):
- super().__init__()
- self.model = model
-
- def build(self, x):
- return self.model(x)
-
- class GraphModuleForOFMLIR(flow.nn.Graph):
- def __init__(self):
- super().__init__()
- self.model = model
-
- def build(self, x):
- return self.model(x)
-
- func = Runner(GraphModuleForIree, return_numpy=True)
- input = flow.ones([1, 3, 224, 224]).cuda()
- f = GraphModuleForOFMLIR()
- for iter in range(2):
- iree_output = func(input)
- graph_output = f(input)
- graph_output = graph_output.cpu().detach().numpy()
- # the rtol accumulate layer by layer
- test_case.assertTrue(
- np.allclose(iree_output, graph_output, rtol=1.0e-1, atol=1e-3)
- )
-
-
-@flow.unittest.skip_unless_1n1d()
-class TestIreeResnet(oneflow.unittest.TestCase):
- def test_iree_resnet_cpu(test_case):
- _test_iree_resnet_cpu(test_case)
-
- @unittest.skipUnless(oneflow.sysconfig.with_cuda(), "only test cpu cases")
- def test_iree_resnet_cuda(test_case):
- _test_iree_resnet_cuda(test_case)
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/oneflow/ir/test/Frontend/test_iree_runner.py b/oneflow/ir/test/Frontend/test_iree_runner.py
deleted file mode 100644
index a0caa90fecd..00000000000
--- a/oneflow/ir/test/Frontend/test_iree_runner.py
+++ /dev/null
@@ -1,71 +0,0 @@
-"""
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-# RUN: python3 %s
-
-from oneflow_iree.compiler import Runner
-import oneflow as flow
-import oneflow.unittest
-import unittest
-import numpy as np
-
-
-class RELU(flow.nn.Module):
- def __init__(self):
- super().__init__()
- self.relu = flow.nn.ReLU()
-
- def forward(self, x):
- return self.relu(x)
-
-
-class GraphModule(flow.nn.Graph):
- def __init__(self):
- super().__init__()
- self.fw = RELU()
-
- def build(self, x):
- return self.fw(x)
-
-
-def _test_check_iree_runner(test_case):
- func = Runner(GraphModule, return_numpy=True).cuda()
- # run on iree cuda backend
- input = flow.Tensor([-1.0, 1.0])
- output = func(input)
- test_case.assertTrue(np.allclose(output, [0.0, 1.0]))
- # change input shape
- input = flow.Tensor([-1.0, 1.0, -1])
- output = func(input)
- test_case.assertTrue(np.allclose(output, [0.0, 1.0, 0.0]))
- # change on iree cpu backend
- func = func.cpu()
- input = flow.Tensor([-1.0, 0.0, 1.0])
- output = func(input)
- test_case.assertTrue(np.allclose(output, [0.0, 0.0, 1.0]))
- # change input shape
- input = flow.Tensor([-1, 1.0])
- output = func(input)
- test_case.assertTrue(np.allclose(output, [0.0, 1.0]))
-
-
-@flow.unittest.skip_unless_1n1d()
-class TestCheckIreeRunner(oneflow.unittest.TestCase):
- def test_check_iree_runner(test_case):
- _test_check_iree_runner(test_case)
-
-
-if __name__ == "__main__":
- unittest.main()
diff --git a/oneflow/ir/test/Frontend/test_tosa_to_elf.mlir b/oneflow/ir/test/Frontend/test_tosa_to_elf.mlir
index 34ee5b499dc..3115bad55c6 100644
--- a/oneflow/ir/test/Frontend/test_tosa_to_elf.mlir
+++ b/oneflow/ir/test/Frontend/test_tosa_to_elf.mlir
@@ -4,7 +4,7 @@
// RUN: -tensor-bufferize -func-bufferize -buffer-results-to-out-params \
// RUN: -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm \
// RUN: -convert-func-to-llvm -convert-memref-to-llvm -reconcile-unrealized-casts --print-after-all \
-// RUN: | oneflow-translate -mlir-to-llvmir | clang -x ir - -c -o test.o
+// RUN: | oneflow-translate -mlir-to-llvmir
builtin.module {
func.func @Graph_0(%arg0: tensor<2xf32>) -> tensor<2xf32> {
diff --git a/oneflow/ir/test/OneFlow/cuda_code_gen/fuse_cast_scale.mlir b/oneflow/ir/test/OneFlow/cuda_code_gen/fuse_cast_scale.mlir
index a6a7db89b1b..9eaf154ac6f 100644
--- a/oneflow/ir/test/OneFlow/cuda_code_gen/fuse_cast_scale.mlir
+++ b/oneflow/ir/test/OneFlow/cuda_code_gen/fuse_cast_scale.mlir
@@ -1,4 +1,4 @@
-// RUN: oneflow-opt %s -lower-oneflow-to-tosa -pass-pipeline="func.func(tosa-to-linalg)" -cse --linalg-fuse-elementwise-ops -linalg-bufferize -convert-linalg-to-parallel-loops -gpu-greedy-parallel-loop-mapping \
+// RUN: oneflow-opt %s -lower-oneflow-to-tosa -pass-pipeline="func.func(tosa-to-linalg)" -cse --linalg-fuse-elementwise-ops -linalg-bufferize -convert-linalg-to-parallel-loops -gpu-map-parallel-loops \
// RUN: -convert-parallel-loops-to-gpu -gpu-kernel-outlining -buffer-host-register -canonicalize \
// RUN: -pass-pipeline='gpu.module(strip-debuginfo,lower-affine,convert-gpu-to-nvvm,out-of-tree-gpu-to-cubin)' \
// RUN: --func-bufferize -buffer-results-to-out-params -gpu-copy-arg --tensor-bufferize \
@@ -12,7 +12,7 @@
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_c_runner_utils%shlibext \
// RUN: --entry-point-result=void
-// RUN: oneflow-opt %s -lower-oneflow-to-tosa -pass-pipeline="func.func(tosa-to-linalg)" -cse --linalg-fuse-elementwise-ops -linalg-bufferize -convert-linalg-to-parallel-loops -gpu-greedy-parallel-loop-mapping \
+// RUN: oneflow-opt %s -lower-oneflow-to-tosa -pass-pipeline="func.func(tosa-to-linalg)" -cse --linalg-fuse-elementwise-ops -linalg-bufferize -convert-linalg-to-parallel-loops -gpu-map-parallel-loops \
// RUN: -convert-parallel-loops-to-gpu -gpu-kernel-outlining -buffer-host-register -canonicalize \
// RUN: -pass-pipeline='gpu.module(strip-debuginfo,lower-affine,convert-gpu-to-nvvm,out-of-tree-gpu-to-cubin)' \
// RUN: --func-bufferize --tensor-bufferize \
@@ -25,13 +25,13 @@
// RUN: --shared-libs=%linalg_test_lib_dir/libmlir_runner_utils%shlibext \
// RUN: --entry-point-result=void
-func @Cast_289__FUSE__ScalarMulByTensor_290(%arg0: tensor<3x3xi64>, %arg1: tensor<1xf32>) -> tensor<3x3xf32> {
+func.func @Cast_289__FUSE__ScalarMulByTensor_290(%arg0: tensor<3x3xi64>, %arg1: tensor<1xf32>) -> tensor<3x3xf32> {
%0 = "oneflow.cast"(%arg0) {device_name = ["@0:0"], device_tag = "cuda", dtype = 2 : i32, hierarchy = [1], op_name = "Cast_289", output_lbns = ["Cast_289/out_0"], scope_symbol_id = 4611686018427478014 : i64} : (tensor<3x3xi64>) -> tensor<3x3xf32>
%1 = "oneflow.scalar_mul_by_tensor"(%0, %arg1) {device_name = ["@0:0"], device_tag = "cuda", hierarchy = [1], op_name = "ScalarMulByTensor_290", output_lbns = ["ScalarMulByTensor_290/y_0"], scope_symbol_id = 4611686018427478014 : i64} : (tensor<3x3xf32>, tensor<1xf32>) -> tensor<3x3xf32>
return %1 : tensor<3x3xf32>
}
-func @main() {
+func.func @main() {
%a_data = memref.alloc() : memref<3x3xi64>
%b_data = memref.alloc() : memref<1xf32>
%a = bufferization.to_tensor %a_data : memref<3x3xi64>
@@ -40,15 +40,15 @@ func @main() {
%c = call @Cast_289__FUSE__ScalarMulByTensor_290(%a, %b) : (tensor<3x3xi64>, tensor<1xf32>) -> (tensor<3x3xf32>)
%c_buffer = bufferization.to_memref %c : memref<3x3xf32>
%cast_c_buffer = memref.cast %c_buffer : memref<3x3xf32> to memref<*xf32>
- call @print_memref_f32(%cast_c_buffer) : (memref<*xf32>) -> ()
+ call @printMemrefF32(%cast_c_buffer) : (memref<*xf32>) -> ()
// TODO: use real number
// CHECK: [3, 3]
%cast_a_data = memref.cast %a_data : memref<3x3xi64> to memref<*xi64>
%cast_b_data = memref.cast %b_data : memref<1xf32> to memref<*xf32>
- call @print_memref_i64(%cast_a_data) : (memref<*xi64>) -> ()
- call @print_memref_f32(%cast_b_data) : (memref<*xf32>) -> ()
+ call @printMemrefI64(%cast_a_data) : (memref<*xi64>) -> ()
+ call @printMemrefF32(%cast_b_data) : (memref<*xf32>) -> ()
return
}
-func private @print_memref_f32(memref<*xf32>)
-func private @print_memref_i64(memref<*xi64>)
+func.func private @printMemrefF32(memref<*xf32>)
+func.func private @printMemrefI64(memref<*xi64>)
diff --git a/oneflow/ir/test/OneFlow/cuda_code_gen/gpu_copy_arg.mlir b/oneflow/ir/test/OneFlow/cuda_code_gen/gpu_copy_arg.mlir
index 3371acad706..f63e65b7431 100644
--- a/oneflow/ir/test/OneFlow/cuda_code_gen/gpu_copy_arg.mlir
+++ b/oneflow/ir/test/OneFlow/cuda_code_gen/gpu_copy_arg.mlir
@@ -1,8 +1,8 @@
-// RUN: oneflow-opt %s -lower-oneflow-to-tosa -pass-pipeline="func.func(tosa-to-linalg)" -cse --linalg-fuse-elementwise-ops -linalg-bufferize -convert-linalg-to-parallel-loops -gpu-greedy-parallel-loop-mapping \
+// RUN: oneflow-opt %s -lower-oneflow-to-tosa -pass-pipeline="func.func(tosa-to-linalg)" -cse --linalg-fuse-elementwise-ops -linalg-bufferize -convert-linalg-to-parallel-loops -gpu-map-parallel-loops \
// RUN: -convert-parallel-loops-to-gpu -gpu-kernel-outlining -buffer-host-register -canonicalize \
// RUN: -pass-pipeline='gpu.module(strip-debuginfo,lower-affine,convert-gpu-to-nvvm,out-of-tree-gpu-to-cubin)' \
// RUN: --func-bufferize -buffer-results-to-out-params -gpu-copy-arg
-func @Cast_289__FUSE__ScalarMulByTensor_290(%arg0: tensor<3x3xi64>, %arg1: tensor<1xf32>) -> tensor<3x3xf32> {
+func.func @Cast_289__FUSE__ScalarMulByTensor_290(%arg0: tensor<3x3xi64>, %arg1: tensor<1xf32>) -> tensor<3x3xf32> {
%0 = "oneflow.cast"(%arg0) {device_name = ["@0:0"], device_tag = "cuda", dtype = 2 : i32, hierarchy = [1], op_name = "Cast_289", output_lbns = ["Cast_289/out_0"], scope_symbol_id = 4611686018427478014 : i64} : (tensor<3x3xi64>) -> tensor<3x3xf32>
%1 = "oneflow.scalar_mul_by_tensor"(%0, %arg1) {device_name = ["@0:0"], device_tag = "cuda", hierarchy = [1], op_name = "ScalarMulByTensor_290", output_lbns = ["ScalarMulByTensor_290/y_0"], scope_symbol_id = 4611686018427478014 : i64} : (tensor<3x3xf32>, tensor<1xf32>) -> tensor<3x3xf32>
return %1 : tensor<3x3xf32>
diff --git a/oneflow/ir/test/OneFlow/cuda_code_gen/gpu_runner.mlir b/oneflow/ir/test/OneFlow/cuda_code_gen/gpu_runner.mlir
index c5aac6f8e94..6f3d14cf212 100644
--- a/oneflow/ir/test/OneFlow/cuda_code_gen/gpu_runner.mlir
+++ b/oneflow/ir/test/OneFlow/cuda_code_gen/gpu_runner.mlir
@@ -8,7 +8,7 @@
// RUN: --entry-point-result=void \
// RUN: | FileCheck %s
// CHECK: [{{(35, ){34}35}}]
-func @main() {
+func.func @main() {
%arg = memref.alloc() : memref<35xf32>
%dst = memref.cast %arg : memref<35xf32> to memref
%one = arith.constant 1 : index
@@ -28,8 +28,8 @@ func @main() {
memref.store %res, %dst[%tx] : memref
gpu.terminator
}
- call @print_memref_f32(%cast_dst) : (memref<*xf32>) -> ()
+ call @printMemrefF32(%cast_dst) : (memref<*xf32>) -> ()
return
}
-func private @print_memref_f32(memref<*xf32>)
+func.func private @printMemrefF32(memref<*xf32>)
diff --git a/oneflow/ir/test/OneFlow/lower_to_tosa.mlir b/oneflow/ir/test/OneFlow/lower_to_tosa.mlir
index df5f91c3129..f65ed33275c 100644
--- a/oneflow/ir/test/OneFlow/lower_to_tosa.mlir
+++ b/oneflow/ir/test/OneFlow/lower_to_tosa.mlir
@@ -1,8 +1,7 @@
// RUN: oneflow-opt -lower-oneflow-to-tosa -pass-pipeline="func.func(tosa-to-linalg)" -cse --linalg-fuse-elementwise-ops -linalg-bufferize -tensor-bufferize -func-bufferize -buffer-results-to-out-params -convert-linalg-to-loops -convert-scf-to-cf -convert-linalg-to-llvm -convert-func-to-llvm -convert-memref-to-llvm -reconcile-unrealized-casts --print-after-all %s
-// RUN: oneflow-opt -lower-oneflow-to-tosa -pass-pipeline="func.func(tosa-to-linalg)" -cse --linalg-fuse-elementwise-ops -linalg-bufferize -tensor-bufferize -func-bufferize -buffer-results-to-out-params -finalizing-bufferize -canonicalize %s
module {
- func @Cast_1__FUSE__ScalarMulByTensor_2(%arg0: tensor<96x96xi64>, %arg1: tensor<1xf32>) -> tensor<96x96xf32> {
+ func.func @Cast_1__FUSE__ScalarMulByTensor_2(%arg0: tensor<96x96xi64>, %arg1: tensor<1xf32>) -> tensor<96x96xf32> {
%0 = "oneflow.cast"(%arg0) {device_name = ["0:0"], device_tag = "cpu", dtype = 2 : i32, hierarchy = [1], op_name = "Cast_1", op_type_name = "cast", scope_symbol_id = 4611686018427416574 : i64} : (tensor<96x96xi64>) -> tensor<96x96xf32>
%1 = "oneflow.scalar_mul_by_tensor"(%0, %arg1) {device_name = ["0:0"], device_tag = "cpu", hierarchy = [1], op_name = "ScalarMulByTensor_2", op_type_name = "scalar_mul_by_tensor", scope_symbol_id = 4611686018427416574 : i64} : (tensor<96x96xf32>, tensor<1xf32>) -> tensor<96x96xf32>
return %1 : tensor<96x96xf32>
diff --git a/oneflow/ir/test/OneFlow/traits.mlir b/oneflow/ir/test/OneFlow/traits.mlir
index ed8eb3a5678..55506828b84 100644
--- a/oneflow/ir/test/OneFlow/traits.mlir
+++ b/oneflow/ir/test/OneFlow/traits.mlir
@@ -1,17 +1,17 @@
// RUN: oneflow-opt -test-oneflow-trait-folder %s | FileCheck %s
-// CHECK-LABEL: func @testSingleIdempotent
+// CHECK-LABEL: func.func @testSingleIdempotent
// CHECK-SAME: ([[ARG0:%.+]]: tensor)
-func @testSingleIdempotent(%arg0 : tensor) -> tensor {
+func.func @testSingleIdempotent(%arg0 : tensor) -> tensor {
// CHECK: [[IDEMPOTENT:%.+]] = "oneflow.relu"([[ARG0]])
%0 = "oneflow.relu"(%arg0) {device_tag = "cuda", op_name = "Relu_1", op_type_name = "relu", device_name = ["0:0-0"], scope_symbol_id = 4611686018427420670 : i64} : (tensor) -> tensor
// CHECK: return [[IDEMPOTENT]]
return %0: tensor
}
-// CHECK-LABEL: func @testDoubleIdempotent
+// CHECK-LABEL: func.func @testDoubleIdempotent
// CHECK-SAME: ([[ARG0:%.+]]: tensor)
-func @testDoubleIdempotent(%arg0: tensor) -> tensor {
+func.func @testDoubleIdempotent(%arg0: tensor) -> tensor {
// CHECK: [[IDEMPOTENT:%.+]] = "oneflow.relu"([[ARG0]])
%0 = "oneflow.relu"(%arg0) {device_tag = "cuda", op_name = "Relu_1", op_type_name = "relu", device_name = ["0:0-0"], scope_symbol_id = 4611686018427420670 : i64} : (tensor) -> tensor
%1 = "oneflow.relu"(%0) {device_tag = "cuda", op_name = "Relu_2", op_type_name = "relu", device_name = ["0:0-0"], scope_symbol_id = 4611686018427420670 : i64} : (tensor) -> tensor
@@ -19,9 +19,9 @@ func @testDoubleIdempotent(%arg0: tensor) -> tensor {
return %1: tensor
}
-// CHECK-LABEL: func @testTripleIdempotent
+// CHECK-LABEL: func.func @testTripleIdempotent
// CHECK-SAME: ([[ARG0:%.+]]: tensor)
-func @testTripleIdempotent(%arg0: tensor) -> tensor {
+func.func @testTripleIdempotent(%arg0: tensor) -> tensor {
// CHECK: [[IDEMPOTENT:%.+]] = "oneflow.relu"([[ARG0]])
%0 = "oneflow.relu"(%arg0) {device_tag = "cuda", op_name = "Relu_1", op_type_name = "relu", device_name = ["0:0-0"], scope_symbol_id = 4611686018427420670 : i64} : (tensor) -> tensor
%1 = "oneflow.relu"(%0) {device_tag = "cuda", op_name = "Relu_2", op_type_name = "relu", device_name = ["0:0-0"], scope_symbol_id = 4611686018427420670 : i64} : (tensor) -> tensor
@@ -30,18 +30,18 @@ func @testTripleIdempotent(%arg0: tensor) -> tensor {
return %2: tensor
}
-// CHECK-LABEL: func @testDoubleInvolution
+// CHECK-LABEL: func.func @testDoubleInvolution
// CHECK-SAME: ([[ARG0:%.+]]: tensor)
-func @testDoubleInvolution(%arg0: tensor) -> tensor {
+func.func @testDoubleInvolution(%arg0: tensor) -> tensor {
%0 = "oneflow.negative"(%arg0) {device_tag = "cuda", op_name = "Relu_1", op_type_name = "relu", device_name = ["0:0-0"], scope_symbol_id = 4611686018427420670 : i64} : (tensor) -> tensor
%1 = "oneflow.negative"(%0) {device_tag = "cuda", op_name = "Relu_2", op_type_name = "relu", device_name = ["0:0-0"], scope_symbol_id = 4611686018427420670 : i64} : (tensor) -> tensor
// CHECK: return [[ARG0]]
return %1: tensor
}
-// CHECK-LABEL: func @testTripleInvolution
+// CHECK-LABEL: func.func @testTripleInvolution
// CHECK-SAME: ([[ARG0:%.+]]: tensor)
-func @testTripleInvolution(%arg0: tensor) -> tensor {
+func.func @testTripleInvolution(%arg0: tensor) -> tensor {
// CHECK: [[INVOLUTION:%.+]] = "oneflow.negative"([[ARG0]])
%0 = "oneflow.negative"(%arg0) {device_tag = "cuda", op_name = "Relu_1", op_type_name = "relu", device_name = ["0:0-0"], scope_symbol_id = 4611686018427420670 : i64} : (tensor) -> tensor
%1 = "oneflow.negative"(%0) {device_tag = "cuda", op_name = "Relu_2", op_type_name = "relu", device_name = ["0:0-0"], scope_symbol_id = 4611686018427420670 : i64} : (tensor) -> tensor
@@ -50,9 +50,9 @@ func @testTripleInvolution(%arg0: tensor) -> tensor {
return %2: tensor
}
-// CHECK-LABEL: func @testFailedInvolutionFoldDueToDifferentPlacement
+// CHECK-LABEL: func.func @testFailedInvolutionFoldDueToDifferentPlacement
// CHECK-SAME: ([[ARG0:%.+]]: tensor)
-func @testFailedInvolutionFoldDueToDifferentPlacement(%arg0: tensor) -> tensor {
+func.func @testFailedInvolutionFoldDueToDifferentPlacement(%arg0: tensor) -> tensor {
%0 = "oneflow.negative"(%arg0) {device_tag = "cuda", op_name = "Relu_1", op_type_name = "relu", device_name = ["0:0-0"], scope_symbol_id = 4611686018427420670 : i64} : (tensor) -> tensor
%1 = "oneflow.negative"(%0) {device_tag = "cuda", op_name = "Relu_2", op_type_name = "relu", device_name = ["1:0-0"], scope_symbol_id = 4611686018427420670 : i64} : (tensor) -> tensor
// CHECK: [[INVOLUTION:%.+]] = "oneflow.negative"(%1)
@@ -61,9 +61,9 @@ func @testFailedInvolutionFoldDueToDifferentPlacement(%arg0: tensor) -> ten
return %2: tensor
}
-// CHECK-LABEL: func @testFailedInvolutionFoldDueToDifferentDevice
+// CHECK-LABEL: func.func @testFailedInvolutionFoldDueToDifferentDevice
// CHECK-SAME: ([[ARG0:%.+]]: tensor)
-func @testFailedInvolutionFoldDueToDifferentDevice(%arg0: tensor) -> tensor {
+func.func @testFailedInvolutionFoldDueToDifferentDevice(%arg0: tensor) -> tensor {
%0 = "oneflow.negative"(%arg0) {device_tag = "cuda", op_name = "Relu_1", op_type_name = "relu", device_name = ["0:0-0"], scope_symbol_id = 4611686018427420670 : i64} : (tensor) -> tensor
%1 = "oneflow.negative"(%0) {device_tag = "cpu", op_name = "Relu_2", op_type_name = "relu", device_name = ["0:0-0"], scope_symbol_id = 4611686018427420670 : i64} : (tensor) -> tensor
// CHECK: [[INVOLUTION:%.+]] = "oneflow.negative"(%1)
diff --git a/oneflow/ir/test/OneFlow/with_cuda/test_conv_bn_auto_nhwc.py b/oneflow/ir/test/OneFlow/with_cuda/test_conv_bn_auto_nhwc.py
index 88d7c307c1a..8202c49ae89 100644
--- a/oneflow/ir/test/OneFlow/with_cuda/test_conv_bn_auto_nhwc.py
+++ b/oneflow/ir/test/OneFlow/with_cuda/test_conv_bn_auto_nhwc.py
@@ -52,7 +52,7 @@ def build(self, *input):
lazy_res = graph(data)
test_case.assertTrue(
- np.allclose(eager_res.numpy(), lazy_res.numpy(), rtol=1e-4, atol=1e-4)
+ np.allclose(eager_res.numpy(), lazy_res.numpy(), rtol=1e-2, atol=1e-2)
)
diff --git a/python/oneflow/nn/graph/graph.py b/python/oneflow/nn/graph/graph.py
index d6583810e5d..1952cea3699 100644
--- a/python/oneflow/nn/graph/graph.py
+++ b/python/oneflow/nn/graph/graph.py
@@ -531,7 +531,7 @@ def _shallow_repr(self):
return shallow_repr
def _ops_repr(self):
- r"""Generate operators' string representation of this graph
+ r"""Generate operators' string representation of this graph
"""
if self._is_compiled and self._compiled_graph_proto is not None:
module_conf = self._compiled_graph_proto.module_name2module_conf[self.name]
@@ -1360,6 +1360,13 @@ def __getattr__(self, name: str):
)
def __del__(self):
+ # Ensure vm has finished running this graph.
+ if self._session._env.is_shutting_down():
+ # After python shutting down, it's not safe to call oneflow._oneflow_internal.eager.
+ # But shutting down will do sync in SwitchToShuttingDownPhase.
+ # So it's safe to skip sync here.
+ return
+ oneflow._oneflow_internal.eager.Sync()
current_env_enable_mlir_inference_opt = os.getenv(
"ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION"
)
@@ -1369,13 +1376,6 @@ def __del__(self):
os.environ[
"ONEFLOW_MLIR_ENABLE_INFERENCE_OPTIMIZATION"
] = self.env_enable_mlir_inference_opt
- # Ensure vm has finished running this graph.
- if self._session._env.is_shutting_down():
- # After python shutting down, it's not safe to call oneflow._oneflow_internal.eager.
- # But shutting down will do sync in SwitchToShuttingDownPhase.
- # So it's safe to skip sync here.
- return
- oneflow._oneflow_internal.eager.Sync()
oneflow._oneflow_internal.ClearVariableTensorMgr()
def __ensure_input_tensors_contiguous(self, *args, **kwargs):
diff --git a/python/oneflow/test/graph/test_comb2d.py b/python/oneflow/test/graph/test_comb2d.py
index aac2a5e12a5..7b746017bdb 100644
--- a/python/oneflow/test/graph/test_comb2d.py
+++ b/python/oneflow/test/graph/test_comb2d.py
@@ -24,7 +24,7 @@
import oneflow.unittest
-class TestModule(nn.Module):
+class _TestModule(nn.Module):
def forward(self, x):
sbp_1ds = [
flow.sbp.broadcast,
@@ -62,7 +62,7 @@ def build(self, x):
@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases")
class TestLazyAllSbpCombinationTesting(flow.unittest.TestCase):
def test_lazy_boxing_2d_all_combination(test_case):
- model = TestModule()
+ model = _TestModule()
graph = _TestGraph(model)
x = flow.ones(
diff --git a/python/oneflow/test/graph/test_graph_ofrecord_reader.py b/python/oneflow/test/graph/test_graph_ofrecord_reader.py
index 16b4f161e13..35dcd4d376c 100644
--- a/python/oneflow/test/graph/test_graph_ofrecord_reader.py
+++ b/python/oneflow/test/graph/test_graph_ofrecord_reader.py
@@ -90,9 +90,6 @@ def build(self):
reader_g = GraphReader()
image, label = reader_g()
- print(image)
- print(label)
-
if __name__ == "__main__":
unittest.main()