Skip to content

Commit

Permalink
Hide yield statements at end of Python scf for-loop body (#1785)
Browse files: browse the repository at this point in the history
  • Loading branch information
hunhoffe authored Sep 24, 2024
1 parent 38e388f commit f5ca328
Show file tree
Hide file tree
Showing 75 changed files with 322 additions and 629 deletions.
7 changes: 3 additions & 4 deletions programming_examples/basic/dma_transpose/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.dialects.ext import memref, arith
from aie.extras.context import mlir_mod_ctx
from aie.extras.dialects.ext.scf import _for as range_

N = 4096
M = 64
Expand Down Expand Up @@ -45,8 +44,8 @@ def device_body():
# Compute tile 2
@core(ComputeTile2)
def core_body():
for _ in for_(sys.maxsize):
yield_([])
for _ in range_(sys.maxsize):
pass

# To/from AIE-array data movement
tensor_ty = T.memref(N, T.i32())
Expand Down
15 changes: 6 additions & 9 deletions programming_examples/basic/matrix_multiplication/cascade/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.dialects.ext.scf import _for as range_


def main():
Expand Down Expand Up @@ -277,9 +277,11 @@ def device_body():

@core(core_tiles[row][col], f"mm_{m}x{k}x{n}.o")
def core_body():
for _ in for_(0xFFFFFFFF):
for _ in range_(0xFFFFFFFF):
loop = (
for_(n_tiles_per_core) if n_tiles_per_core > 1 else range(1)
range_(n_tiles_per_core)
if n_tiles_per_core > 1
else range(1)
) # Workaround for issue #1547
for _ in loop:
if row == 0:
Expand All @@ -292,7 +294,7 @@ def core_body():
if row == 0:
call(zero_scalar, [elem_out])

for _ in for_(K // k // n_aie_rows):
for _ in range_(K // k // n_aie_rows):
elem_in_a = A_l2l1_fifos[row].acquire(
ObjectFifoPort.Consume, 1
)
Expand All @@ -319,14 +321,9 @@ def core_body():
B_l2l1_fifos[row][col].release(
ObjectFifoPort.Consume, 1
)
yield_([])

if row == 0:
C_l1l2_fifos[col].release(ObjectFifoPort.Produce, 1)
yield_([])

if n_tiles_per_core > 1: # workaround for issue #1547
yield_([])

# To/from AIE-array data movement
@runtime_sequence(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.dialects.ext.scf import _for as range_


def my_matmul():
Expand Down Expand Up @@ -144,7 +144,7 @@ def device_body():
# Compute tile i
@core(cores[i], f"mv_{m}x{k}.o")
def core_body():
for _ in for_(0xFFFFFFFF):
for _ in range_(0xFFFFFFFF):
elem_out = outC_fifos[outC_fifo_names[i]].acquire(
ObjectFifoPort.Produce,
1,
Expand All @@ -154,7 +154,7 @@ def core_body():
else:
call(zero_scalar, [elem_out])

for _ in for_(K_div_k):
for _ in range_(K_div_k):
elem_in_a = inA_fifos[inA_fifo_names[i]].acquire(
ObjectFifoPort.Consume,
1,
Expand All @@ -175,13 +175,11 @@ def core_body():
ObjectFifoPort.Consume,
1,
)
yield_([])

outC_fifos[outC_fifo_names[i]].release(
ObjectFifoPort.Produce,
1,
)
yield_([])

# To/from AIE-array data movement

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
from aie.extras.context import mlir_mod_ctx
from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
import aie.utils.trace as trace_utils
from aie.utils.trace import PortEvent
from aie.extras.dialects.ext.scf import _for as range_


def main():
Expand Down Expand Up @@ -214,16 +214,16 @@ def device_body():
# Compute tile 2
@core(compute_tile2, f"mm_{m}x{k}x{n}.o")
def core_body():
for _ in for_(0xFFFFFFFF):
for _ in for_(tiles) if tiles > 1 else range(1): # issue #1547
for _ in range_(0xFFFFFFFF):
for _ in range_(tiles) if tiles > 1 else range(1): # issue #1547
elem_out = memC.acquire(ObjectFifoPort.Produce, 1)
if vectorized:
call(zero, [elem_out])
else:
call(zero_scalar, [elem_out])

for _ in (
for_(K_div_k) if K_div_k > 1 else range(1)
range_(K_div_k) if K_div_k > 1 else range(1)
): # issue #1547
elem_in_a = memA.acquire(ObjectFifoPort.Consume, 1)
elem_in_b = memB.acquire(ObjectFifoPort.Consume, 1)
Expand All @@ -233,13 +233,8 @@ def core_body():
call(matmul_scalar, [elem_in_a, elem_in_b, elem_out])
memA.release(ObjectFifoPort.Consume, 1)
memB.release(ObjectFifoPort.Consume, 1)
if K_div_k > 1:
yield_([])

memC.release(ObjectFifoPort.Produce, 1)
if tiles > 1:
yield_([])
yield_([])

# To/from AIE-array data movement

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.dialects.ext.scf import _for as range_


def main():
Expand Down Expand Up @@ -299,17 +299,19 @@ def device_body():

@core(core_tiles[row][col], f"mm_{m}x{k}x{n}.o")
def core_body():
for _ in for_(0xFFFFFFFF):
for _ in range_(0xFFFFFFFF):
loop = (
for_(n_tiles_per_core) if n_tiles_per_core > 1 else range(1)
range_(n_tiles_per_core)
if n_tiles_per_core > 1
else range(1)
) # Workaround for issue #1547
for _ in loop:
elem_out = C_l1l2_fifos[row][col].acquire(
ObjectFifoPort.Produce, 1
)
call(zero, [elem_out])

for _ in for_(K // k):
for _ in range_(K // k):
elem_in_a = A_l2l1_fifos[row].acquire(
ObjectFifoPort.Consume, 1
)
Expand All @@ -319,13 +321,8 @@ def core_body():
call(matmul, [elem_in_a, elem_in_b, elem_out])
A_l2l1_fifos[row].release(ObjectFifoPort.Consume, 1)
B_l2l1_fifos[col].release(ObjectFifoPort.Consume, 1)
yield_([])

C_l1l2_fifos[row][col].release(ObjectFifoPort.Produce, 1)
yield_([])

if n_tiles_per_core > 1: # workaround for issue #1547
yield_([])

# To/from AIE-array data movement
@runtime_sequence(
Expand Down
8 changes: 3 additions & 5 deletions programming_examples/basic/matrix_scalar_add/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.dialects.ext import memref, arith
from aie.extras.context import mlir_mod_ctx
from aie.extras.dialects.ext.scf import _for as range_

import sys

Expand Down Expand Up @@ -62,17 +62,15 @@ def device_body():
@core(ComputeTile2)
def core_body():
# Effective while(1)
for _ in for_(sys.maxsize):
for _ in range_(sys.maxsize):
elem_in = of_in1.acquire(ObjectFifoPort.Consume, 1)
elem_out = of_out1.acquire(ObjectFifoPort.Produce, 1)
for i in for_(TILE_SIZE):
for i in range_(TILE_SIZE):
v0 = memref.load(elem_in, [i])
v1 = arith.addi(v0, arith.constant(1, T.i32()))
memref.store(v1, elem_out, [i])
yield_([])
of_in1.release(ObjectFifoPort.Consume, 1)
of_out1.release(ObjectFifoPort.Produce, 1)
yield_([])

# To/from AIE-array data movement

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.dialects.ext import arith
from aie.extras.context import mlir_mod_ctx
from aie.extras.dialects.ext.scf import _for as range_

dev = AIEDevice.npu1_1col
col = 0
Expand Down Expand Up @@ -70,32 +70,28 @@ def device_body():
# Compute tile 2
@core(ComputeTile2)
def core_body():
for _ in for_(sys.maxsize):
for _ in range_(sys.maxsize):
elemOut = of_out2.acquire(ObjectFifoPort.Produce, 1)
elemIn = of_in2.acquire(ObjectFifoPort.Consume, 1)
for i in for_(N // 2):
for i in range_(N // 2):
v0 = memref.load(elemIn, [i])
v1 = arith.addi(v0, arith.constant(1, T.i32()))
memref.store(v1, elemOut, [i])
yield_([])
of_in2.release(ObjectFifoPort.Consume, 1)
of_out2.release(ObjectFifoPort.Produce, 1)
yield_([])

# Compute tile 3
@core(ComputeTile3)
def core_body():
for _ in for_(sys.maxsize):
for _ in range_(sys.maxsize):
elemOut = of_out3.acquire(ObjectFifoPort.Produce, 1)
elemIn = of_in3.acquire(ObjectFifoPort.Consume, 1)
for i in for_(N // 2):
for i in range_(N // 2):
v0 = memref.load(elemIn, [i])
v1 = arith.addi(v0, arith.constant(2, T.i32()))
memref.store(v1, elemOut, [i])
yield_([])
of_in3.release(ObjectFifoPort.Consume, 1)
of_out3.release(ObjectFifoPort.Produce, 1)
yield_([])

# To/from AIE-array data movement
tensor_out_ty = T.memref(out_size, T.i32())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.context import mlir_mod_ctx

N = 4096
Expand Down
7 changes: 3 additions & 4 deletions programming_examples/basic/passthrough_dmas/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.dialects.ext import memref, arith
from aie.extras.context import mlir_mod_ctx
from aie.extras.dialects.ext.scf import _for as range_

N = 4096
dev = AIEDevice.npu1_1col
Expand Down Expand Up @@ -54,8 +53,8 @@ def device_body():
# Compute tile 2
@core(ComputeTile2)
def core_body():
for _ in for_(sys.maxsize):
yield_([])
for _ in range_(sys.maxsize):
pass

# To/from AIE-array data movement
tensor_ty = T.memref(N, T.i32())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.dialects.ext import memref, arith
from aie.extras.context import mlir_mod_ctx
from aie.extras.dialects.ext.scf import _for as range_

N = 1024

Expand Down Expand Up @@ -44,8 +43,8 @@ def device_body():
# Compute tile 2
@core(ComputeTile2)
def core_body():
for _ in for_(sys.maxsize):
yield_([])
for _ in range_(sys.maxsize):
pass

# To/from AIE-array data movement
tensor_ty = T.memref(N, T.i32())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.dialects.ext import memref, arith
from aie.extras.context import mlir_mod_ctx
from aie.extras.dialects.ext.scf import _for as range_

N = 1024

Expand Down Expand Up @@ -46,8 +45,8 @@ def device_body():
# Compute tile 2
@core(ComputeTile2)
def core_body():
for _ in for_(sys.maxsize):
yield_([])
for _ in range_(sys.maxsize):
pass

# To/from AIE-array data movement
tensor_ty = T.memref(N, T.i32())
Expand Down
5 changes: 2 additions & 3 deletions programming_examples/basic/passthrough_kernel/aie2.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.context import mlir_mod_ctx
from aie.extras.dialects.ext.scf import _for as range_

import aie.utils.trace as trace_utils

Expand Down Expand Up @@ -47,13 +47,12 @@ def device_body():
# Compute tile 2
@core(ComputeTile2, "passThrough.cc.o")
def core_body():
for _ in for_(sys.maxsize):
for _ in range_(sys.maxsize):
elemOut = of_out.acquire(ObjectFifoPort.Produce, 1)
elemIn = of_in.acquire(ObjectFifoPort.Consume, 1)
call(passThroughLine, [elemIn, elemOut, lineWidthInBytes])
of_in.release(ObjectFifoPort.Consume, 1)
of_out.release(ObjectFifoPort.Produce, 1)
yield_([])

# print(ctx.module.operation.verify())

Expand Down
4 changes: 0 additions & 4 deletions programming_examples/basic/passthrough_kernel/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,9 @@
import numpy as np
import pyxrt as xrt
import sys
import time

from aie.dialects.aie import *
from aie.dialects.aiex import *
from aie.dialects.scf import *
from aie.extras.context import mlir_mod_ctx
from aie.extras.dialects.ext import memref, arith

import aie.utils.test as test_utils

Expand Down
Loading

0 comments on commit f5ca328

Please sign in to comment.