Skip to content

Commit

Permalink
Add block_seq_stride flag (#692)
Browse files (browse the repository at this point in the history)
Add `block_seq_stride` flag

---------

Co-authored-by: Rob Suderman <rob.suderman@gmail.com>
  • Loading branch information
archana-ramalingam and rsuderman authored Dec 13, 2024
1 parent 77ca02f commit ec1424e
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions sharktank/sharktank/examples/export_paged_llm_v1.py
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,12 @@ def main():
type=lambda arg: [int(bs) for bs in arg.split(",")],
default="4",
)
parser.add_argument(
"--block-seq-stride",
help="Block sequence stride for paged KV cache, must divide evenly into the context length",
type=int,
default="16",
)
parser.add_argument(
"--verbose",
help="Include verbose logging",
@@ -76,6 +82,7 @@ def main():
static_tables=False, # Rely on the compiler for hoisting tables.
kv_cache_type="direct" if args.bs == [1] else "paged",
attention_kernel=args.attention_kernel,
block_seq_stride=args.block_seq_stride,
)
llama_config.fake_quant = args.fake_quant

Expand Down

0 comments on commit ec1424e

Please sign in to comment.