---
# TesseraQ W2 (2-bit weight-only) quantization config for Llama-2-13B.
# Stanzas: base (global seed), model, calib (calibration data),
# eval (perplexity eval), quant (method + hyperparameters), save (export flags).

base:
  # Global RNG seed; aliased below so calibration sampling reuses it.
  seed: &seed 42

model:
  type: Llama
  path: meta-llama/Llama-2-13b-hf
  # "auto" lets the loader pick the checkpoint's native dtype.
  torch_dtype: auto

calib:
  name: wikitext2
  download: false
  n_samples: 512
  path: ../cache/data/calib/wikitext2
  bs: 1
  seq_len: 2048
  preproc: wikitext2_gptq
  # Reuse the global seed so calibration sample selection is reproducible.
  seed: *seed

eval:
  # Evaluate the fake-quantized (simulated low-bit) model.
  eval_pos: [fake_quant]
  name: [wikitext2, c4]
  download: false
  path: ../cache/data/eval
  bs: 1
  seq_len: 2048
  inference_per_block: false

quant:
  method: TesseraQ
  weight:
    bit: 2
    symmetric: false
    granularity: per_channel
    # -1 disables grouping (whole-channel scales).
    group_size: -1
    calib_algo: learnable
  special:
    lr: 0.001
    iterations: 250
    wd: 0.0
    batch_size: 4
    deactive_amp: false
    aug_loss: false
    optimize_scale: true
    scale_lr: 0.001
    # Progressive rounding-mask annealing schedule (fraction of weights
    # left soft at each stage, decreasing toward fully hard rounding).
    thresholds: [0.8, 0.65, 0.5, 0.43, 0.38, 0.34, 0.3, 0.27, 0.24, 0.21,
                 0.18, 0.15, 0.12, 0.10, 0.08, 0.06, 0.04, 0.02, 0.01, 0.005]
    weight_clip: true
    load_transform: false
    clip_version: v2
    reduce_memory: false
    # this clip should be downloaded from OmniQuant
    clip_path: ../cache/activations/L2_13b/omniq_w2
  # Propagate quantized outputs to subsequent blocks during calibration.
  quant_out: true

save:
  save_fp: false
  save_trans: false
  save_lightllm: false
  save_autogptq: false