
Commit aa23412

cmp-nct, John, and ggerganov authored Feb 14, 2024
llava : support v1.6 (#5267)
* Create llava-survery-v2.py
* Update convert-image-encoder-to-gguf.py
* Update convert-image-encoder-to-gguf.py
* Rename llava-survery-v2.py to llava-surgery-v2.py
* Update convert-image-encoder-to-gguf.py: will now search for projector
* Update convert-image-encoder-to-gguf.py: whoops
* Update llava-surgery-v2.py
* Clip: Bugfix for normalization (it did not load the 3 std and mean values)
  Clip: bicubic resize function
  Clip: added save-to-bmp/pil for debugging and conversion from/to 32/8 images
  Clip: added normalization with FP16 precision simulation (image tensors match HF implementation, can be switched off, only used for llava-1.6)
  Clip: added newline tensor, mergetype kv, image-grid kv, new resize-pad function with resolution from gridpoints
  Clip: clip_image_preprocess now returns a float * vector instead of float, this way llava 1.5 and 1.6 are supported
  llava: added ggml cpu graph for embedding patching, added spatial_unpad preliminary support, added a lot of comments that need to be cleaned when all is final
  convert-image-encoder: fixed image-grid flattening
* whitespace corrections
* ws
* Tensors are now properly permuted. Before, the embeddings were inserted 1:1; now they are split into the 24x24 patches as in the reference.
* ws
* added verbose_prompt support into cli; added stopwords for llava-1.6 into cli
* moved llava functions to llava.cpp, made clip.h a C-compatible API, replaced vector-style functions with pointers, added a debug define to remove functions from compilation while not needed
* ws
* convert : skip unknown tensors (needed for LLaVA)
* llava : update readme
* llava : fix compile warnings
* llava : style
* convert : add --skip-unknown CLI arg
* server : remove clip structs
* bugfix for non-llava-1.6: it should now work with llava-1.5 as well
* clip : minor code rearrange
* llava : update readme a bit

---------

Co-authored-by: John <cmt-nct@users.noreply.github.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
1 parent f5ca054 commit aa23412

10 files changed: +1229 −205 lines changed
 

convert.py

+21 −16
@@ -1173,7 +1173,7 @@ def convert_to_output_type(model: LazyModel, output_type: GGMLFileType) -> LazyM
             for (name, tensor) in model.items()}
 
 
-def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
+def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) -> LazyModel:
     tmap = gguf.TensorNameMap(ARCH, params.n_layer)
     should_skip: set[gguf.MODEL_TENSOR] = set(gguf.MODEL_TENSOR_SKIP.get(ARCH, []))
 
@@ -1199,7 +1199,11 @@ def convert_model_names(model: LazyModel, params: Params) -> LazyModel:
     for name, lazy_tensor in model.items():
         tensor_type, name_new = tmap.get_type_and_name(name, try_suffixes = (".weight", ".bias")) or (None, None)
         if name_new is None:
-            raise Exception(f"Unexpected tensor name: {name}")
+            if skip_unknown:
+                print(f"Unexpected tensor name: {name} - skipping")
+                continue
+            else:
+                raise Exception(f"Unexpected tensor name: {name}. Use --skip-unknown to ignore it (e.g. LLaVA)")
 
         if tensor_type in should_skip:
             print(f"skipping tensor {name_new}")
@@ -1377,19 +1381,20 @@ def main(args_in: list[str] | None = None) -> None:
     output_choices.append("q8_0")
     vocab_types = ["spm", "bpe", "hfft"]
     parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
-    parser.add_argument("--awq-path", type=Path, help="Path to scale awq cache file", default=None)
-    parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
-    parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
-    parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
-    parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
-    parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
-    parser.add_argument("--vocab-type", choices=vocab_types, help="The vocabulary format used to define the tokenizer model (default: spm)", default="spm")
-    parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
-    parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
-    parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
-    parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default=DEFAULT_CONCURRENCY)
-    parser.add_argument("--big-endian", action="store_true", help="model is executed on big endian machine")
-    parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
+    parser.add_argument("--awq-path", type=Path, help="Path to scale awq cache file", default=None)
+    parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
+    parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
+    parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
+    parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
+    parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
+    parser.add_argument("--vocab-type", choices=vocab_types, help="The vocabulary format used to define the tokenizer model (default: spm)", default="spm")
+    parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
+    parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
+    parser.add_argument("--ctx", type=int, help="model training context (default: based on input)")
+    parser.add_argument("--concurrency", type=int, help=f"concurrency used for conversion (default: {DEFAULT_CONCURRENCY})", default=DEFAULT_CONCURRENCY)
+    parser.add_argument("--big-endian", action="store_true", help="model is executed on big endian machine")
+    parser.add_argument("--pad-vocab", action="store_true", help="add pad tokens when model vocab expects more than tokenizer metadata provides")
+    parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
 
     args = parser.parse_args(args_in)
     if args.awq_path:
@@ -1461,7 +1466,7 @@ def main(args_in: list[str] | None = None) -> None:
     print(f"Special vocab info: {special_vocab}")
 
     model   = model_plus.model
-    model   = convert_model_names(model, params)
+    model   = convert_model_names(model, params, args.skip_unknown)
     ftype   = pick_output_type(model, args.outtype)
     model   = convert_to_output_type(model, ftype)
     outfile = args.outfile or default_outfile(model_plus.paths, ftype)
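
The `--skip-unknown` flag added above is what allows `convert.py` to run over a LLaVA checkout that still contains vision/projector tensors the LLaMA tensor map does not recognize. A minimal usage sketch, reusing the model path from the README example (the path is illustrative only):

```sh
# Convert the language-model part of a LLaVA checkout; unknown tensor names
# (e.g. leftover vision tensors) are reported and skipped instead of aborting.
python ./convert.py ../llava-v1.5-7b --skip-unknown
```

Without the flag, conversion still fails fast, now with a hint: `Unexpected tensor name: ... Use --skip-unknown to ignore it (e.g. LLaVA)`.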

examples/llava/README.md

+9 −3
@@ -19,9 +19,9 @@ After building, run: `./llava-cli` to see the usage. For example:
 
 **note**: A lower temperature like 0.1 is recommended for better quality. add `--temp 0.1` to the command to do so.
 
-## Model conversion
+## LLaVA 1.5
 
-- Clone `llava-v15-7b` and `clip-vit-large-patch14-336` locally:
+- Clone a LLaVA and a CLIP model ([available options](https://github.com/haotian-liu/LLaVA/blob/main/docs/MODEL_ZOO.md)). For example:
 
 ```sh
 git clone https://huggingface.co/liuhaotian/llava-v1.5-7b
@@ -55,8 +55,14 @@ python ./convert.py ../llava-v1.5-7b
 
 Now both the LLaMA part and the image encoder is in the `llava-v1.5-7b` directory.
 
+## LLaVA 1.6
+
+- Use `llava-surgery-v2.py`
+
+- TODO: add detailed instructions
+
 ## TODO
 
-- [ ] Support non-CPU backend for the image encoding part.
+- [x] Support non-CPU backend for the image encoding part.
 - [ ] Support different sampling methods.
 - [ ] Support more model variants.
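
The LLaVA 1.6 section above is still a stub in this commit. As a rough sketch only, the workflow implied by the commit message is: surgery script, then image-encoder conversion, then `convert.py` with the new `--skip-unknown` flag. The step ordering notes and the model path below are assumptions, not a documented interface:

```sh
# Rough outline (assumed, not yet documented in this commit):
#   1. run examples/llava/llava-surgery-v2.py on the llava-1.6 checkout to split
#      out the vision tower / projector tensors
#   2. run examples/llava/convert-image-encoder-to-gguf.py on the surgery output
#      to build the image-encoder GGUF
#   3. convert the remaining LLaMA weights, skipping the leftover vision tensors:
python ./convert.py ../llava-v1.6-checkout --skip-unknown   # path is illustrative
```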
