The model loads normally; the error is raised after a prompt is sent.
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 7/7 [00:05<00:00, 1.18it/s]
Setting eos_token is not supported, use the default one.
Setting pad_token is not supported, use the default one.
Setting unk_token is not supported, use the default one.
===================================BUG REPORT===================================
Welcome to bitsandbytes. For bug reports, please submit your error trace to: https://github.com/TimDettmers/bitsandbytes/issues
binary_path: F:\Anaconda\envs\chatglm\lib\site-packages\bitsandbytes\cuda_setup\libbitsandbytes_cuda116.dll
CUDA SETUP: Loading binary F:\Anaconda\envs\chatglm\lib\site-packages\bitsandbytes\cuda_setup\libbitsandbytes_cuda116.dll...
Setting eos_token is not supported, use the default one.
Setting pad_token is not supported, use the default one.
Setting unk_token is not supported, use the default one.
The error message is as follows:
┌─────────────────────────────── Traceback (most recent call last) ────────────────────────────────┐
│ F:\ChatGLM\chatglm3_6b_finetune\inference_hf.py:51 in main │
│ │
│ 48 │ │ prompt: Annotated[str, typer.Option(help='')], │
│ 49 ): │
│ 50 │ model, tokenizer = load_model_and_tokenizer(model_dir) │
│ > 51 │ response, _ = model.chat(tokenizer, prompt) │
│ 52 │ print(response) │
│ 53 │
│ 54 │
│ │
│ F:\Anaconda\envs\chatglm\lib\site-packages\torch\autograd\grad_mode.py:27 in decorate_context │
│ │
│ 24 │ │ @functools.wraps(func) │
│ 25 │ │ def decorate_context(*args, **kwargs): │
│ 26 │ │ │ with self.clone(): │
│ > 27 │ │ │ │ return func(*args, **kwargs) │
│ 28 │ │ return cast(F, decorate_context) │
│ 29 │ │
│ 30 │ def _wrap_generator(self, func): │
│ │
│ C:\Users\Administrator\.cache\huggingface\modules\transformers_modules\chatglm3-6b\modeling_chat │
│ glm.py:1042 in chat │
│ │
│ 1039 │ │ inputs = inputs.to(self.device) │
│ 1040 │ │ eos_token_id = [tokenizer.eos_token_id, tokenizer.get_command("<|user|>"), │
│ 1041 │ │ │ │ │ │ tokenizer.get_command("<|observation|>")] │
│ > 1042 │ │ outputs = self.generate(**inputs, **gen_kwargs, eos_token_id=eos_token_id) │
│ 1043 │ │ outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):-1] │
│ 1044 │ │ response = tokenizer.decode(outputs) │
│ 1045 │ │ history.append({"role": role, "content": query}) │
│ │
│ F:\Anaconda\envs\chatglm\lib\site-packages\torch\autograd\grad_mode.py:27 in decorate_context │
│ │
│ 24 │ │ @functools.wraps(func) │
│ 25 │ │ def decorate_context(*args, **kwargs): │
│ 26 │ │ │ with self.clone(): │
│ > 27 │ │ │ │ return func(*args, **kwargs) │
│ 28 │ │ return cast(F, decorate_context) │
│ 29 │ │
│ 30 │ def _wrap_generator(self, func): │
│ │
│ F:\Anaconda\envs\chatglm\lib\site-packages\transformers\generation\utils.py:1575 in generate │
│ │
│ 1572 │ │ │ ) │
│ 1573 │ │ │ │
│ 1574 │ │ │ # 13. run sample │
│ > 1575 │ │ │ result = self._sample( │
│ 1576 │ │ │ │ input_ids, │
│ 1577 │ │ │ │ logits_processor=prepared_logits_processor, │
│ 1578 │ │ │ │ logits_warper=logits_warper, │
│ │
│ F:\Anaconda\envs\chatglm\lib\site-packages\transformers\generation\utils.py:2697 in _sample │
│ │
│ 2694 │ │ │ model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs) │
│ 2695 │ │ │ │
│ 2696 │ │ │ # forward pass to get next token │
│ > 2697 │ │ │ outputs = self( │
│ 2698 │ │ │ │ **model_inputs, │
│ 2699 │ │ │ │ return_dict=True, │
│ 2700 │ │ │ │ output_attentions=output_attentions, │
│ │
│ F:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py:1130 in _call_impl │
│ │
│ 1127 │ │ # this function, and just call forward. │
│ 1128 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1129 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ > 1130 │ │ │ return forward_call(*input, **kwargs) │
│ 1131 │ │ # Do not call functions when jit is used │
│ 1132 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1133 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ C:\Users\Administrator\.cache\huggingface\modules\transformers_modules\chatglm3-6b\modeling_chat │
│ glm.py:941 in forward │
│ │
│ 938 │ │ use_cache = use_cache if use_cache is not None else self.config.use_cache │
│ 939 │ │ return_dict = return_dict if return_dict is not None else self.config.use_return │
│ 940 │ │ │
│ > 941 │ │ transformer_outputs = self.transformer( │
│ 942 │ │ │ input_ids=input_ids, │
│ 943 │ │ │ position_ids=position_ids, │
│ 944 │ │ │ attention_mask=attention_mask, │
│ │
│ F:\Anaconda\envs\chatglm\lib\site-packages\torch\nn\modules\module.py:1130 in _call_impl │
│ │
│ 1127 │ │ # this function, and just call forward. │
│ 1128 │ │ if not (self._backward_hooks or self._forward_hooks or self._forward_pre_hooks o │
│ 1129 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks): │
│ > 1130 │ │ │ return forward_call(*input, **kwargs) │
│ 1131 │ │ # Do not call functions when jit is used │
│ 1132 │ │ full_backward_hooks, non_full_backward_hooks = [], [] │
│ 1133 │ │ if self._backward_hooks or _global_backward_hooks: │
│ │
│ C:\Users\Administrator\.cache\huggingface\modules\transformers_modules\chatglm3-6b\modeling_chat │
│ glm.py:822 in forward │
│ │
│ 819 │ │ │ │ │ │ │ │ │ │ │ attention_mask], dim=-1) │
│ 820 │ │ │
│ 821 │ │ if full_attention_mask is None: │
│ > 822 │ │ │ if (attention_mask is not None and not attention_mask.all()) or (past_key_va │
│ 823 │ │ │ │ full_attention_mask = self.get_masks(input_ids, past_key_values, padding │
│ 824 │ │ │
│ 825 │ │ # Rotary positional embeddings │
└──────────────────────────────────────────────────────────────────────────────────────────────────┘
RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
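As those last two lines say, a device-side assert is usually reported far from the kernel that actually failed, so the traceback above may point at the wrong frame. Below is a minimal debugging sketch, not a fix. It assumes `load_model_and_tokenizer` is importable from the `inference_hf.py` shown in the traceback, and the checkpoint directory is a placeholder:

```python
# debug_repro.py -- a minimal localization sketch under the assumptions above.
import os

# Must be set before torch initializes CUDA: kernels then launch synchronously,
# so the assert is attributed to the op that failed, not a later API call.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

from inference_hf import load_model_and_tokenizer  # assumed import path

model, tokenizer = load_model_and_tokenizer("output/checkpoint")  # placeholder dir

# If the GPU run still only shows the opaque assert, rerunning the same call on
# CPU usually raises a readable Python error instead -- e.g. an IndexError for a
# token id outside the embedding table, a frequent trigger of this assert when a
# finetuned checkpoint and its tokenizer have drifted apart.
model = model.float().cpu()
response, _ = model.chat(tokenizer, "你好")
print(response)
```

If the CPU run raises an `IndexError` in an embedding lookup, comparing the tokenizer's vocabulary size with the model's embedding size would be a reasonable next check.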
The environment is as follows:
absl-py==2.1.0
accelerate==0.27.2
aiofiles==23.2.1
aiohttp==3.9.3
aiosignal==1.3.1
altair==5.2.0
annotated-types==0.6.0
antlr4-python3-runtime==4.9.3
anyio==4.3.0
argon2-cffi==23.1.0
argon2-cffi-bindings==21.2.0
arxiv==2.1.0
async-timeout==4.0.3
attrs==23.2.0
azure-core==1.30.1
azure-storage-blob==12.19.1
backoff==2.2.1
beautifulsoup4==4.12.3
bitsandbytes==0.37.1
bitsandbytes-windows==0.37.5
blinker==1.7.0
blis==0.7.11
Brotli==1.1.0
cachetools==5.3.3
catalogue==2.0.10
certifi==2024.2.2
cffi==1.16.0
chardet==5.2.0
charset-normalizer==3.3.2
click==8.1.7
cloudpathlib==0.16.0
colorama==0.4.6
coloredlogs==15.0.1
confection==0.1.4
contourpy==1.2.0
cpm-kernels==1.0.11
cryptography==42.0.5
curl_cffi==0.6.2
cycler==0.12.1
cymem==2.0.8
dashscope==1.13.6
dataclasses-json==0.6.4
datasets==2.18.0
deepdiff==6.7.1
Deprecated==1.2.14
deprecation==2.1.0
dill==0.3.8
distro==1.9.0
duckduckgo_search==5.1.0
effdet==0.4.1
einops==0.7.0
emoji==2.10.1
environs==9.5.0
et-xmlfile==1.1.0
exceptiongroup==1.2.0
faiss-cpu==1.7.4
fake-useragent==1.5.1
fastapi==0.109.0
feedparser==6.0.10
ffmpy==0.3.2
filelock==3.13.1
filetype==1.2.0
flatbuffers==24.3.7
fonttools==4.49.0
frozenlist==1.4.1
fschat==0.2.35
fsspec==2024.2.0
gitdb==4.0.11
GitPython==3.1.42
google-auth==2.29.0
google-auth-oauthlib==0.4.6
gradio==3.50.0
gradio_client==0.6.1
greenlet==3.0.3
grpcio==1.60.0
h11==0.14.0
h2==4.1.0
hpack==4.0.0
httpcore==1.0.4
httpx==0.27.0
httpx-sse==0.4.0
huggingface-hub==0.21.4
humanfriendly==10.0
hyperframe==6.0.1
idna==3.4
imageio==2.34.0
importlib_metadata==7.0.2
importlib_resources==6.3.1
iniconfig==2.0.0
iopath==0.1.10
isodate==0.6.1
jieba==0.42.1
Jinja2==3.1.3
joblib==1.3.2
jsonpatch==1.33
jsonpath-python==1.0.6
jsonpointer==2.4
jsonschema==4.21.1
jsonschema-specifications==2023.12.1
kiwisolver==1.4.5
langchain==0.0.354
langchain-community==0.0.20
langchain-core==0.1.23
langchain-experimental==0.0.47
langcodes==3.3.0
langdetect==1.0.9
langsmith==0.0.87
latex2mathml==3.77.0
layoutparser==0.3.4
lazy_loader==0.3
llama-index==0.9.35
loguru==0.7.2
lxml==5.1.0
Markdown==3.5.2
markdown-it-py==3.0.0
markdown2==2.4.13
markdownify==0.11.6
MarkupSafe==2.1.5
marshmallow==3.21.1
matplotlib==3.8.3
mdtex2html==1.3.0
mdurl==0.1.2
metaphor-python==0.1.23
minio==7.2.5
mkl-fft==1.3.8
mkl-random==1.2.4
mkl-service==2.4.0
mpmath==1.3.0
msg-parser==1.2.0
multidict==6.0.5
multiprocess==0.70.16
murmurhash==1.0.10
mypy-extensions==1.0.0
nest-asyncio==1.6.0
networkx==3.2.1
nh3==0.2.15
nltk==3.8.1
numexpr==2.8.6
numpy==1.24.4
oauthlib==3.2.2
olefile==0.47
omegaconf==2.3.0
onnx==1.15.0
onnxruntime==1.15.1
openai==1.9.0
opencv-python==4.9.0.80
openpyxl==3.1.2
ordered-set==4.1.0
orjson==3.9.15
packaging==23.2
pandas==2.0.3
pathlib==1.0.1
pdf2image==1.17.0
pdfminer.six==20231228
pdfplumber==0.11.0
peft==0.9.0
pikepdf==8.4.1
Pillow==9.5.0
pillow_heif==0.15.0
pip==23.3.1
pluggy==1.4.0
portalocker==2.8.2
preshed==3.0.9
prompt-toolkit==3.0.43
protobuf==3.20.3
psutil==5.9.8
pyarrow==15.0.1
pyarrow-hotfix==0.6
pyasn1==0.6.0
pyasn1_modules==0.4.0
pyclipper==1.3.0.post5
pycocotools==2.0.7
pycparser==2.21
pycryptodome==3.20.0
pydantic==1.10.13
pydantic_core==2.16.3
pydash==7.0.7
pydeck==0.8.1b0
pydub==0.25.1
PyExecJS==1.5.1
Pygments==2.17.2
PyJWT==2.8.0
pymilvus==2.4.0
PyMuPDF==1.23.16
PyMuPDFb==1.23.9
pypandoc==1.13
pyparsing==3.1.2
pypdf==4.1.0
pypdfium2==4.28.0
pyreadline3==3.4.1
pytesseract==0.3.10
pytest==7.4.3
python-dateutil==2.9.0.post0
python-decouple==3.8
python-docx==1.1.0
python-dotenv==1.0.1
python-iso639==2024.2.7
python-magic==0.4.27
python-magic-bin==0.4.14
python-multipart==0.0.9
python-pptx==0.6.23
pytz==2024.1
pywencai==0.12.2
pywin32==306
PyYAML==6.0.1
rapidfuzz==3.6.2
rapidocr-onnxruntime==1.3.8
referencing==0.33.0
regex==2023.12.25
requests==2.31.0
requests-oauthlib==2.0.0
rich==13.7.1
rouge-chinese==1.0.3
rpds-py==0.18.0
rsa==4.9
ruamel.yaml==0.18.6
ruamel.yaml.clib==0.2.8
ruff==0.3.3
safetensors==0.4.2
scikit-image==0.22.0
scikit-learn==1.4.1.post1
scipy==1.12.0
semantic-version==2.10.0
sentence-transformers==2.2.2
sentencepiece==0.2.0
setuptools==68.2.2
sgmllib3k==1.0.0
shapely==2.0.3
shellingham==1.5.4
shortuuid==1.0.13
simplejson==3.19.2
six==1.16.0
smart-open==6.4.0
smmap==5.0.1
sniffio==1.3.1
socksio==1.0.0
soupsieve==2.5
spacy==3.7.2
spacy-legacy==3.0.12
spacy-loggers==1.0.5
SQLAlchemy==2.0.19
srsly==2.4.8
sse-starlette==1.8.2
starlette==0.35.0
streamlit==1.30.0
streamlit-aggrid==0.3.4.post3
streamlit-antd-components==0.3.1
streamlit-chatbox==1.1.11
streamlit-feedback==0.1.3
streamlit-modal==0.1.0
streamlit-option-menu==0.3.12
strsimpy==0.2.1
svgwrite==1.4.3
sympy==1.12
tabulate==0.9.0
tenacity==8.2.3
tensorboard==2.10.0
tensorboard-data-server==0.6.1
tensorboard-plugin-wit==1.8.1
text2vec==1.2.9
thinc==8.2.3
threadpoolctl==3.3.0
tifffile==2024.2.12
tiktoken==0.5.2
timm==0.9.16
tokenizers==0.15.2
toml==0.10.2
tomli==2.0.1
tomlkit==0.12.0
toolz==0.12.1
torch==1.12.0+cu113
torchaudio==0.12.0+cu113
torchvision==0.13.0+cu113
tornado==6.4
tqdm==4.66.1
transformers==4.39.3
transformers-stream-generator==0.0.4
typer==0.9.0
typing_extensions==4.10.0
typing-inspect==0.9.0
tzdata==2024.1
tzlocal==5.2
ujson==5.9.0
unstructured==0.11.0
unstructured-client==0.22.0
unstructured-inference==0.7.15
unstructured.pytesseract==0.3.12
urllib3==2.1.0
uvicorn==0.28.0
validators==0.22.0
visdom==0.2.4
wasabi==1.1.2
watchdog==3.0.0
wavedrom==2.0.3.post3
wcwidth==0.2.13
weasel==0.3.4
websocket-client==1.7.0
websockets==12.0
Werkzeug==3.0.2
wheel==0.41.2
win32-setctime==1.1.0
wrapt==1.16.0
xformers==0.0.23.post1
xlrd==2.0.1
XlsxWriter==3.2.0
xxhash==3.4.1
yarl==1.9.4
youtube-search==2.1.2
zipp==3.18.0