Skip to content

Commit 6a337f7

Browse files
shaohuzhang1 authored and wangdan-fit2cloud committed
fix: 修复在线知识库爬取文档名超过128个字符报错 #706 (#778)
1 parent 5dfe56c commit 6a337f7

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

apps/dataset/serializers/document_serializers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -666,13 +666,13 @@ def handler(source_url: str, selector, response: Fork.Response):
666 666   paragraphs = get_split_model('web.md').parse(response.content)
667 667   # 插入
668 668   DocumentSerializers.Create(data={'dataset_id': dataset_id}).save(
669     - {'name': source_url, 'paragraphs': paragraphs,
    669 + {'name': source_url[0:128], 'paragraphs': paragraphs,
670 670   'meta': {'source_url': source_url, 'selector': selector},
671 671   'type': Type.web}, with_valid=True)
672 672   except Exception as e:
673 673   logging.getLogger("max_kb_error").error(f'{str(e)}:{traceback.format_exc()}')
674 674   else:
675     - Document(name=source_url,
    675 + Document(name=source_url[0:128],
676 676   meta={'source_url': source_url, 'selector': selector},
677 677   type=Type.web,
678 678   char_length=0,

0 commit comments

Comments
 (0)