From c6d77fd15a4165b24b9b053973523683a3738d8a Mon Sep 17 00:00:00 2001 From: Sinclair Date: Fri, 28 Jun 2024 23:00:09 +0800 Subject: [PATCH] fix: sitemap, image --- config/aiGenerate.go | 38 +++++++++++++++++--------------- config/collector.go | 4 +++- config/constant.go | 9 ++++---- model/attachment.go | 2 +- provider/anqi.go | 45 +++++++++++++++++++++++++++++++++----- provider/attachment.go | 33 ++++++++++++++++++++++++++++ provider/collector.go | 23 ++++++++++++++++--- provider/combination.go | 35 ++++++++++++++++++++++++----- provider/keywordCollect.go | 44 +++++++++++++++++++------------------ provider/openai.go | 1 + provider/setting.go | 2 ++ provider/sitemap.go | 2 +- 12 files changed, 179 insertions(+), 59 deletions(-) diff --git a/config/aiGenerate.go b/config/aiGenerate.go index 6220bd5a..ffc04ce6 100644 --- a/config/aiGenerate.go +++ b/config/aiGenerate.go @@ -1,24 +1,26 @@ package config type AiGenerateConfig struct { - Open bool `json:"open"` // 是否自动写作 - Language string `json:"language"` // zh|en|cr - DoubleTitle bool `json:"double_title"` // 是否生成双标题 - DoubleSplit int `json:"double_split"` // 双标题形式 - Demand string `json:"demand"` // 通用Demand - InsertImage int `json:"insert_image"` // 是否插入图片, 0 移除图片,2 插入自定义图片 - Images []string `json:"images"` - ContentReplace []ReplaceKeyword `json:"content_replace"` - CategoryId uint `json:"category_id"` //默认分类 - SaveType uint `json:"save_type"` // 文档处理方式 - StartHour int `json:"start_hour"` //每天开始时间 - EndHour int `json:"end_hour"` //每天结束时间 - DailyLimit int `json:"daily_limit"` //每日限额 - AiEngine string `json:"ai_engine"` // ai 引擎,default 官方接口,openai 自定义openai,spark 星火大模型 - OpenAIKeys []OpenAIKey `json:"open_ai_keys"` // self openai key - ApiValid bool `json:"api_valid"` // api地址是否可用 - KeyIndex int `json:"-"` // 上一次调用的key id - Spark SparkSetting `json:"spark"` + Open bool `json:"open"` // 是否自动写作 + Language string `json:"language"` // zh|en|cr + DoubleTitle bool `json:"double_title"` // 是否生成双标题 + DoubleSplit int 
`json:"double_split"` // 双标题形式 + Demand string `json:"demand"` // 通用Demand + InsertImage int `json:"insert_image"` // 是否插入图片, 0 移除图片,2 插入自定义图片,3,图片库分类 + Images []string `json:"images"` + ImageCategoryId int `json:"image_category_id"` // 选定的图片分类 + ContentReplace []ReplaceKeyword `json:"content_replace"` + CategoryId uint `json:"category_id"` //默认分类 + CategoryIds []uint `json:"category_ids"` // 默认分类,支持多个 + SaveType uint `json:"save_type"` // 文档处理方式 + StartHour int `json:"start_hour"` //每天开始时间 + EndHour int `json:"end_hour"` //每天结束时间 + DailyLimit int `json:"daily_limit"` //每日限额 + AiEngine string `json:"ai_engine"` // ai 引擎,default 官方接口,openai 自定义openai,spark 星火大模型 + OpenAIKeys []OpenAIKey `json:"open_ai_keys"` // self openai key + ApiValid bool `json:"api_valid"` // api地址是否可用 + KeyIndex int `json:"-"` // 上一次调用的key id + Spark SparkSetting `json:"spark"` } type OpenAIKey struct { diff --git a/config/collector.go b/config/collector.go index c062232b..4c42b844 100644 --- a/config/collector.go +++ b/config/collector.go @@ -6,8 +6,9 @@ type CollectorJson struct { Channels int `json:"channels"` //预留 CollectMode int `json:"collect_mode"` // 0: 采集, 1: 组合, 2: AI 生成 Language string `json:"language"` // zh|en|cr - InsertImage int `json:"insert_image"` // 是否插入图片, 0 移除图片,1 保留图片,2 插入自定义图片 + InsertImage int `json:"insert_image"` // 是否插入图片, 0 移除图片,1 保留图片,2 插入自定义图片,3,图片库分类 Images []string `json:"images"` + ImageCategoryId int `json:"image_category_id"` // 选定的图片分类 FromWebsite string `json:"from_website"` TitleMinLength int `json:"title_min_length"` ContentMinLength int `json:"content_min_length"` @@ -22,6 +23,7 @@ type CollectorJson struct { AutoTranslate bool `json:"auto_translate"` //是否翻译 ToLanguage string `json:"to_language"` // 支持谷歌翻译列表语言 CategoryId uint `json:"category_id"` //默认分类 + CategoryIds []uint `json:"category_ids"` // 默认分类,支持多个 SaveType uint `json:"save_type"` // 文档处理方式 StartHour int `json:"start_hour"` //每天开始时间 EndHour int `json:"end_hour"` //每天结束时间 diff --git 
a/config/constant.go b/config/constant.go index e52c555e..db340925 100644 --- a/config/constant.go +++ b/config/constant.go @@ -1,6 +1,6 @@ package config -const Version = "3.3.7" +const Version = "3.3.8" const ( StatusOK = 0 @@ -114,9 +114,10 @@ const ( CollectModeCollect = 0 CollectModeCombine = 1 - CollectImageRemove = 0 // 移除 - CollectImageRetain = 1 // 保留 - CollectImageInsert = 2 // 自定义插入 + CollectImageRemove = 0 // 移除 + CollectImageRetain = 1 // 保留 + CollectImageInsert = 2 // 自定义插入 + CollectImageCategory = 3 // 插入指定分类图片 ) // login platform diff --git a/model/attachment.go b/model/attachment.go index 649a867c..ea48a95c 100644 --- a/model/attachment.go +++ b/model/attachment.go @@ -56,7 +56,7 @@ func (attachment *Attachment) GetThumb(storageUrl string) { return } //如果是一个远程地址,则缩略图和原图地址一致 - if strings.HasPrefix(attachment.FileLocation, "http") && !strings.HasPrefix(attachment.FileLocation, "//") { + if strings.HasPrefix(attachment.FileLocation, "http") || strings.HasPrefix(attachment.FileLocation, "//") { attachment.Logo = attachment.FileLocation attachment.Thumb = attachment.FileLocation } else { diff --git a/provider/anqi.go b/provider/anqi.go index c0c6a635..9f3b9201 100644 --- a/provider/anqi.go +++ b/provider/anqi.go @@ -485,14 +485,35 @@ func (w *Website) AnqiAiGenerateArticle(keyword *model.Keyword) (int, error) { } content[index] = imgTag } + if w.AiGenerateConfig.InsertImage == config.CollectImageCategory { + // 根据分类每次只取其中一张 + img := w.GetRandImageFromCategory(w.AiGenerateConfig.ImageCategoryId, keyword.Title) + if len(img) > 0 { + index := len(content) / 3 + content = append(content, "") + copy(content[index+1:], content[index:]) + imgTag := "" + req.Title + "" + // ![新的图片](http://xxx/xxx.webp) + if w.Content.Editor == "markdown" { + imgTag = fmt.Sprintf("![%s](%s)", req.Title, img) + } + content[index] = imgTag + } + } + categoryId := keyword.CategoryId if categoryId == 0 { - if w.AiGenerateConfig.CategoryId == 0 { + if 
len(w.AiGenerateConfig.CategoryIds) > 0 { + categoryId = w.AiGenerateConfig.CategoryIds[rand.New(rand.NewSource(time.Now().UnixNano())).Intn(len(w.AiGenerateConfig.CategoryIds))] + } else if w.AiGenerateConfig.CategoryId > 0 { + categoryId = w.AiGenerateConfig.CategoryId + } + if categoryId == 0 { var category model.Category w.DB.Where("module_id = 1").Take(&category) - w.AiGenerateConfig.CategoryId = category.Id + w.AiGenerateConfig.CategoryIds = []uint{category.Id} + categoryId = category.Id } - categoryId = w.AiGenerateConfig.CategoryId } archiveReq := request.Archive{ @@ -618,8 +639,22 @@ func (w *Website) AnqiSyncAiPlanResult(plan *model.AiArticlePlan) error { imgTag := "" + req.Title + "" content[index] = imgTag } + if w.AiGenerateConfig.InsertImage == config.CollectImageCategory { + // 根据分类每次只取其中一张 + img := w.GetRandImageFromCategory(w.AiGenerateConfig.ImageCategoryId, result.Data.Keyword) + if len(img) > 0 { + index := len(content) / 3 + content = append(content, "") + copy(content[index+1:], content[index:]) + imgTag := "" + req.Title + "" + content[index] = imgTag + } + } var keyword *model.Keyword categoryId := w.AiGenerateConfig.CategoryId + if len(w.AiGenerateConfig.CategoryIds) > 0 { + categoryId = w.AiGenerateConfig.CategoryIds[rand.New(rand.NewSource(time.Now().UnixNano())).Intn(len(w.AiGenerateConfig.CategoryIds))] + } keyword, err = w.GetKeywordByTitle(plan.Keyword) if err == nil { if keyword.CategoryId > 0 { @@ -629,9 +664,9 @@ func (w *Website) AnqiSyncAiPlanResult(plan *model.AiArticlePlan) error { if categoryId == 0 { var category model.Category w.DB.Where("module_id = 1").Take(&category) - w.AiGenerateConfig.CategoryId = category.Id + w.AiGenerateConfig.CategoryIds = []uint{category.Id} + categoryId = category.Id } - categoryId = w.AiGenerateConfig.CategoryId archive := request.Archive{ Title: result.Data.Title, diff --git a/provider/attachment.go b/provider/attachment.go index 87619a27..8afe7af8 100644 --- a/provider/attachment.go +++ 
b/provider/attachment.go @@ -790,6 +790,39 @@ func (w *Website) AttachmentScanUploads(baseDir string) { } } +// GetRandImageFromCategory returns the storage URL of one randomly chosen image attachment, or "" when none is found. +// categoryId >= 0 picks from that attachment category, -1 from all images, -2 from images whose name contains a word of title. +func (w *Website) GetRandImageFromCategory(categoryId int, title string) string { + var img string + var attach model.Attachment + if categoryId >= 0 { + // Filter by the categoryId argument so that collector and AI-generation callers each get their own configured category. + w.DB.Model(&model.Attachment{}).Where("category_id = ? and is_image = ?", categoryId, 1).Order("rand()").Limit(1).Take(&attach) + } else if categoryId == -1 { + // Any image: take a single random one. + w.DB.Model(&model.Attachment{}).Where("is_image = ?", 1).Order("rand()").Limit(1).Take(&attach) + } else if categoryId == -2 { + // Tokenize the title and match any of the resulting words against the attachment name. + keywordSplit := library.WordSplit(title, false) + tx := w.DB.Model(&model.Attachment{}).Where("is_image = ?", 1) + var queries []string + var args []interface{} + for _, word := range keywordSplit { + queries = append(queries, "name like ?") + args = append(args, "%"+word+"%") + } + tx = tx.Where(strings.Join(queries, " OR "), args...)
+ + tx.Order("rand()").Limit(1).Take(&attach) + } + // FileLocation is empty when no row was loaded into attach; img then stays "". + if len(attach.FileLocation) > 0 { + img = w.PluginStorage.StorageUrl + "/" + attach.FileLocation + } + + return img +} + func encodeImage(img image.Image, imgType string, quality int) ([]byte, error) { buff := &bytes.Buffer{} diff --git a/provider/collector.go b/provider/collector.go index 3003ec71..9a203c1d 100644 --- a/provider/collector.go +++ b/provider/collector.go @@ -207,12 +207,17 @@ func (w *Website) SaveCollectArticle(archive *request.Archive, keyword *model.Ke archive.KeywordId = keyword.Id categoryId := keyword.CategoryId if categoryId == 0 { - if w.CollectorConfig.CategoryId == 0 { + if len(w.CollectorConfig.CategoryIds) > 0 { + categoryId = w.CollectorConfig.CategoryIds[rand.New(rand.NewSource(time.Now().UnixNano())).Intn(len(w.CollectorConfig.CategoryIds))] + } else if w.CollectorConfig.CategoryId > 0 { + categoryId = w.CollectorConfig.CategoryId + } + if categoryId == 0 { var category model.Category w.DB.Where("module_id = 1").Take(&category) - w.CollectorConfig.CategoryId = category.Id + w.CollectorConfig.CategoryIds = []uint{category.Id} + categoryId = category.Id } - categoryId = w.CollectorConfig.CategoryId } archive.CategoryId = categoryId //log.Println("draft:", w.CollectorConfig.SaveType) @@ -363,6 +368,18 @@ func (w *Website) CollectSingleArticle(link *response.WebLink, keyword *model.Ke content[index] = "" + archive.Title + "" archive.Content = strings.Join(content, "") } + if w.CollectorConfig.InsertImage == config.CollectImageCategory { + // 根据分类每次只取其中一张 + img := w.GetRandImageFromCategory(w.CollectorConfig.ImageCategoryId, keyword.Title) + if len(img) > 0 { + content := strings.SplitAfter(archive.Content, ">") + index := len(content) / 3 + content = append(content, "") + copy(content[index+1:], content[index:]) + content[index] = "" + archive.Title + "" + archive.Content = strings.Join(content, "") + } + } //log.Println(archive.Title, len(archive.Content), archive.OriginUrl) return 
archive, nil diff --git a/provider/combination.go b/provider/combination.go index b806ac6b..13a40736 100644 --- a/provider/combination.go +++ b/provider/combination.go @@ -81,14 +81,29 @@ func (w *Website) GenerateCombination(keyword *model.Keyword) (int, error) { copy(content[index+1:], content[index:]) content[index] = "" + title + "" } + if w.CollectorConfig.InsertImage == config.CollectImageCategory { + // 根据分类每次只取其中一张 + img := w.GetRandImageFromCategory(w.CollectorConfig.ImageCategoryId, keyword.Title) + if len(img) > 0 { + index := len(content) / 3 + content = append(content, "") + copy(content[index+1:], content[index:]) + content[index] = "" + title + "" + } + } categoryId := keyword.CategoryId if categoryId == 0 { - if w.CollectorConfig.CategoryId == 0 { + if len(w.CollectorConfig.CategoryIds) > 0 { + categoryId = w.CollectorConfig.CategoryIds[rand.New(rand.NewSource(time.Now().UnixNano())).Intn(len(w.CollectorConfig.CategoryIds))] + } else if w.CollectorConfig.CategoryId > 0 { + categoryId = w.CollectorConfig.CategoryId + } + if categoryId == 0 { var category model.Category w.DB.Where("module_id = 1").Take(&category) - w.CollectorConfig.CategoryId = category.Id + w.CollectorConfig.CategoryIds = []uint{category.Id} + categoryId = category.Id } - categoryId = w.CollectorConfig.CategoryId } archive := request.Archive{ @@ -159,13 +174,23 @@ func (w *Website) GetCombinationArticle(keyword *model.Keyword) (*request.Archiv content = append(content, "

"+text+"

") } if w.CollectorConfig.InsertImage == config.CollectImageInsert && len(w.CollectorConfig.Images) > 0 { - rand.Seed(time.Now().UnixMicro()) - img := w.CollectorConfig.Images[rand.Intn(len(w.CollectorConfig.Images))] + randItem := rand.New(rand.NewSource(time.Now().UnixNano())) + img := w.CollectorConfig.Images[randItem.Intn(len(w.CollectorConfig.Images))] index := len(content) / 3 content = append(content, "") copy(content[index+1:], content[index:]) content[index] = "" + title + "" } + if w.CollectorConfig.InsertImage == config.CollectImageCategory { + // 根据分类每次只取其中一张 + img := w.GetRandImageFromCategory(w.CollectorConfig.ImageCategoryId, keyword.Title) + if len(img) > 0 { + index := len(content) / 3 + content = append(content, "") + copy(content[index+1:], content[index:]) + content[index] = "" + title + "" + } + } archive := request.Archive{ Title: title, diff --git a/provider/keywordCollect.go b/provider/keywordCollect.go index 8351f396..4cc0845b 100644 --- a/provider/keywordCollect.go +++ b/provider/keywordCollect.go @@ -187,6 +187,20 @@ func (k *KeywordCollect) collectKeyword(keyword *model.Keyword, fix bool) (int, log.Println(l.Title, "不包含核心词") continue } + // 移除排除词 + if len(k.KeywordConfig.TitleExclude) > 0 { + exist := false + for _, e := range k.KeywordConfig.TitleExclude { + if strings.Contains(l.Title, e) { + log.Println(l.Title, "包含排除词", e) + exist = true + break + } + } + if exist { + continue + } + } if _, ok := k.ExistsWords.Load(l.Title); ok { continue } @@ -255,7 +269,7 @@ func (k *KeywordCollect) CollectKeywords(content string, link string) []*model.K var result ZhihuJson err := json.Unmarshal([]byte(content), &result) if err != nil { - log.Println("解析json失败") + log.Println("解析json失败1") return words } for _, v := range result.Suggest { @@ -267,7 +281,7 @@ func (k *KeywordCollect) CollectKeywords(content string, link string) []*model.K var result BaiduSugJson err := json.Unmarshal([]byte(content), &result) if err != nil { - 
log.Println("解析json失败") + log.Println("解析json失败2") return words } for _, v := range result.G { @@ -279,7 +293,7 @@ func (k *KeywordCollect) CollectKeywords(content string, link string) []*model.K var result BaiduSugJson err := json.Unmarshal([]byte(content), &result) if err != nil { - log.Println("解析json失败") + log.Println("解析json失败3") return words } for _, v := range result.G { @@ -291,7 +305,7 @@ func (k *KeywordCollect) CollectKeywords(content string, link string) []*model.K var result SoSugJson err := json.Unmarshal([]byte(content), &result) if err != nil { - log.Println("解析json失败") + log.Println("解析json失败4") return words } for _, v := range result.Result { @@ -303,19 +317,7 @@ func (k *KeywordCollect) CollectKeywords(content string, link string) []*model.K var result ToutiaoSugJson err := json.Unmarshal([]byte(content), &result) if err != nil { - log.Println("解析json失败") - return words - } - for _, v := range result.Data { - existsWords[v.Keyword] = &model.Keyword{ - Title: v.Keyword, - } - } - } else if strings.Contains(link, "toutiao.com") { - var result ToutiaoSugJson - err := json.Unmarshal([]byte(content), &result) - if err != nil { - log.Println("解析json失败") + log.Println("解析json失败5") return words } for _, v := range result.Data { @@ -327,7 +329,7 @@ func (k *KeywordCollect) CollectKeywords(content string, link string) []*model.K var result SmJson err := json.Unmarshal([]byte(content), &result) if err != nil { - log.Println("解析json失败") + log.Println("解析json失败6") return words } for _, v := range result.R { @@ -352,7 +354,7 @@ func (k *KeywordCollect) CollectKeywords(content string, link string) []*model.K var result BingJson err := json.Unmarshal([]byte(content), &result) if err != nil { - log.Println("解析json失败") + log.Println("解析json失败7") return words } for _, v := range result.AS.Results { @@ -386,7 +388,7 @@ func (k *KeywordCollect) CollectKeywords(content string, link string) []*model.K var result []DuckDuckGoJson err := json.Unmarshal([]byte(content), 
&result) if err != nil { - log.Println("解析json失败") + log.Println("解析json失败8") return words } for _, v := range result { @@ -400,7 +402,7 @@ func (k *KeywordCollect) CollectKeywords(content string, link string) []*model.K var result YahooJson err := json.Unmarshal([]byte(content), &result) if err != nil { - log.Println("解析json失败") + log.Println("解析json失败9") return words } for _, v := range result.R { diff --git a/provider/openai.go b/provider/openai.go index b513f945..d08baa24 100644 --- a/provider/openai.go +++ b/provider/openai.go @@ -444,6 +444,7 @@ func (w *Website) SelfAiGenerateResult(req *AnqiAiRequest) (*AnqiAiRequest, erro title = strings.TrimPrefix(title, "文章标题:") title = strings.TrimPrefix(title, "主标题:") title = strings.TrimPrefix(title, "副标题:") + title = strings.Replace(title, "副标题", "", 1) title = strings.Replace(title, ":", ",", 1) if utf8.RuneCountInString(title) > 150 { title = string([]rune(title)[:150]) diff --git a/provider/setting.go b/provider/setting.go index 8bf54f9f..2e962102 100644 --- a/provider/setting.go +++ b/provider/setting.go @@ -421,6 +421,7 @@ func (w *Website) LoadCollectorSetting() { w.CollectorConfig.AutoTranslate = collector.AutoTranslate w.CollectorConfig.ToLanguage = collector.ToLanguage w.CollectorConfig.CategoryId = collector.CategoryId + w.CollectorConfig.CategoryIds = collector.CategoryIds w.CollectorConfig.StartHour = collector.StartHour w.CollectorConfig.EndHour = collector.EndHour w.CollectorConfig.FromWebsite = collector.FromWebsite @@ -429,6 +430,7 @@ func (w *Website) LoadCollectorSetting() { w.CollectorConfig.Language = collector.Language w.CollectorConfig.InsertImage = collector.InsertImage w.CollectorConfig.Images = collector.Images + w.CollectorConfig.ImageCategoryId = collector.ImageCategoryId if w.CollectorConfig.Language == "" { w.CollectorConfig.Language = config.LanguageZh diff --git a/provider/sitemap.go b/provider/sitemap.go index 1f788a1c..0a6e6158 100644 --- a/provider/sitemap.go +++ 
b/provider/sitemap.go @@ -83,7 +83,7 @@ func (w *Website) BuildSitemap() error { categoryBuilder = categoryBuilder.Where("id not in (?)", excludeIds) archiveBuilder = archiveBuilder.Where("category_id not in (?)", excludeIds) } - if w.PluginSitemap.ExcludeModuleIds != nil { + if len(w.PluginSitemap.ExcludeModuleIds) > 0 { categoryBuilder = categoryBuilder.Where("module_id not in (?)", w.PluginSitemap.ExcludeModuleIds) archiveBuilder = archiveBuilder.Where("module_id not in (?)", w.PluginSitemap.ExcludeModuleIds) }