Skip to content

Commit

Permalink
feat: 流水线页面 按名称A-Z 支持中文按拼音排序 TencentBlueKing#5100 替换汉字拼音转换工具 提高多音字正确率
Browse files Browse the repository at this point in the history
  • Loading branch information
hingbong committed Sep 24, 2021
1 parent 7dff8cd commit 0ef16e5
Show file tree
Hide file tree
Showing 7 changed files with 74 additions and 12 deletions.
4 changes: 2 additions & 2 deletions src/backend/ci/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ allprojects {
entry("poi")
entry("poi-ooxml")
}
dependencySet("com.hankcs:${Versions.HanLP}") {
entry("hanlp")
dependencySet("com.github.taptap:${Versions.PinyinPlus}") {
entry("pinyin-plus")
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,5 @@ object Versions {
const val Elasticsearch = "7.4.0"
const val Lucene = "8.2.0"
const val Feign = "11.6" // TODO 修复这个BUG:https://github.com/OpenFeign/feign/pull/1347 , 后续SpringCloud升级后可以去掉
const val HanLP = "portable-1.8.2"
const val PinyinPlus = "1.0"
}
1 change: 1 addition & 0 deletions src/backend/ci/core/common/common-util/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ dependencies {
implementation("commons-codec:commons-codec")
implementation("com.google.guava:guava")
implementation("org.jolokia:jolokia-core")
implementation("com.github.taptap:pinyin-plus")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package com.tencent.devops.common.util

import com.github.houbb.heaven.util.lang.CharUtil
import com.github.houbb.heaven.util.lang.StringUtil
import com.github.houbb.segment.support.segment.result.impl.SegmentResultHandlers
import com.taptap.pinyin.ResourceLoad
import com.taptap.pinyin.Word
import com.taptap.pinyin.analyzer.WordAnalyzer
import com.taptap.pinyin.utils.Utils
import java.util.HashMap
import java.util.StringJoiner

object PinyinUtil {

    // Dictionary returned by ResourceLoad.loadCedict(), keyed by word or single character.
    private val words: HashMap<String, Word> = ResourceLoad.loadCedict()
    private val wordAnalyzer = WordAnalyzer.newInstance()

    /**
     * Converts [text] to pinyin without tone marks.
     *
     * Adapted from `com.taptap.pinyin.PinyinPlus#to(java.lang.String, boolean)`, changed so that
     * no separator is inserted between words.
     *
     * Lookup order:
     * 1. the whole text as a single dictionary entry;
     * 2. each segment produced by the word analyzer as a dictionary entry;
     * 3. each character of a segment that is not in the dictionary.
     *
     * Characters that have no pinyin (non-Chinese characters, and Chinese characters missing from
     * the dictionary) are kept unchanged.
     *
     * @param text the text to convert; blank text is returned as is
     * @return the pinyin string, with characters that have no pinyin left unchanged
     */
    @Suppress("NestedBlockDepth")
    fun toPinyin(text: String): String {
        if (StringUtil.isBlank(text)) return text

        // Prefer a dictionary entry for the complete text.
        words[text]?.let { return Utils.trim(it.pinyinNoTone) }

        val joiner = StringJoiner("")
        for (segment in wordAnalyzer.segment(text, SegmentResultHandlers.word())) {
            val segmentWord = words[segment]
            if (segmentWord != null) {
                joiner.add(Utils.trim(segmentWord.pinyinNoTone))
                continue
            }
            // The segment is not a known word: convert it character by character.
            for (character in StringUtil.toCharacterList(segment)) {
                val pinyin = if (CharUtil.isChinese(character)) {
                    // A Chinese character may still be absent from the dictionary; fall back to
                    // the character itself instead of failing with a NullPointerException.
                    words[character.toString()]?.pinyinNoTone
                } else {
                    null
                }
                joiner.add(pinyin ?: character.toString())
            }
        }
        return joiner.toString()
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package com.tencent.devops.common.util

import org.junit.Test

class PinyinUtilTest {

    /**
     * Checks [PinyinUtil.toPinyin] on mixed Chinese / Latin / Japanese input and on words
     * containing characters with more than one reading.
     */
    @Test
    fun test() {
        // input to expected pinyin
        val expectedByInput = listOf(
            "重置流水线状态 TEST.テスト-進め_測試" to "chongzhiliushuixianzhuangtai TEST.テスト-jinめ_ceshi",
            "重要 重大" to "zhongyao zhongda",
            "奇偶" to "jiou",
            "奇怪奇异" to "qiguaiqiyi",
            "屏风 屏障 屏蔽" to "pingfeng pingzhang pingbi",
            "屏息 屏气" to "bingxi bingqi"
        )
        for ((input, expected) in expectedByInput) {
            // JUnit's assertEquals is used instead of Kotlin's assert(): assert() is only checked
            // when JVM assertions are enabled, and it does not report the expected/actual values.
            org.junit.Assert.assertEquals("toPinyin($input)", expected, PinyinUtil.toPinyin(input))
        }
    }
}
1 change: 0 additions & 1 deletion src/backend/ci/core/process/biz-base/build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -43,5 +43,4 @@ dependencies {
api("mysql:mysql-connector-java")
implementation("com.github.ben-manes.caffeine:caffeine")
testImplementation(project(":core:common:common-test"))
api("com.hankcs:hanlp")
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,10 @@

package com.tencent.devops.process.engine.dao

import com.hankcs.hanlp.HanLP
import com.hankcs.hanlp.dictionary.py.Pinyin
import com.tencent.devops.common.api.util.timestampmilli
import com.tencent.devops.common.pipeline.enums.ChannelCode
import com.tencent.devops.common.pipeline.pojo.BuildNo
import com.tencent.devops.common.util.PinyinUtil
import com.tencent.devops.model.process.Tables.T_PIPELINE_INFO
import com.tencent.devops.model.process.tables.records.TPipelineInfoRecord
import com.tencent.devops.process.engine.pojo.PipelineInfo
Expand Down Expand Up @@ -605,11 +604,7 @@ class PipelineInfoDao {
}

private fun nameToPinyin(pipelineName: String): String {
return HanLP.convertToPinyinList(pipelineName).asSequence().mapIndexed { index, it ->
// Not a Chinese character, so it has no pinyin
if (Pinyin.none5 == it) {
pipelineName[index].toString()
} else it.pinyinWithoutTone
}.joinToString("")
// The database column length is 1300
return PinyinUtil.toPinyin(pipelineName).take(1300)
}
}

0 comments on commit 0ef16e5

Please sign in to comment.