Skip to content

Commit

Permalink
feat(重构代码): 重构代码,适配新版jar包的方法
Browse files Browse the repository at this point in the history
1. 新增 Geocoding 自定义
2. addRegionEntry 方法新增 replace 参数
3. 新增分词 segments
4. 分离 similarityWithResult 和 similarity 两个方法
  • Loading branch information
fjklqq committed May 26, 2022
1 parent 566db01 commit bf74944
Show file tree
Hide file tree
Showing 9 changed files with 459 additions and 183 deletions.
385 changes: 202 additions & 183 deletions GeocodingCHN/Geocoding.py

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions GeocodingCHN/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from GeocodingCHN.Geocoding import Geocoding
12 changes: 12 additions & 0 deletions GeocodingCHN/model/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# -*- coding: utf-8 -*-
# @Time : 2022/5/26 11:07
# @File : __init__.py
# @Author : CasuallyName
# @Email : fjkl@vip.qq.com
# @Software : Python 3.7
# @About :
from .address import Address
from .region_type import RegionType
from .version import Version
from .document import Document
from .matched import MatchedResult,MatchedTerm
85 changes: 85 additions & 0 deletions GeocodingCHN/model/address.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# -*- coding: utf-8 -*-
# @Time : 2022/5/26 11:08
# @File : address.py
# @Author : Zhou Hang
# @Email : zhouhang@idataway.com
# @Software : Python 3.7
# @About :
import jpype
class Address(object):
    """Python-side holder for the fields of a structured address.

    Every instance also carries a Java ``org.bitlap.geocoding.model.Address``
    object: either the one handed in through ``java`` or a new one built from
    the Python field values.
    """

    # Field names grouped the way ``__str__`` prints them (one group per line).
    # Flattened, this is also the key order of ``__dict__`` and ``__repr__``.
    _FIELD_ROWS = (
        ('provinceId', 'province'),
        ('cityId', 'city'),
        ('districtId', 'district'),
        ('streetId', 'street'),
        ('townId', 'town'),
        ('villageId', 'village'),
        ('road',),
        ('roadNum',),
        ('buildingNum',),
        ('text',),
    )

    @staticmethod
    def _int_if_truthy(value):
        """Return ``int(value)`` for truthy input, otherwise ``value`` untouched."""
        if value:
            return int(value)
        return value

    def __init__(self, provinceId=None, province=None, cityId=None, city=None,
                 districtId=None, district=None, streetId=None, street=None,
                 townId=None, town=None, villageId=None, village=None, road=None,
                 roadNum=None, buildingNum=None, text=None, java=None):
        """Store the address fields and attach the backing Java object.

        :param java: an existing Java address object to wrap; when ``None`` a
            new one is constructed from the remaining arguments.
        """
        to_id = self._int_if_truthy

        # Province / city / district / street ids are coerced to int when
        # truthy; town and village ids are stored exactly as given.
        self.provinceId = to_id(provinceId)
        self.province = province
        self.cityId = to_id(cityId)
        self.city = city
        self.districtId = to_id(districtId)
        self.district = district
        self.streetId = to_id(streetId)
        self.street = street
        self.townId = townId
        self.town = town
        self.villageId = villageId
        self.village = village
        self.road = road
        self.roadNum = roadNum
        self.buildingNum = buildingNum
        self.text = text

        self._AddressClass = jpype.JClass('org.bitlap.geocoding.model.Address')
        if java is None:
            # Positional order follows the Java constructor call used upstream.
            java = self._AddressClass(
                self.provinceId, self.province,
                self.cityId, self.city,
                self.districtId, self.district,
                self.streetId, self.street,
                self.townId, self.town,
                self.villageId, self.village,
                self.road, self.roadNum, self.buildingNum, self.text,
            )
        self._java = java

    def _render_row(self, row):
        """Format one group of fields as ``name=value, name=value``."""
        return ", ".join(f"{name}={getattr(self, name)}" for name in row)

    def __repr__(self):
        """Single-line rendering of all address fields."""
        return "Address(" + ", ".join(map(self._render_row, self._FIELD_ROWS)) + ")"

    def __str__(self):
        """Multi-line rendering: one tab-indented line per field group."""
        body = ", ".join("\n\t" + self._render_row(row) for row in self._FIELD_ROWS)
        return "Address(" + body + "\n)"

    @property
    def __dict__(self):
        """Public address fields as a plain dict (Java handles are excluded)."""
        return {
            name: getattr(self, name)
            for row in self._FIELD_ROWS
            for name in row
        }

    @property
    def __java__(self):
        """The backing Java address object."""
        return self._java

23 changes: 23 additions & 0 deletions GeocodingCHN/model/document.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
# @Time : 2022/5/26 11:12
# @File : document.py
# @Author : CasuallyName
# @Email : fjkl@vip.qq.com
# @Software : Python 3.7
# @About :

class Document:
    """Plain container for the pieces of a segmented address document.

    All fields default to ``None`` and are stored exactly as passed in.
    """

    def __init__(self, terms=None, termsMap=None, town=None, village=None, road=None,
                 roadNum=None, roadNumValue=None):
        self.terms = terms
        self.termsMap = termsMap
        self.town = town
        self.village = village
        self.road = road
        self.roadNum = roadNum
        self.roadNumValue = roadNumValue

    def __str__(self):
        """Render every field except ``termsMap`` on a single line."""
        return (
            "Document(terms={terms}, town={town}, village={village}, road={road}, "
            "roadNum={roadNum}, roadNumValue={roadNumValue})"
        ).format(
            terms=self.terms, town=self.town, village=self.village, road=self.road,
            roadNum=self.roadNum, roadNumValue=self.roadNumValue,
        )

    def __repr__(self):
        """Same text as ``__str__`` so documents stay readable inside containers."""
        return self.__str__()
51 changes: 51 additions & 0 deletions GeocodingCHN/model/matched.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# -*- coding: utf-8 -*-
# @Time : 2022/5/26 11:33
# @File : matched.py
# @Author : Zhou Hang
# @Email : zhouhang@idataway.com
# @Software : Python 3.7
# @About :


class MatchedTerm:
    """One matched term together with its scoring components."""

    def __init__(self, term=None, coord=0, density=0, boost=0, tfidf=0):
        # The matched term
        self.term = term
        # Match rate
        self.coord = coord
        # Density
        self.density = density
        # Weight
        self.boost = boost
        # Feature value (TF-IDF)
        self.tfidf = tfidf

    def __str__(self):
        """Render the term followed by its named scoring components."""
        return "MatchedTerm({}, coord={}, density={}, boost={}, tfidf={})".format(
            self.term, self.coord, self.density, self.boost, self.tfidf)

    def __repr__(self):
        """Same text as ``__str__`` so terms stay readable inside lists."""
        return self.__str__()


class MatchedResult:
    """Outcome of comparing two documents: both documents, per-term matches and a score."""

    def __init__(self, doc1=None, doc2=None, terms=None, similarity=0, java=None):
        """Store the comparison result.

        :param doc1: first compared document, stored as given.
        :param doc2: second compared document, stored as given.
        :param terms: iterable of objects exposing ``getTerm``, ``getCoord``,
            ``getDensity``, ``getBoost`` and ``getTfidf``; each one is copied
            into a ``MatchedTerm``. ``None`` is kept as ``None``.
        :param similarity: similarity score, stored as given.
        :param java: underlying Java result object, stored as given.
        """
        self.doc1 = doc1
        self.doc2 = doc2
        if terms is not None:
            terms = [
                MatchedTerm(
                    term.getTerm(),
                    term.getCoord(),
                    term.getDensity(),
                    term.getBoost(),
                    term.getTfidf(),
                )
                for term in terms
            ]
        self.terms = terms
        self.similarity = similarity
        self.java = java

    def _terms_text(self):
        """Return the printable form of ``terms`` used by ``__str__`` and ``__repr__``."""
        # ``terms`` defaults to None; iterating it used to raise TypeError
        # whenever a default-constructed result was printed.
        if self.terms is None:
            return "None"
        return str([str(term) for term in self.terms])

    def __str__(self):
        """Multi-line rendering, one tab-indented field per line."""
        return "MatchedResult(\n\tdoc1={doc1}, \n\tdoc2={doc2}, \n\tterms={terms}, \n\tsimilarity={similarity}\n)".format(
            doc1=self.doc1, doc2=self.doc2, terms=self._terms_text(), similarity=self.similarity
        )

    def __repr__(self):
        """Single-line rendering of the same fields."""
        return "MatchedResult(doc1={doc1}, doc2={doc2}, terms={terms}, similarity={similarity})".format(
            doc1=self.doc1, doc2=self.doc2, terms=self._terms_text(), similarity=self.similarity
        )
41 changes: 41 additions & 0 deletions GeocodingCHN/model/region_type.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# -*- coding: utf-8 -*-
# @Time : 2022/5/26 11:09
# @File : region_type.py
# @Author : CasuallyName
# @Email : fjkl@vip.qq.com
# @Software : Python 3.7
# @About :

class RegionType(object):
def __init__(self, RegionTypeClass):
self.Undefined = RegionTypeClass.Undefined # 未定义区域类型
self.Country = RegionTypeClass.Country # 国家
self.Province = RegionTypeClass.Province # 省份
self.ProvinceLevelCity1 = RegionTypeClass.ProvinceLevelCity1 # 直辖市 - 与省份并行的一级
self.ProvinceLevelCity2 = RegionTypeClass.ProvinceLevelCity2 # 直辖市 - 与城市并行的一级
self.City = RegionTypeClass.City # 地级市
self.CityLevelDistrict = RegionTypeClass.CityLevelDistrict # 省直辖县级市
self.District = RegionTypeClass.District # 县、区
self.Street = RegionTypeClass.Street # 街道乡镇一级
self.PlatformL4 = RegionTypeClass.PlatformL4 # 特定平台的4级地址
self.Town = RegionTypeClass.Town # 附加:乡镇
self.Village = RegionTypeClass.Village # 附加:村

@staticmethod
def help():
print('\n'.join([
'RegionType 说明:',
' Country : 国家',
' Province : 省份',
' ProvinceLevelCity1 : 直辖市(与省份并行的一级)',
' ProvinceLevelCity2 : 直辖市(与城市并行的一级)',
' City : 地级市',
' CityLevelDistrict : 省直辖县级市',
' District : 县、区',
' Street : 街道乡镇一级',
' PlatformL4 : 特定平台的4级地址',
' Town : 乡镇(附加)',
' Village : 村(附加)',
' Undefined : 未定义区域类型',
]))

18 changes: 18 additions & 0 deletions GeocodingCHN/model/version.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
# @Time : 2022/5/26 11:10
# @File : version.py
# @Author : CasuallyName
# @Email : fjkl@vip.qq.com
# @Software : Python 3.7
# @About :

class Version:
    """Pair of version identifiers: the Python package and the bundled geocoding jar."""

    def __init__(self, package, jar):
        self.package, self.jar = package, jar

    def __repr__(self):
        """Two-line form: package version, then jar version."""
        return "Package(GeocodingCHN) version: {}\nSource(geocoding.jar) version: {}".format(
            self.package, self.jar)

    def __str__(self):
        """One-line form with both versions separated by a comma."""
        return "Package(GeocodingCHN) version: {}, Source(geocoding.jar) version: {}".format(
            self.package, self.jar)
26 changes: 26 additions & 0 deletions tests/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 2021/1/18 13:13
# @Author : CasuallyName
# @Email : fjkl@vip.qq.com
# @File : test.py
# @Software: PyCharm
from GeocodingCHN import Geocoding

if __name__ == '__main__':
    # Manual smoke run: build the geocoder twice (default data first, then an
    # explicit data file), then exercise normalizing, both similarity calls,
    # a custom region entry and segmentation, printing each result.
    geocoding = Geocoding()
    geocoding = Geocoding(data_class_path='core/region.dat')
    print(geocoding.__version__)

    # Two spellings of what appears to be the same address.
    full_text = '山东青岛李沧区延川路116号绿城城园东区7号楼2单元802户'
    short_text = '山东青岛李沧区延川路绿城城园东区7-2-802'

    full_address = geocoding.normalizing(full_text)
    print(full_address)
    short_address = geocoding.normalizing(short_text)

    match = geocoding.similarityWithResult(full_address, short_address)
    print(match)
    print(match.similarity)
    print(geocoding.similarity(full_address, short_address))

    # Register a custom street entry, then normalize an address that uses it.
    geocoding.addRegionEntry(1, 321200000000, "A街道", geocoding.RegionType.Street)
    print(geocoding.normalizing("江苏泰州A街道"))
    print(geocoding.segment(short_text))

0 comments on commit bf74944

Please sign in to comment.