-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
1. 新增 Geocoding 自定义 2. addRegionEntry 方法新增 replace 参数 3. 新增分词 segments 4. 分离 similarityWithResult 和 similarity 两个方法
- Loading branch information
Showing
9 changed files
with
459 additions
and
183 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from GeocodingCHN.Geocoding import Geocoding |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# -*- coding: utf-8 -*- | ||
# @Time : 2022/5/26 11:07 | ||
# @File : __init__.py | ||
# @Author : CasuallyName | ||
# @Email : fjkl@vip.qq.com | ||
# @Software : Python 3.7 | ||
# @About : | ||
from .address import Address | ||
from .region_type import RegionType | ||
from .version import Version | ||
from .document import Document | ||
from .matched import MatchedResult,MatchedTerm |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# -*- coding: utf-8 -*- | ||
# @Time : 2022/5/26 11:08 | ||
# @File : address.py | ||
# @Author : Zhou Hang | ||
# @Email : zhouhang@idataway.com | ||
# @Software : Python 3.7 | ||
# @About : | ||
import jpype | ||
class Address(object):
    """Python-side holder for the components of a parsed address.

    Every component is stored as a plain attribute. The instance also keeps
    a handle on the Java class ``org.bitlap.geocoding.model.Address``
    (looked up through ``jpype.JClass``) and on a Java address object, which
    is either the one passed in as ``java`` or a new one built from the
    components.

    NOTE(review): ``jpype.JClass`` is called on every construction, even when
    ``java`` is supplied; presumably a JVM must already be running - confirm.
    """

    # Public component names, in the order used by ``__dict__`` and by the
    # positional arguments handed to the Java constructor.
    _FIELD_NAMES = (
        'provinceId', 'province',
        'cityId', 'city',
        'districtId', 'district',
        'streetId', 'street',
        'townId', 'town',
        'villageId', 'village',
        'road', 'roadNum', 'buildingNum', 'text',
    )

    def __init__(self, provinceId=None, province=None, cityId=None, city=None, districtId=None, district=None,
                 streetId=None, street=None, townId=None, town=None, villageId=None, village=None, road=None,
                 roadNum=None, buildingNum=None, text=None, java=None):
        """Store the address components and attach the Java counterpart.

        ``provinceId``, ``cityId``, ``districtId`` and ``streetId`` are
        converted with ``int()`` when truthy; falsy values (``None``, ``0``,
        ``''``) are stored unchanged. ``townId`` and ``villageId`` are stored
        exactly as given. When ``java`` is ``None`` a Java ``Address`` is
        created from the stored components; otherwise ``java`` is kept as is.
        """

        def _coerce_id(raw):
            # Only truthy ids go through int(); falsy ones pass straight through.
            if raw:
                return int(raw)
            return raw

        self.provinceId = _coerce_id(provinceId)
        self.province = province
        self.cityId = _coerce_id(cityId)
        self.city = city
        self.districtId = _coerce_id(districtId)
        self.district = district
        self.streetId = _coerce_id(streetId)
        self.street = street
        self.townId = townId
        self.town = town
        self.villageId = villageId
        self.village = village
        self.road = road
        self.roadNum = roadNum
        self.buildingNum = buildingNum
        self.text = text

        self._AddressClass = jpype.JClass('org.bitlap.geocoding.model.Address')
        if java is None:
            # No Java object supplied: build one from the components, in field order.
            components = [getattr(self, field) for field in self._FIELD_NAMES]
            java = self._AddressClass(*components)
        self._java = java

    def _rows(self):
        """Return the ``name=value`` fragments shared by ``__repr__`` and ``__str__``."""
        return [
            f"provinceId={self.provinceId}, province={self.province}",
            f"cityId={self.cityId}, city={self.city}",
            f"districtId={self.districtId}, district={self.district}",
            f"streetId={self.streetId}, street={self.street}",
            f"townId={self.townId}, town={self.town}",
            f"villageId={self.villageId}, village={self.village}",
            f"road={self.road}",
            f"roadNum={self.roadNum}",
            f"buildingNum={self.buildingNum}",
            f"text={self.text}",
        ]

    def __repr__(self):
        """Return a single-line ``Address(...)`` listing of all components."""
        return "Address(" + ", ".join(self._rows()) + ")"

    def __str__(self):
        """Return a multi-line ``Address(...)`` listing, one tab-indented row per line."""
        return "Address(\n\t" + ", \n\t".join(self._rows()) + "\n)"

    @property
    def __dict__(self):
        """Return the address components as a dict, leaving out the Java handles."""
        return {field: getattr(self, field) for field in self._FIELD_NAMES}

    @property
    def __java__(self):
        """Return the Java address object attached to this instance."""
        return self._java
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# -*- coding: utf-8 -*- | ||
# @Time : 2022/5/26 11:12 | ||
# @File : document.py | ||
# @Author : CasuallyName | ||
# @Email : fjkl@vip.qq.com | ||
# @Software : Python 3.7 | ||
# @About : | ||
|
||
class Document:
    """Plain container for the pieces of a segmented address document.

    Attributes are stored exactly as passed in: ``terms``, ``termsMap``,
    ``town``, ``village``, ``road``, ``roadNum`` and ``roadNumValue``.
    """

    def __init__(self, terms=None, termsMap=None, town=None, village=None, road=None, roadNum=None, roadNumValue=None):
        """Store every argument on the instance without conversion."""
        self.terms = terms
        self.termsMap = termsMap
        self.town = town
        self.village = village
        self.road = road
        self.roadNum = roadNum
        self.roadNumValue = roadNumValue

    def __str__(self):
        """Return ``Document(...)`` listing every field except ``termsMap``."""
        return "Document(terms={terms}, town={town}, village={village}, road={road}, roadNum={roadNum}, roadNumValue={roadNumValue})".format(
            terms=self.terms, town=self.town, village=self.village, road=self.road, roadNum=self.roadNum,
            roadNumValue=self.roadNumValue
        )

    def __repr__(self):
        """Return the same text as ``__str__``.

        Without this, a Document shown inside a container or in a debugger
        falls back to the default ``<... object at 0x...>`` form.
        """
        return self.__str__()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# -*- coding: utf-8 -*- | ||
# @Time : 2022/5/26 11:33 | ||
# @File : matched.py | ||
# @Author : Zhou Hang | ||
# @Email : zhouhang@idataway.com | ||
# @Software : Python 3.7 | ||
# @About : | ||
|
||
|
||
class MatchedTerm:
    """One matched term together with the scores recorded for it."""

    def __init__(self, term=None, coord=0, density=0, boost=0, tfidf=0):
        """Store the term and its scores exactly as given."""
        # The matched term
        self.term = term
        # Match rate
        self.coord = coord
        # Density
        self.density = density
        # Weight
        self.boost = boost
        # Feature value (TF-IDF)
        self.tfidf = tfidf

    def __str__(self):
        """Return ``MatchedTerm(<term>, coord=..., density=..., boost=..., tfidf=...)``."""
        return "MatchedTerm({}, coord={}, density={}, boost={}, tfidf={})".format(
            self.term, self.coord, self.density, self.boost, self.tfidf)

    def __repr__(self):
        """Return the same text as ``__str__``.

        Without this, terms printed inside a list or shown in a debugger
        fall back to the default ``<... object at 0x...>`` form.
        """
        return self.__str__()
|
||
|
||
class MatchedResult:
    """Outcome of comparing two documents: both documents, matched terms and a score."""

    def __init__(self, doc1=None, doc2=None, terms=None, similarity=0, java=None):
        """Store the comparison result.

        ``terms`` is expected to be ``None`` or an iterable of objects that
        expose ``getTerm()``, ``getCoord()``, ``getDensity()``, ``getBoost()``
        and ``getTfidf()``; each one is converted to a ``MatchedTerm``.
        ``doc1``, ``doc2``, ``similarity`` and ``java`` are stored unchanged.
        """
        self.doc1 = doc1
        self.doc2 = doc2
        if terms is None:
            self.terms = None
        else:
            self.terms = [
                MatchedTerm(
                    term.getTerm(),
                    term.getCoord(),
                    term.getDensity(),
                    term.getBoost(),
                    term.getTfidf(),
                )
                for term in terms
            ]
        self.similarity = similarity
        self.java = java

    def _terms_text(self):
        """Render ``terms`` for display without failing when it is ``None``."""
        if self.terms is None:
            # ``terms`` defaults to None; iterating it would raise TypeError.
            return str(None)
        return str([str(term) for term in self.terms])

    def __str__(self):
        """Return a multi-line ``MatchedResult(...)`` description."""
        return "MatchedResult(\n\tdoc1={doc1}, \n\tdoc2={doc2}, \n\tterms={terms}, \n\tsimilarity={similarity}\n)".format(
            doc1=self.doc1, doc2=self.doc2, terms=self._terms_text(), similarity=self.similarity
        )

    def __repr__(self):
        """Return a single-line ``MatchedResult(...)`` description."""
        return "MatchedResult(doc1={doc1}, doc2={doc2}, terms={terms}, similarity={similarity})".format(
            doc1=self.doc1, doc2=self.doc2, terms=self._terms_text(), similarity=self.similarity
        )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
# -*- coding: utf-8 -*- | ||
# @Time : 2022/5/26 11:09 | ||
# @File : region_type.py | ||
# @Author : CasuallyName | ||
# @Email : fjkl@vip.qq.com | ||
# @Software : Python 3.7 | ||
# @About : | ||
|
||
class RegionType(object):
    """Instance-level copy of the region-type members of ``RegionTypeClass``.

    Each member listed in ``__init__`` is read from the object passed in and
    stored under the same name on the instance.
    """

    def __init__(self, RegionTypeClass):
        """Copy every region-type member from ``RegionTypeClass`` onto ``self``."""
        member_names = (
            'Undefined',           # undefined region type
            'Country',             # country
            'Province',            # province
            'ProvinceLevelCity1',  # municipality - level parallel to province
            'ProvinceLevelCity2',  # municipality - level parallel to city
            'City',                # prefecture-level city
            'CityLevelDistrict',   # county-level city directly under the province
            'District',            # county / district
            'Street',              # street / township level
            'PlatformL4',          # platform-specific level-4 address
            'Town',                # extra: town
            'Village',             # extra: village
        )
        for member in member_names:
            setattr(self, member, getattr(RegionTypeClass, member))

    @staticmethod
    def help():
        """Print a description of each region type to standard output."""
        lines = (
            'RegionType 说明:',
            ' Country : 国家',
            ' Province : 省份',
            ' ProvinceLevelCity1 : 直辖市(与省份并行的一级)',
            ' ProvinceLevelCity2 : 直辖市(与城市并行的一级)',
            ' City : 地级市',
            ' CityLevelDistrict : 省直辖县级市',
            ' District : 县、区',
            ' Street : 街道乡镇一级',
            ' PlatformL4 : 特定平台的4级地址',
            ' Town : 乡镇(附加)',
            ' Village : 村(附加)',
            ' Undefined : 未定义区域类型',
        )
        print('\n'.join(lines))
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# -*- coding: utf-8 -*- | ||
# @Time : 2022/5/26 11:10 | ||
# @File : version.py | ||
# @Author : CasuallyName | ||
# @Email : fjkl@vip.qq.com | ||
# @Software : Python 3.7 | ||
# @About : | ||
|
||
class Version:
    """Pair of version strings: the Python package and the bundled geocoding jar."""

    def __init__(self, package, jar):
        """Remember the package version and the jar version as given."""
        self.package = package
        self.jar = jar

    def __repr__(self):
        """Return both versions, one per line."""
        package_part = f'Package(GeocodingCHN) version: {self.package}'
        jar_part = f'Source(geocoding.jar) version: {self.jar}'
        return '\n'.join((package_part, jar_part))

    def __str__(self):
        """Return both versions on a single line, separated by a comma."""
        package_part = f'Package(GeocodingCHN) version: {self.package}'
        jar_part = f'Source(geocoding.jar) version: {self.jar}'
        return ', '.join((package_part, jar_part))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/usr/bin/python3 | ||
# -*- coding: utf-8 -*- | ||
# @Time : 2021/1/18 13:13 | ||
# @Author : CasuallyName | ||
# @Email : fjkl@vip.qq.com | ||
# @File : test.py | ||
# @Software: PyCharm | ||
from GeocodingCHN import Geocoding | ||
|
||
if __name__ == '__main__':
    # Manual walkthrough of the Geocoding API; results are printed, not asserted.

    # Build once with defaults, then again with an explicit data file path.
    geocoding = Geocoding()
    geocoding = Geocoding(data_class_path='core/region.dat')
    print(geocoding.__version__)

    # Two spellings of an address: normalize both and compare them.
    detailed_text = '山东青岛李沧区延川路116号绿城城园东区7号楼2单元802户'
    compact_text = '山东青岛李沧区延川路绿城城园东区7-2-802'
    detailed_address = geocoding.normalizing(detailed_text)
    print(detailed_address)
    compact_address = geocoding.normalizing(compact_text)

    match = geocoding.similarityWithResult(detailed_address, compact_address)
    print(match)
    print(match.similarity)
    print(geocoding.similarity(detailed_address, compact_address))

    # Register a custom street-level region, then normalize an address that uses it.
    geocoding.addRegionEntry(1, 321200000000, "A街道", geocoding.RegionType.Street)
    print(geocoding.normalizing("江苏泰州A街道"))

    # Word segmentation of the compact address text.
    print(geocoding.segment(compact_text))