From bf74944b479dd5b1dfa3459a5bea132fb336f2c7 Mon Sep 17 00:00:00 2001 From: ZhouHang Date: Thu, 26 May 2022 13:41:10 +0800 Subject: [PATCH] =?UTF-8?q?feat(=E9=87=8D=E6=9E=84=E4=BB=A3=E7=A0=81):=20?= =?UTF-8?q?=E9=87=8D=E6=9E=84=E4=BB=A3=E7=A0=81=EF=BC=8C=E9=80=82=E9=85=8D?= =?UTF-8?q?=E6=96=B0=E7=89=88jar=E5=8C=85=E7=9A=84=E6=96=B9=E6=B3=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. 新增 Geocoding 自定义 2. addRegionEntry 方法新增 replace 参数 3. 新增分词 segments 4. 分离 similarityWithResult 和 similarity 两个方法 --- GeocodingCHN/Geocoding.py | 385 ++++++++++++++++-------------- GeocodingCHN/__init__.py | 1 + GeocodingCHN/model/__init__.py | 12 + GeocodingCHN/model/address.py | 85 +++++++ GeocodingCHN/model/document.py | 23 ++ GeocodingCHN/model/matched.py | 51 ++++ GeocodingCHN/model/region_type.py | 41 ++++ GeocodingCHN/model/version.py | 18 ++ tests/test.py | 26 ++ 9 files changed, 459 insertions(+), 183 deletions(-) create mode 100644 GeocodingCHN/model/__init__.py create mode 100644 GeocodingCHN/model/address.py create mode 100644 GeocodingCHN/model/document.py create mode 100644 GeocodingCHN/model/matched.py create mode 100644 GeocodingCHN/model/region_type.py create mode 100644 GeocodingCHN/model/version.py create mode 100644 tests/test.py diff --git a/GeocodingCHN/Geocoding.py b/GeocodingCHN/Geocoding.py index 67aac1b..4351252 100644 --- a/GeocodingCHN/Geocoding.py +++ b/GeocodingCHN/Geocoding.py @@ -1,192 +1,211 @@ -#!/usr/bin/python3 # -*- coding: utf-8 -*- -# @Time : 2021/1/18 13:11 -# @Author : ZhouHang +# @Time : 2022/5/25 19:10 +# @File : Geocoding.py +# @Author : CasuallyName # @Email : fjkl@vip.qq.com -# @File : Geocoding.py -# @Software: PyCharm -__all__ = ['version', 'normalizing', 'similarityWithResult', 'addRegionEntry', 'Address', 'RegionType'] +# @Software : Python 3.7 +# @About : +__all__ = ['Geocoding'] import jpype import re import os - - -class Version: - def __init__(self, package, jar): - self.package = package - self.jar = jar - - def __repr__(self): - return f'Package(GeocodingCHN) version: {self.package}\nSrc(geocoding.jar) version: {self.jar}' - - def __str__(self): - return f'Package(GeocodingCHN) version: {self.package}, Src(geocoding.jar) version: {self.jar}' - - -version = Version(package='v1.4.0', jar='v1.1.6') - -jpype.startJVM(jpype.getDefaultJVMPath(), "-ea", - "-Djava.class.path=" + os.path.abspath(__file__).replace('Geocoding.py', 'geocoding.jar')) - - -class Address(object): - def __init__(self, provinceId=None, province=None, cityId=None, city=None, districtId=None, district=None, - streetId=None, street=None, townId=None, town=None, villageId=None, village=None, road=None, - roadNum=None, buildingNum=None, text=None, java=None): - self.provinceId = int(provinceId) if provinceId else provinceId - self.province = province - self.cityId = int(cityId) if cityId else cityId - self.city = city - self.districtId = int(districtId) if districtId else districtId - self.district = district - self.streetId = int(streetId) if streetId else streetId - self.street = street - self.townId = townId - self.town = town - self.villageId = villageId if villageId is not None else None - self.village = village - self.road = road - self.roadNum = roadNum - self.buildingNum = buildingNum - self.text = text - self._AddressClass = jpype.JClass('io.patamon.geocoding.model.Address') - self._java = java if java is not None else self._AddressClass(self.provinceId, self.province, self.cityId, - self.city, self.districtId, self.district, - self.streetId, self.street, self.townId, - self.town, - self.villageId, self.village, self.road, - self.roadNum, self.buildingNum, self.text) - - def __str__(self): - return (f"Address(provinceId={self.provinceId}, province={self.province}, " + - f"cityId={self.cityId}, city={self.city}, " + - f"districtId={self.districtId}, district={self.district}, " + - f"streetId={self.streetId}, street={self.street}, " + - f"townId={self.townId}, town={self.town}, " + - f"villageId={self.villageId}, village={self.village}, " + - f"road={self.road}, " + - f"roadNum={self.roadNum}, " + - f"buildingNum={self.buildingNum}, " + - f"text={self.text})") - - def __repr__(self): - return (f"Address(\n\tprovinceId={self.provinceId}, province={self.province}, " + - f"\n\tcityId={self.cityId}, city={self.city}, " + - f"\n\tdistrictId={self.districtId}, district={self.district}, " + - f"\n\tstreetId={self.streetId}, street={self.street}, " + - f"\n\ttownId={self.townId}, town={self.town}, " + - f"\n\tvillageId={self.villageId}, village={self.village}, " + - f"\n\troad={self.road}, " + - f"\n\troadNum={self.roadNum}, " + - f"\n\tbuildingNum={self.buildingNum}, " + - f"\n\ttext={self.text}\n)") +import warnings + +from .model import Address +from .model import RegionType +from .model import Version +from .model import Document +from .model import MatchedResult + + +class Geocoding: + def __init__(self, data_class_path='core/region.dat', strict: bool = False, jvm_path: str = None): + ''' + + :param data_class_path:自定义地址文件路径 + :param strict:模式设置 + :param jvm_path:JVM路径 + ''' + class_path = os.path.join(os.path.split(os.path.abspath(__file__))[0], + 'geocoding.jar' + ) + + # sep = ';' if os.name == 'nt' else os.pathsep + if data_class_path != 'core/region.dat': + if os.path.isabs(data_class_path): + data_class_dir, data_name = os.path.split(data_class_path) + class_path = class_path + os.pathsep + data_class_dir + else: + raise ValueError("'data_class_path' 参数必须为绝对路径") + else: + data_name = data_class_path + + if not jpype.isJVMStarted(): + if jvm_path is None: + jvm_path = jpype.getDefaultJVMPath() + if not os.path.isabs(jvm_path): + raise ValueError("'jvm_path' 参数必须为绝对路径") + jpype.startJVM(jvm_path, "-ea", "-Djava.class.path=" + class_path) # classpath=class_paths)# + else: + try: + jpype.JClass('org.bitlap.geocoding.Geocoding') + warnings.warn("Geocoding 已被创建,正在尝试重新加载(该过程在Windows环境下可能会出现异常)", category=RuntimeWarning) + except: + warnings.warn("JVM 已经在运行", category=RuntimeWarning) + jpype.addClassPath(class_path) + self._jar_version = '1.3.0' + self.geocoding = jpype.JClass('org.bitlap.geocoding.GeocodingX')(data_name, strict=strict) + self.RegionType = RegionType(jpype.JClass('org.bitlap.geocoding.model.RegionType')) @property - def __dict__(self): - return { - 'provinceId': self.provinceId, - 'province': self.province, - 'cityId': self.cityId, - 'city': self.city, - 'districtId': self.districtId, - 'district': self.district, - 'streetId': self.streetId, - 'street': self.street, - 'townId': self.townId, - 'town': self.town, - 'villageId': self.villageId, - 'village': self.village, - 'road': self.road, - 'roadNum': self.roadNum, - 'buildingNum': self.buildingNum, - 'text': self.text - } + def __version__(self): + return Version(package='v1.4.1', jar=self._jar_version) + + def normalizing(self, address: str) -> Address: + """ + 地址标准化 + + :param address: 文本地址 + :return: + """ + try: + address_nor_java = self.geocoding.normalizing(str(address)) + return Address(provinceId=address_nor_java.getProvinceId(), province=address_nor_java.getProvince(), + cityId=address_nor_java.getCityId(), city=address_nor_java.getCity(), + districtId=address_nor_java.getDistrictId(), district=address_nor_java.getDistrict(), + streetId=address_nor_java.getStreetId(), street=address_nor_java.getStreet(), + townId=address_nor_java.getTownId(), town=address_nor_java.getTown(), + villageId=address_nor_java.getVillageId(), village=address_nor_java.getVillage(), + road=address_nor_java.getRoad(), + roadNum=address_nor_java.getRoadNum(), + buildingNum=address_nor_java.getBuildingNum(), + text=address_nor_java.getText(), + java=address_nor_java + ) + except AttributeError: + address_nor_java = self.geocoding.normalizing(str(address)) + pattern = re.compile( + "Address\(\n\tprovinceId=(.*?), province=(.*?), " + + "\n\tcityId=(.*?), city=(.*?), " + + "\n\tdistrictId=(.*?), district=(.*?), " + + "\n\tstreetId=(.*?), street=(.*?), " + + "\n\ttownId=(.*?), town=(.*?), " + + "\n\tvillageId=(.*?), village=(.*?), " + + "\n\troad=(.*?), " + + "\n\troadNum=(.*?), " + + "\n\tbuildingNum=(.*?), " + + "\n\ttext=(.*?)\n\)" + , re.S) + try: + info = re.findall(pattern, str(address_nor_java.toString()))[0] + info = [None if i == 'null' or i == 'nan' else i for i in info] + return Address(info[0], info[1], info[2], info[3], info[4], info[5], info[6], info[7], info[8], info[9], + info[10], info[11], info[12], info[13], info[14], info[15], address_nor_java) + except AttributeError: + return Address() + + def similarityWithResult(self, address_1: [Address, str], address_2: [Address, str]) -> MatchedResult: + """ + 地址相似度计算 + + :param Address_1: 地址1, 由 Geocoding.normalizing 方法返回的 Address 类 + :param Address_2: 地址2, 由 Geocoding.normalizing 方法返回的 Address 类 + :return: + """ + + if type(address_1) == type(address_2) == Address or type(address_1) == type(address_2) == str: + if type(address_1) == type(address_2) == Address: + result = self.geocoding.similarityWithResult(address_1.__java__, address_2.__java__) + else: + result = self.geocoding.similarityWithResult(address_1, address_2) + else: + raise TypeError( + "similarityWithResult仅支持计算两个 Address 或 text 之间的相似度,但此时输入类型为 {} 和 {} ".format( + type(address_1), type(address_2))) + + try: + return MatchedResult(doc1=Document(terms=result.getDoc1().getTerms(), + termsMap=result.getDoc1().getTermsMap(), + town=result.getDoc1().getTown(), + village=result.getDoc1().getVillage(), + road=result.getDoc1().getRoad(), + roadNum=result.getDoc1().getRoadNum(), + roadNumValue=result.getDoc1().getRoadNumValue(), + ), + doc2=Document(terms=result.getDoc2().getTerms(), + termsMap=result.getDoc2().getTermsMap(), + town=result.getDoc2().getTown(), + village=result.getDoc2().getVillage(), + road=result.getDoc2().getRoad(), + roadNum=result.getDoc2().getRoadNum(), + roadNumValue=result.getDoc2().getRoadNumValue(), + ), + terms=result.getTerms(), + similarity=result.getSimilarity(), + java=result + ) + except: + pattern = re.compile("similarity=(.*?)\n\)", re.S) + return MatchedResult(similarity=eval(re.findall(pattern, str(result.toString()))[0])) + + def similarity(self, address_1: [Address, str], address_2: [Address, str]) -> float: + """ + 地址相似度计算 + + :param Address_1: 地址1, 由 Geocoding.normalizing 方法返回的 Address 类 + :param Address_2: 地址2, 由 Geocoding.normalizing 方法返回的 Address 类 + :return: + """ + + if type(address_1) == type(address_2) == Address or type(address_1) == type(address_2) == str: + if type(address_1) == type(address_2) == Address: + result = self.geocoding.similarity(address_1.__java__, address_2.__java__) + else: + result = self.geocoding.similarity(address_1, address_2) + else: + raise TypeError( + "similarityWithResult仅支持计算两个 Address 或 text 之间的相似度,但此时输入类型为 {} 和 {} ".format( + type(address_1), type(address_2))) + return result + + def addRegionEntry(self, Id: int, parentId: int, name: str, RegionType: RegionType, alias: str = '', + replace: bool = True) -> bool: + """ + 添加自定义地址信息 + + :param Id: 地址的ID + :param parentId: 地址的父ID, 必须存在 + :param name: 地址的名称 + :param RegionType: 地址类型,RegionType, + :param alias: 地址的别名, default='' + :param replace: 是否替换旧地址, 当除了[id]之外的字段, 如果相等就替换 + :return: + """ + try: + self.geocoding.addRegionEntry(id=Id, parentId=parentId, name=name, + RegionType=RegionType, alias=alias, replace=replace) + return True + except: + return False + + def segment(self, text: str, seg_type: str = 'ik') -> list: + ''' + 分词 + + :param text: input + :param seg_type: ['ik', 'simple', 'smart', 'word'] + :return: + ''' + if seg_type == 'ik': + seg_class = jpype.JClass('org.bitlap.geocoding.core.segment.IKAnalyzerSegmenter')() + elif seg_type == 'simple': + seg_class = jpype.JClass('org.bitlap.geocoding.core.segment.SimpleSegmenter')() + elif seg_type == 'smart': + seg_class = jpype.JClass('org.bitlap.geocoding.core.segment.SmartCNSegmenter')() + elif seg_type == 'word': + seg_class = jpype.JClass('org.bitlap.geocoding.core.segment.WordSegmenter')() + else: + raise AttributeError("'seg_type' 只可以是 ['ik', 'simple', 'smart', 'word'] 中的一种") + return list(seg_class.segment(text)) - @property - def __java__(self): - return self._java - - -class RegionType(object): - RegionTypeClass = jpype.JClass('io.patamon.geocoding.model.RegionType') - Undefined = RegionTypeClass.Undefined # 未定义区域类型 - Country = RegionTypeClass.Country # 国家 - Province = RegionTypeClass.Province # 省份 - ProvinceLevelCity1 = RegionTypeClass.ProvinceLevelCity1 # 直辖市 - 与省份并行的一级 - ProvinceLevelCity2 = RegionTypeClass.ProvinceLevelCity2 # 直辖市 - 与城市并行的一级 - City = RegionTypeClass.City # 地级市 - CityLevelDistrict = RegionTypeClass.CityLevelDistrict # 省直辖县级市 - District = RegionTypeClass.District # 县、区 - Street = RegionTypeClass.Street # 街道乡镇一级 - PlatformL4 = RegionTypeClass.PlatformL4 # 特定平台的4级地址 - Town = RegionTypeClass.Town # 附加:乡镇 - Village = RegionTypeClass.Village # 附加:村 - - -def normalizing(address: str): - """ - 地址标准化 - - :param address: 文本地址 - :return: - """ - geocoding = jpype.JClass('io.patamon.geocoding.Geocoding') - address_nor_java = geocoding.normalizing(str(address)) - pattern = re.compile( - "Address\(\n\tprovinceId=(.*?), province=(.*?), " + - "\n\tcityId=(.*?), city=(.*?), " + - "\n\tdistrictId=(.*?), district=(.*?), " + - "\n\tstreetId=(.*?), street=(.*?), " + - "\n\ttownId=(.*?), town=(.*?), " + - "\n\tvillageId=(.*?), village=(.*?), " + - "\n\troad=(.*?), " + - "\n\troadNum=(.*?), " + - "\n\tbuildingNum=(.*?), " + - "\n\ttext=(.*?)\n\)" - , re.S) - try: - info = re.findall(pattern, str(address_nor_java.toString()))[0] - info = [None if i == 'null' or i == 'nan' else i for i in info] - return Address(info[0], info[1], info[2], info[3], info[4], info[5], info[6], info[7], info[8], info[9], - info[10], info[11], info[12], info[13], info[14], info[15], address_nor_java) - except AttributeError: - return Address - - -def similarityWithResult(Address_1: Address, Address_2: Address): - """ - 地址相似度计算 - - :param Address_1: 地址1, 由 Geocoding.normalizing 方法返回的 Address 类 - :param Address_2: 地址2, 由 Geocoding.normalizing 方法返回的 Address 类 - :return: - """ - geocoding = jpype.JClass('io.patamon.geocoding.Geocoding') - pattern = re.compile("similarity=(.*?)\n\)", re.S) - if type(Address_1) == type(Address_2) == Address: - return eval(re.findall(pattern, - str(geocoding.similarityWithResult(Address_1.__java__, - Address_2.__java__).toString()))[0]) - else: - raise TypeError( - "Geocoding.similarityWithResult仅支持计算两个由 Geocoding.normalizing 方法返回的Address类之间的相似度") - - -def addRegionEntry(Id: int, parentId: int, name: str, RegionType: RegionType, alias=''): - """ - 添加自定义地址信息 - - :param Id: 地址的ID - :param parentId: 地址的父ID, 必须存在 - :param name: 地址的名称 - :param RegionType: 地址类型,RegionType, - :param alias: 地址的别名, default='' - :return: - """ - geocoding = jpype.JClass('io.patamon.geocoding.Geocoding') - try: - geocoding.addRegionEntry(Id, parentId, name, RegionType, alias) - return True - except: - return False diff --git a/GeocodingCHN/__init__.py b/GeocodingCHN/__init__.py index e69de29..e0443e2 100644 --- a/GeocodingCHN/__init__.py +++ b/GeocodingCHN/__init__.py @@ -0,0 +1 @@ +from GeocodingCHN.Geocoding import Geocoding diff --git a/GeocodingCHN/model/__init__.py b/GeocodingCHN/model/__init__.py new file mode 100644 index 0000000..698617b --- /dev/null +++ b/GeocodingCHN/model/__init__.py @@ -0,0 +1,12 @@ +# -*- coding: utf-8 -*- +# @Time : 2022/5/26 11:07 +# @File : __init__.py.py +# @Author : CasuallyName +# @Email : fjkl@vip.qq.com +# @Software : Python 3.7 +# @About : +from .address import Address +from .region_type import RegionType +from .version import Version +from .document import Document +from .matched import MatchedResult,MatchedTerm \ No newline at end of file diff --git a/GeocodingCHN/model/address.py b/GeocodingCHN/model/address.py new file mode 100644 index 0000000..283de13 --- /dev/null +++ b/GeocodingCHN/model/address.py @@ -0,0 +1,85 @@ +# -*- coding: utf-8 -*- +# @Time : 2022/5/26 11:08 +# @File : address.py +# @Author : Zhou Hang +# @Email : zhouhang@idataway.com +# @Software : Python 3.7 +# @About : +import jpype +class Address(object): + def __init__(self, provinceId=None, province=None, cityId=None, city=None, districtId=None, district=None, + streetId=None, street=None, townId=None, town=None, villageId=None, village=None, road=None, + roadNum=None, buildingNum=None, text=None, java=None): + self.provinceId = int(provinceId) if provinceId else provinceId + self.province = province + self.cityId = int(cityId) if cityId else cityId + self.city = city + self.districtId = int(districtId) if districtId else districtId + self.district = district + self.streetId = int(streetId) if streetId else streetId + self.street = street + self.townId = townId + self.town = town + self.villageId = villageId if villageId is not None else None + self.village = village + self.road = road + self.roadNum = roadNum + self.buildingNum = buildingNum + self.text = text + self._AddressClass = jpype.JClass('org.bitlap.geocoding.model.Address') + self._java = java if java is not None else self._AddressClass(self.provinceId, self.province, self.cityId, + self.city, self.districtId, self.district, + self.streetId, self.street, self.townId, + self.town, + self.villageId, self.village, self.road, + self.roadNum, self.buildingNum, self.text) + + def __repr__(self): + return (f"Address(provinceId={self.provinceId}, province={self.province}, " + + f"cityId={self.cityId}, city={self.city}, " + + f"districtId={self.districtId}, district={self.district}, " + + f"streetId={self.streetId}, street={self.street}, " + + f"townId={self.townId}, town={self.town}, " + + f"villageId={self.villageId}, village={self.village}, " + + f"road={self.road}, " + + f"roadNum={self.roadNum}, " + + f"buildingNum={self.buildingNum}, " + + f"text={self.text})") + + def __str__(self): + return (f"Address(\n\tprovinceId={self.provinceId}, province={self.province}, " + + f"\n\tcityId={self.cityId}, city={self.city}, " + + f"\n\tdistrictId={self.districtId}, district={self.district}, " + + f"\n\tstreetId={self.streetId}, street={self.street}, " + + f"\n\ttownId={self.townId}, town={self.town}, " + + f"\n\tvillageId={self.villageId}, village={self.village}, " + + f"\n\troad={self.road}, " + + f"\n\troadNum={self.roadNum}, " + + f"\n\tbuildingNum={self.buildingNum}, " + + f"\n\ttext={self.text}\n)") + + @property + def __dict__(self): + return { + 'provinceId': self.provinceId, + 'province': self.province, + 'cityId': self.cityId, + 'city': self.city, + 'districtId': self.districtId, + 'district': self.district, + 'streetId': self.streetId, + 'street': self.street, + 'townId': self.townId, + 'town': self.town, + 'villageId': self.villageId, + 'village': self.village, + 'road': self.road, + 'roadNum': self.roadNum, + 'buildingNum': self.buildingNum, + 'text': self.text + } + + @property + def __java__(self): + return self._java + diff --git a/GeocodingCHN/model/document.py b/GeocodingCHN/model/document.py new file mode 100644 index 0000000..504ed0c --- /dev/null +++ b/GeocodingCHN/model/document.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# @Time : 2022/5/26 11:12 +# @File : document.py +# @Author : CasuallyName +# @Email : fjkl@vip.qq.com +# @Software : Python 3.7 +# @About : + +class Document(): + def __init__(self, terms=None, termsMap=None, town=None, village=None, road=None, roadNum=None, roadNumValue=None): + self.terms = terms + self.termsMap = termsMap + self.town = town + self.village = village + self.road = road + self.roadNum = roadNum + self.roadNumValue = roadNumValue + + def __str__(self): + return "Document(terms={terms}, town={town}, village={village}, road={road}, roadNum={roadNum}, roadNumValue={roadNumValue})".format( + terms=self.terms, town=self.town, village=self.village, road=self.road, roadNum=self.roadNum, + roadNumValue=self.roadNumValue + ) diff --git a/GeocodingCHN/model/matched.py b/GeocodingCHN/model/matched.py new file mode 100644 index 0000000..d9f1559 --- /dev/null +++ b/GeocodingCHN/model/matched.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# @Time : 2022/5/26 11:33 +# @File : matched.py +# @Author : Zhou Hang +# @Email : zhouhang@idataway.com +# @Software : Python 3.7 +# @About : + + +class MatchedTerm: + def __init__(self, term=None, coord=0, density=0, boost=0, tfidf=0): + # 匹配的词条 + self.term = term + # 匹配率 + self.coord = coord + # 稠密度 + self.density = density + # 权重 + self.boost = boost + # 特征值 TF - IDF + self.tfidf = tfidf + + def __str__(self): + return "MatchedTerm({}, coord={}, density={}, boost={}, tfidf={})".format( + self.term, self.coord, self.density, self.boost, self.tfidf) + + +class MatchedResult: + def __init__(self, doc1=None, doc2=None, terms=None, similarity=0,java=None): + self.doc1 = doc1 + self.doc2 = doc2 + self.terms = terms + self.terms = terms if terms is None else [MatchedTerm(term.getTerm(), + term.getCoord(), + term.getDensity(), + term.getBoost(), + term.getTfidf(), + ) + for term in terms] + self.similarity = similarity + self.java = java + + def __str__(self): + return "MatchedResult(\n\tdoc1={doc1}, \n\tdoc2={doc2}, \n\tterms={terms}, \n\tsimilarity={similarity}\n)".format( + doc1=self.doc1, doc2=self.doc2, terms=str([str(i) for i in self.terms]), similarity=self.similarity + ) + + def __repr__(self): + return "MatchedResult(doc1={doc1}, doc2={doc2}, terms={terms}, similarity={similarity})".format( + doc1=self.doc1, doc2=self.doc2, terms=str([str(i) for i in self.terms]), similarity=self.similarity + ) diff --git a/GeocodingCHN/model/region_type.py b/GeocodingCHN/model/region_type.py new file mode 100644 index 0000000..63ae68b --- /dev/null +++ b/GeocodingCHN/model/region_type.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +# @Time : 2022/5/26 11:09 +# @File : region_type.py +# @Author : CasuallyName +# @Email : fjkl@vip.qq.com +# @Software : Python 3.7 +# @About : + +class RegionType(object): + def __init__(self, RegionTypeClass): + self.Undefined = RegionTypeClass.Undefined # 未定义区域类型 + self.Country = RegionTypeClass.Country # 国家 + self.Province = RegionTypeClass.Province # 省份 + self.ProvinceLevelCity1 = RegionTypeClass.ProvinceLevelCity1 # 直辖市 - 与省份并行的一级 + self.ProvinceLevelCity2 = RegionTypeClass.ProvinceLevelCity2 # 直辖市 - 与城市并行的一级 + self.City = RegionTypeClass.City # 地级市 + self.CityLevelDistrict = RegionTypeClass.CityLevelDistrict # 省直辖县级市 + self.District = RegionTypeClass.District # 县、区 + self.Street = RegionTypeClass.Street # 街道乡镇一级 + self.PlatformL4 = RegionTypeClass.PlatformL4 # 特定平台的4级地址 + self.Town = RegionTypeClass.Town # 附加:乡镇 + self.Village = RegionTypeClass.Village # 附加:村 + + @staticmethod + def help(): + print('\n'.join([ + 'RegionType 说明:', + ' Country : 国家', + ' Province : 省份', + ' ProvinceLevelCity1 : 直辖市(与省份并行的一级)', + ' ProvinceLevelCity2 : 直辖市(与城市并行的一级)', + ' City : 地级市', + ' CityLevelDistrict : 省直辖县级市', + ' District : 县、区', + ' Street : 街道乡镇一级', + ' PlatformL4 : 特定平台的4级地址', + ' Town : 乡镇(附加)', + ' Village : 村(附加)', + ' Undefined : 未定义区域类型', + ])) + diff --git a/GeocodingCHN/model/version.py b/GeocodingCHN/model/version.py new file mode 100644 index 0000000..169c003 --- /dev/null +++ b/GeocodingCHN/model/version.py @@ -0,0 +1,18 @@ +# -*- coding: utf-8 -*- +# @Time : 2022/5/26 11:10 +# @File : version.py +# @Author : CasuallyName +# @Email : fjkl@vip.qq.com +# @Software : Python 3.7 +# @About : + +class Version: + def __init__(self, package, jar): + self.package = package + self.jar = jar + + def __repr__(self): + return f'Package(GeocodingCHN) version: {self.package}\nSource(geocoding.jar) version: {self.jar}' + + def __str__(self): + return f'Package(GeocodingCHN) version: {self.package}, Source(geocoding.jar) version: {self.jar}' \ No newline at end of file diff --git a/tests/test.py b/tests/test.py new file mode 100644 index 0000000..32799fb --- /dev/null +++ b/tests/test.py @@ -0,0 +1,26 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- +# @Time : 2021/1/18 13:13 +# @Author : CasuallyName +# @Email : fjkl@vip.qq.com +# @File : test.py +# @Software: PyCharm +from GeocodingCHN import Geocoding + +if __name__ == '__main__': + geocoding = Geocoding() + geocoding = Geocoding(data_class_path='core/region.dat') + print(geocoding.__version__) + text1 = '山东青岛李沧区延川路116号绿城城园东区7号楼2单元802户' + text2 = '山东青岛李沧区延川路绿城城园东区7-2-802' + Address_1 = geocoding.normalizing(text1) + print(Address_1) + Address_2 = geocoding.normalizing(text2) + similar = geocoding.similarityWithResult(Address_1, Address_2) + print(similar) + print(similar.similarity) + print(geocoding.similarity(Address_1, Address_2)) + + geocoding.addRegionEntry(1, 321200000000, "A街道", geocoding.RegionType.Street) + print(geocoding.normalizing("江苏泰州A街道")) + print(geocoding.segment(text2))