Skip to content
This repository has been archived by the owner on Jun 3, 2024. It is now read-only.

Remove PagesJaunes ElasticSearch legacy #275

Merged
merged 3 commits into from
Oct 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 26 additions & 95 deletions idunn/datasources/pages_jaunes.py
Original file line number Diff line number Diff line change
@@ -1,109 +1,21 @@
import logging
from os import path
from typing import List

import logging
import requests
from elasticsearch import Elasticsearch
from fastapi import HTTPException
from requests import HTTPError as RequestsHTTPError

from idunn import settings
from idunn.places.pj_poi import PjApiPOI, PjPOI
from idunn.places.models import pj_info, pj_find
from idunn.api.utils import CategoryEnum
from idunn.places.exceptions import PlaceNotFound
from idunn.places.models import pj_info, pj_find
from idunn.places.pj_poi import PjApiPOI
from idunn.utils.auth_session import AuthSession
from idunn.utils.geometry import bbox_inside_polygon, france_polygon
from idunn.api.utils import CategoryEnum

logger = logging.getLogger(__name__)


class PjSource:
    """Base class for PagesJaunes ("pj") place sources.

    Provides the coverage checks and place-id handling shared by concrete
    sources. The data-access methods (`get_places_bbox`, `get_place`) are
    left unimplemented here and must be provided by subclasses.
    """

    # Namespace used as prefix of public place ids, e.g. "pj:<internal id>".
    PLACE_ID_NAMESPACE = "pj"

    def __init__(self):
        # A disabled source reports that it covers nothing.
        self.enabled = True

    def bbox_is_covered(self, bbox):
        """Tell whether `bbox` is inside `france_polygon`.

        `bbox` is unpacked positionally into `bbox_inside_polygon`.
        Always returns False when the source is disabled.
        """
        if not self.enabled:
            return False
        return bbox_inside_polygon(*bbox, poly=france_polygon)

    def point_is_covered(self, point):
        """Tell whether `france_polygon` contains `point`.

        Always returns False when the source is disabled.
        """
        if not self.enabled:
            return False
        return france_polygon.contains(point)

    def internal_id(self, poi_id):
        """Return `poi_id` without its leading "<namespace>:" prefix.

        Only a prefix at the very start of the id is removed; an id that
        does not start with the namespace is returned unchanged, even if
        the namespace string appears somewhere inside it.
        """
        prefix = f"{self.PLACE_ID_NAMESPACE}:"
        if poi_id.startswith(prefix):
            return poi_id[len(prefix) :]
        return poi_id

    # pylint: disable = unused-argument
    def search_places(self, query: str, place_in_query: bool, size=10) -> List[PjApiPOI]:
        """Log a warning and return an empty list (text search is not implemented here)."""
        logger.warning("calling unimplemented `search_places` with deprecated LegacyPjSource")
        return []

    def get_places_bbox(self, categories: List[CategoryEnum], bbox, size=10, query=""):
        """Fetch places for a bounding box; must be implemented by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def get_place(self, poi_id):
        """Fetch a single place by id; must be implemented by subclasses.

        Raises:
            NotImplementedError: always, in this base class.
        """
        raise NotImplementedError


class LegacyPjSource(PjSource):
    """PagesJaunes source reading places from an Elasticsearch index.

    The index name and the search template id are read from the settings
    keys `PJ_ES_INDEX` and `PJ_ES_QUERY_TEMPLATE`; the cluster URL comes
    from `PJ_ES`. Without a URL the source is disabled.
    """

    es_index = settings.get("PJ_ES_INDEX")
    es_query_template = settings.get("PJ_ES_QUERY_TEMPLATE")

    def __init__(self):
        super().__init__()
        es_url = settings.get("PJ_ES")

        if not es_url:
            # No cluster configured: no client is created at all.
            self.enabled = False
            return

        self.es = Elasticsearch(es_url, timeout=3.0)
        self.enabled = True

    def get_places_bbox(self, categories: List[CategoryEnum], bbox, size=10, query=""):
        """Run the search template over `bbox` and wrap each hit in a `PjPOI`.

        `bbox` is unpacked as (left, bottom, right, top). The optional
        `query` and the filters derived from `categories` are only sent
        when non-empty.
        """
        pj_categories = []
        for category in categories:
            for pj_filter in category.pj_filters():
                pj_categories.append(pj_filter)
        west, south, east, north = bbox

        template_params = {
            "query": query,
            "top_left_lat": north,
            "top_left_lon": west,
            "bottom_right_lat": south,
            "bottom_right_lon": east,
        }
        if query:
            template_params["match_amenities"] = True
        if pj_categories:
            template_params["filter_category"] = True
            template_params["category"] = pj_categories

        response = self.es.search_template(
            index=self.es_index,
            body={"id": self.es_query_template, "params": template_params},
            params={"size": size},
        )
        hits = response.get("hits", {}).get("hits", [])
        return [PjPOI(hit["_source"]) for hit in hits]

    def get_place(self, poi_id):
        """Look up one place by its id and return it as a `PjPOI`.

        Raises:
            HTTPException: with status 404 when no document matches.
        """
        # pylint: disable = unexpected-keyword-arg
        response = self.es.search(
            index=self.es_index,
            body={"query": {"bool": {"filter": {"term": {"_id": self.internal_id(poi_id)}}}}},
            ignore_unavailable=True,
        )

        matches = response.get("hits", {}).get("hits", [])
        match_count = len(matches)
        if match_count == 0:
            raise HTTPException(status_code=404, detail=f"place {poi_id} not found")
        if match_count > 1:
            # Not fatal: the first match is still returned.
            logger.warning("Got multiple places with id %s", poi_id)
        return PjPOI(matches[0]["_source"])


class PjAuthSession(AuthSession):
def get_authorization_url(self):
return "https://api.pagesjaunes.fr/oauth/client_credential/accesstoken"
Expand All @@ -116,15 +28,21 @@ def get_authorization_params(self):
}


class ApiPjSource(PjSource):
class ApiPjSource:
PLACE_ID_NAMESPACE = "pj"
PJ_RESULT_MAX_SIZE = 30
PJ_INFO_API_URL = "https://api.pagesjaunes.fr/v1/pros"
PJ_FIND_API_URL = "https://api.pagesjaunes.fr/v1/pros/search"
PJ_API_TIMEOUT = float(settings.get("PJ_API_TIMEOUT"))

def __init__(self):
super().__init__()
self.session = PjAuthSession(refresh_timeout=self.PJ_API_TIMEOUT)
pj_api_url = settings.get("PJ_API_ID")
if pj_api_url:
self.session = PjAuthSession(refresh_timeout=self.PJ_API_TIMEOUT)
self.enabled = True
else:
self.enabled = False

@staticmethod
def format_where(bbox):
Expand All @@ -136,6 +54,19 @@ def format_where(bbox):
left, bot, right, top = bbox
return f"gZ{left:.6f},{bot:.6f},{right:.6f},{top:.6f}"

def bbox_is_covered(self, bbox):
if not self.enabled:
return False
return bbox_inside_polygon(*bbox, poly=france_polygon)

def point_is_covered(self, point):
if not self.enabled:
return False
return france_polygon.contains(point)

def internal_id(self, poi_id):
return poi_id.replace(f"{self.PLACE_ID_NAMESPACE}:", "", 1)

def get_from_params(self, url, params=None) -> PjApiPOI:
res = self.session.get(url, params=params, timeout=self.PJ_API_TIMEOUT)
res.raise_for_status()
Expand Down Expand Up @@ -218,4 +149,4 @@ def get_place(self, poi_id) -> PjApiPOI:
raise


pj_source = ApiPjSource() if settings.get("PJ_API_ID") else LegacyPjSource()
pj_source = ApiPjSource()
2 changes: 1 addition & 1 deletion idunn/places/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from .poi import POI, BragiPOI
from .street import Street
from .place import Place
from .pj_poi import PjApiPOI, PjPOI
from .pj_poi import PjApiPOI
from .latlon import Latlon
from .event import Event

Expand Down
151 changes: 0 additions & 151 deletions idunn/places/pj_poi.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,157 +96,6 @@ def get_class_subclass(raw_categories):
return (None, None)


class PjPOI(BasePlace):
    """Place built from a PagesJaunes document (legacy Elasticsearch schema).

    Every accessor reads its value from the underlying document through
    `self.get(...)`.
    """

    PLACE_TYPE = "poi"

    def get_id(self):
        """Return the namespaced id "pj:<BusinessId>", or None without a business id."""
        business_id = self.get("BusinessId")
        return f"pj:{business_id}" if business_id else None

    def get_coord(self):
        """Return the raw "Geo" field."""
        return self.get("Geo")

    def get_local_name(self):
        """Return the business name, or an empty string when missing."""
        return self.get("BusinessName", "")

    def get_phone(self):
        """Return the first listed phone number, or None when there is none."""
        numbers = self.get("ContactInfos", {}).get("PhoneNumbers", [])
        if not numbers:
            return None
        return numbers[0].get("phoneNumber")

    def get_website(self):
        """Return the raw "WebsiteURL" field."""
        return self.get("WebsiteURL")

    def get_class_name(self):
        """Return the class part of the (class, subclass) pair mapped from the categories."""
        categories = frozenset(self.get("Category", []))
        name, _unused = get_class_subclass(categories)
        return name

    def get_subclass_name(self):
        """Return the subclass part of the (class, subclass) pair mapped from the categories."""
        categories = frozenset(self.get("Category", []))
        _unused, name = get_class_subclass(categories)
        return name

    def get_raw_opening_hours(self):
        """Build an opening-hours string from the per-day "OpeningHours" mapping.

        Consecutive days sharing the same hours are merged into a
        "First-Last hours" range, entries are separated by "; ", and a
        full week of "24/7" collapses to just "24/7".
        """
        hours_by_day = self.get("OpeningHours", {})

        # Each run is [first_day, last_day, hours] for consecutive equal days.
        runs = []
        previous_day_open = False
        for day in ["Mo", "Tu", "We", "Th", "Fr", "Sa", "Su"]:
            hours = hours_by_day.get(day)
            if hours and previous_day_open and hours == runs[-1][2]:
                runs[-1][1] = day
            elif hours:
                runs.append([day, day, hours])
            previous_day_open = bool(hours)

        pieces = []
        for start, end, hours in runs:
            if start == end:
                pieces.append(f"{start} {hours}; ")
            else:
                pieces.append(f"{start}-{end} {hours}; ")
        formatted = "".join(pieces).rstrip("; ")

        return "24/7" if formatted == "Mo-Su 24/7" else formatted

    def get_raw_wheelchair(self):
        """Return the raw "WheelchairAccessible" field."""
        return self.get("WheelchairAccessible")

    def build_address(self, lang):
        """Assemble the address dictionary from the raw "Address" fields."""
        city = self.get_city()
        postcode = self.get_postcode()
        address = self.raw_address()
        number = address.get("Number", "")
        street = address.get("Street", "")
        postcodes = [postcode] if postcode else []

        street_block = {
            "id": None,
            "name": street,
            "label": f"{street} ({city})",
            "postcodes": postcodes,
        }
        return {
            "id": None,
            "name": f"{number} {street}".strip(),
            "housenumber": number,
            "postcode": postcode,
            "label": f"{number} {street}, {postcode} {city}".strip().strip(","),
            "admin": None,
            "admins": self.build_admins(lang),
            "street": street_block,
            "country_code": self.get_country_code(),
        }

    def build_admins(self, lang=None) -> list:
        """Return a one-element list describing the city of the place."""
        city = self.get_city()
        postcode = self.get_postcode()

        label = f"{city} ({postcode})" if postcode else city
        postcodes = [postcode] if postcode else []

        city_admin = {
            "name": city,
            "label": label,
            "class_name": "city",
            "postcodes": postcodes,
        }
        return [city_admin]

    def raw_address(self):
        """Return the raw "Address" mapping, or an empty dict when missing."""
        return self.get("Address", {})

    def get_city(self):
        """Return the city of the address, or an empty string."""
        return self.raw_address().get("City", "")

    def get_postcode(self):
        """Return the postal code of the address, or an empty string."""
        return self.raw_address().get("PostalCode", "")

    def get_country_codes(self):
        """Return the fixed country code list: ["FR"]."""
        return ["FR"]

    def get_images_urls(self):
        """Return the "url" of every photo (empty string for photos without one)."""
        urls = []
        for photo in self.get("photos", {}).get("photos", []):
            urls.append(photo.get("url", ""))
        return urls

    def get_source(self):
        """Return the source identifier for PagesJaunes."""
        return PoiSource.PAGESJAUNES

    def get_source_url(self):
        """Return the pagesjaunes.fr URL of the business, or None without a business id."""
        business_id = self.get("BusinessId")
        if business_id:
            return f"https://www.pagesjaunes.fr/pros/{business_id}"
        return None

    def get_contribute_url(self):
        """Return the source URL with the practical-information anchor, or None."""
        base_url = self.get_source_url()
        if base_url:
            return f"{base_url}#zone-informations-pratiques"
        return None

    def get_raw_grades(self):
        """Return the raw "grades" field."""
        return self.get("grades")

    def get_reviews_url(self):
        """Return the "viewReviews" link, or an empty string when missing."""
        return self.get("Links", {}).get("viewReviews", "")


class PjApiPOI(BasePlace):
PLACE_TYPE = "poi"

Expand Down
6 changes: 0 additions & 6 deletions idunn/utils/default_settings.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,6 @@ BLOCK_TRANSACTIONAL_ENABLED: True
#########################
## Data sources

# Pages jaunes access through legacy ES data
# PJ_API_ID / PJ_API_SECRET will be preferred
PJ_ES:
PJ_ES_INDEX: "pagesjaunes"
PJ_ES_QUERY_TEMPLATE: "pagesjaunes_query"

# Pages jaunes
PJ_API_ID:
PJ_API_SECRET:
Expand Down
Loading