Skip to content

Commit

Permalink
ca_on_markham: Align with original code
Browse files: browse the repository at this point in the history
  • Loading branch information
jpmckinney committed Nov 1, 2024
1 parent 350e477 commit 746369d
Showing 1 changed file with 42 additions and 39 deletions.
81 changes: 42 additions & 39 deletions ca_on_markham/people.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -7,45 +7,50 @@

class MarkhamPersonScraper(CanadianScraper):
def scrape(self):
regional_councillor_seat_number = 1

page = self.lxmlize(COUNCIL_PAGE)

yield self.scrape_mayor(MAYOR_PAGE)

groups = self.lxmlize(COUNCIL_PAGE).xpath(
'//div[@class="grid md:grid-cols-2 grid-cols-1 lg:grid-cols-4 gap-4 scrollablec"]'
councillors = page.xpath(
'//div[@class="grid md:grid-cols-2 grid-cols-1 lg:grid-cols-4 gap-4 scrollablec"]/div'
)
assert len(groups) == 2, "No councillors found"
assert len(councillors), "No councillors found"

regional_councillor_seat_number = 1
for i, group in enumerate(groups):
for councillor in group:
name = councillor.xpath(".//h3/text()")[0].strip()
district = councillor.xpath(".//p/text()")[0].strip()
for councillor in councillors:
name = councillor.xpath(".//h3/text()")[0].strip()
district = councillor.xpath(".//p/text()")[0].strip()

if i == 0:
role = "Regional Councillor"
district = f"Markham (seat {regional_councillor_seat_number})"
regional_councillor_seat_number += 1
else:
role = "Councillor"
district = district.replace("Councillor", "").strip()
if "Ward" in district:
district = district.replace("Councillor", "").strip()
role = "Councillor"
elif "Regional" in district:
role = "Regional Councillor"
district = f"Markham (seat {regional_councillor_seat_number})"
regional_councillor_seat_number += 1
else:
role = district
district = "Markham"

image = councillor.xpath(".//img/@src")[0]
url = councillor.xpath(".//a/@href")[0]
image = councillor.xpath(".//img/@src")[0]
url = councillor.xpath(".//a/@href")[0]

address, phone, email, links = self.get_contact(url)
address, phone, email, links = self.get_contact(url)

p = Person(primary_org="legislature", name=name, district=district, role=role)
p.add_source(COUNCIL_PAGE)
p.add_source(url)
p = Person(primary_org="legislature", name=name, district=district, role=role)
p.add_source(COUNCIL_PAGE)
p.add_source(url)

p.image = image
p.add_contact("address", address, "legislature")
p.add_contact("voice", phone, "legislature")
p.add_contact("email", email)
p.image = image
p.add_contact("address", address, "legislature")
p.add_contact("voice", phone, "legislature")
p.add_contact("email", email)

for link in links:
p.add_link(link)
for link in links:
p.add_link(link)

yield p
yield p

def get_contact(self, url):
page = self.lxmlize(url)
Expand All @@ -56,22 +61,20 @@ def get_contact(self, url):
links = []

if contact_node.xpath('.//span[@class="address-line1"]/text()'):
address = (
contact_node.xpath('.//span[@class="address-line1"]/text()')[0]
+ " "
+ contact_node.xpath('.//span[@class="locality"]/text()')[0]
+ " "
+ contact_node.xpath('.//span[@class="administrative-area"]/text()')[0]
+ " "
+ contact_node.xpath('.//span[@class="postal-code"]/text()')[0]
+ " "
+ contact_node.xpath('.//span[@class="country"]/text()')[0]
address = " ".join(
(
contact_node.xpath('.//span[@class="address-line1"]/text()')[0],
contact_node.xpath('.//span[@class="locality"]/text()')[0],
contact_node.xpath('.//span[@class="administrative-area"]/text()')[0],
contact_node.xpath('.//span[@class="postal-code"]/text()')[0],
contact_node.xpath('.//span[@class="country"]/text()')[0],
)
)
else:
contact_node = page.xpath(
'//div[@class="formatted-text field-content field-content--label--body field-content--entity-type--block-content field-content--name--body"]'
)[0]
address = contact_node.xpath(".//p/text()")[0] + " " + contact_node.xpath(".//p/text()")[1]
address = f'{contact_node.xpath(".//p/text()")[0]} {contact_node.xpath(".//p/text()")[1]}'

links = get_links(contact_node)
phone = self.get_phone(contact_node)
Expand Down

0 comments on commit 746369d

Please sign in to comment.