Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(NewOpinionSite): support returning nested data structures #952

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
440 changes: 440 additions & 0 deletions juriscraper/NewOpinionSite.py

Large diffs are not rendered by default.

103 changes: 83 additions & 20 deletions juriscraper/opinions/united_states/state/alaska.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
from typing import Dict

from requests.exceptions import ChunkedEncodingError

from juriscraper.lib.html_utils import (
get_row_column_links,
get_row_column_text,
)
from juriscraper.OpinionSiteLinear import OpinionSiteLinear
from juriscraper.NewOpinionSite import NewOpinionSite, logger


class Site(OpinionSiteLinear):
class Site(NewOpinionSite):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.court_id = self.__module__
Expand All @@ -18,32 +20,93 @@ def __init__(self, *args, **kwargs):
self.request["headers"]["user-agent"] = "Free Law Project"

def _download(self, request_dict=None):
    """Download the opinions page, tolerating Alaska's dropped responses.

    Unfortunately, about 2/3 of calls are rejected by alaska but
    if we just ignore those encoding errors we can live with it.

    :param request_dict: optional request options, forwarded unchanged to
        the parent class' ``_download``. A new empty dict is used when
        the argument is omitted.
    :return: whatever the parent ``_download`` returns, or None when the
        request fails with a ChunkedEncodingError
    """
    # A literal `{}` default is created once and shared by every call, so
    # anything the parent stored in it would leak into later downloads.
    if request_dict is None:
        request_dict = {}

    try:
        return super()._download(request_dict)
    except ChunkedEncodingError:
        # Deliberate best effort: callers see None and handle the missing page
        return None

def _process_html(self) -> None:
    """Collect one case dict per non-empty table row into ``self.cases``.

    Every table on the page is dated by the <h5> heading that precedes it.
    Rows with a PDF link in the first column use that link as the opinion
    URL; rows without one get ``self.placeholder_url`` plus a
    "case_page_link" taken from the third column, so the real URL can be
    looked up on the case page later.

    Have only seen combined opinions in this source
    See for example: S17910 - State of Alaska v. John William McKelvey III
    with opinion 7690 published on 3/8/2024. After the conclusion, it has a
    concurring opinion
    Case link:
    https://appellate-records.courts.alaska.gov/CMSPublic/Case/General?q=w6sobc/DATfJtIRGLf4mqQ==%27
    """
    if not self.html:
        # _download returns None when Alaska drops the connection
        logger.warning("Unable to load page from Alaska")
        return

    for table in self.html.xpath("//table"):
        # `h5[1]` selects the heading closest to this table. lxml returns
        # the unfiltered axis in document order, so taking `[0]` of
        # "./preceding-sibling::h5" would yield the farthest heading
        # whenever several headings share the table's parent.
        date = table.xpath("./preceding-sibling::h5[1]")[0].text_content()

        for row in table.xpath(".//tr"):
            if not row.text_content().strip():
                continue

            case = {}

            # rows without PDF links in first column have the opinion
            # link inside the case page
            try:
                url = get_row_column_links(row, 1)
            except IndexError:
                # NOTE(review): placeholder_url is presumably provided by
                # NewOpinionSite; it is not defined in this file
                url = self.placeholder_url
                case["case_page_link"] = get_row_column_links(row, 3)

            # Have only seen combined opinions in this source
            case.update(
                {
                    "oc.date_filed": date,
                    "d.docket_number": get_row_column_text(row, 3),
                    "d.case_name": get_row_column_text(row, 4),
                    "oc.citation_strings": [get_row_column_text(row, 5)],
                    "opinions": [{"download_url": url}],
                }
            )
            self.cases.append(case)

def get_deferred_download_url(self, case: Dict) -> bool:
""" """
# No need to go into case page, we already have the URL
if not case.get("case_page_link"):
return False

link = case["case_page_link"]

if self.test_mode_enabled():
self.url = link
self._request_url_mock(link)
html = self._return_response_text_object()
else:
html = self._get_html_tree_by_url(link)

nos, case["d.date_filed"] = html.xpath(
"//dl[dt[text()='Case Type:']]/dd/text()"
)
case["oc.nature_of_suit"] = nos.split(" ", 1)[-1]

# Parse opinion table
opinion_row = html.xpath("//tr[td[@title='Document Download']]")[0]
case["opinions"][0]["download_url"] = opinion_row.xpath("td/a/@href")[
0
]
case["oc.disposition"] = opinion_row.xpath("td[3]/text()")[0]

# Parse lower court table
oci_row = html.xpath(
"//h5[text()='Lower Court or Agency Information']/following-sibling::table/tbody/tr"
)
if oci_row:
oci_row = oci_row[0]
oci = {
"docket_number": oci_row.xpath("td[1]/text()")[0],
"date_judgment": oci_row.xpath("td[2]/text()")[0],
"assigned_to_str": oci_row.xpath("td[5]/text()")[0],
}
case["oci"] = oci
case["d.appeal_from_str"] = oci_row.xpath("td[4]/text()")[0]

return True
2 changes: 1 addition & 1 deletion juriscraper/opinions/united_states/state/alaskactapp.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from . import alaska
from juriscraper.opinions.united_states.state import alaska


class Site(alaska.Site):
Expand Down
Loading
Loading