adding new nyag scraper for New York Attorney General. A backscraper …

…is included and should be run after deployment. It will capture 533 cases and take only about 1 second to run. Relates to #168
freelawproject · Feb 4, 2017 · c253180 · c253180
1 parent c2f0710
commit c253180
Show file tree

Hide file tree

Showing 3 changed files with 6,568 additions and 0 deletions.
diff --git a/juriscraper/opinions/united_states/state/__init__.py b/juriscraper/opinions/united_states/state/__init__.py
@@ -96,6 +96,7 @@
     'nj',
     'njsuperctappdiv',
     'ny',
+    'nyag',
     'nyappdiv_1st',
     'nyappdiv_2nd',
     'nyappdiv_3rd',

diff --git a/juriscraper/opinions/united_states/state/nyag.py b/juriscraper/opinions/united_states/state/nyag.py
@@ -0,0 +1,62 @@
+"""Scraper for the New York Attorney General
+CourtID: nyag
+Court Short Name: New York Attorney General
+"""
+
+import datetime
+
+from juriscraper.OpinionSite import OpinionSite
+from juriscraper.lib.string_utils import convert_date_string
+
+
+class Site(OpinionSite):
+    def __init__(self, *args, **kwargs):
+        super(Site, self).__init__(*args, **kwargs)
+        self.court_id = self.__module__
+        self.year = datetime.date.today().year
+        self.url = "https://ag.ny.gov/appeals-and-opinions/numerical-index"
+        self.back_scrape_iterable = range(1995, self.year + 1)
+        self.row_path = False
+        self.cell_path = False
+        self.set_paths()
+
+    def _download(self, request_dict={}):
+        html = super(Site, self)._download(request_dict)
+        if self.method == 'LOCAL':
+            # Make sure the year-table you want to test is first in example file
+            self.year = int(html.xpath('//table[1]/caption')[0].text_content())
+            self.set_paths()
+        return html
+
+    def _get_case_dates(self):
+        """All we have are years, so estimate middle most day of year"""
+        return [convert_date_string('July 2, %d' % self.year)] * len(self.html.xpath(self.row_path))
+
+    def _get_case_names(self):
+        """No case names available"""
+        return ["Untitled New York Attorney General Opinion"] * len(self.case_dates)
+
+    def _get_download_urls(self):
+        path = '%s//a/@href' % (self.cell_path % 4)
+        return [href for href in self.html.xpath(path)]
+
+    def _get_docket_numbers(self):
+        return [cell.text_content().strip() for cell in self.html.xpath(self.cell_path % 1)]
+
+    def _get_precedential_statuses(self):
+        return ['Published'] * len(self.case_dates)
+
+    def _get_summaries(self):
+        """Use Abstract column value"""
+        return [cell.text_content().strip() for cell in self.html.xpath(self.cell_path % 2)]
+
+    def _get_date_filed_is_approximate(self):
+        return [True] * len(self.case_dates)
+
+    def _download_backwards(self, year):
+        self.year = year
+        self.set_paths()
+
+    def set_paths(self):
+        self.row_path = '//table[contains(caption, "%d")]/tbody/tr' % self.year
+        self.cell_path = self.row_path + '/td[%d]'