refactored

joeyism · Oct 26, 2019 · 865a05a · 865a05a
1 parent ca15855
commit 865a05a
Show file tree

Hide file tree

Showing 4 changed files with 54 additions and 67 deletions.
diff --git a/README.md b/README.md
@@ -13,24 +13,24 @@ To get a company's latest 5 10-Ks, run
 ``` python
 from edgar import Company
 company = Company("Oracle Corp", "0001341439")
-tree = company.getAllFilings(filingType = "10-K")
-docs = edgar.getDocuments(tree, noOfDocuments=5)
+tree = company.get_all_filings(filing_type = "10-K")
+docs = edgar.get_documents(tree, no_of_documents=5)
 ```
 or
 ```python
 from edgar import Company, TXTML
 
 company = Company("INTERNATIONAL BUSINESS MACHINES CORP", "0000051143")
-doc = company.get10K()
-text = TXTML.parseFull10K(doc)
+doc = company.get_10K()
+text = TXTML.parse_full_10K(doc)
 ```
 
 To get all companies and find a specific one, run
 
 ``` python
 from edgar import Edgar
 edgar = Edgar()
-possible_companies = edgar.findCompanyName("Cisco System")
+possible_companies = edgar.find_company_name("Cisco System")
 ```
 
 ## API
@@ -41,49 +41,38 @@ The **Company** class has two fields:
 * name (company name)
 * cik (company CIK number)
 
-##### getFilingsUrl
+##### get_filings_url
 Returns a url to fetch filings data
 * **Input**
-    * filingType: The type of document you want. i.e. 10-K, S-8, 8-K. If not specified, it'll return all documents
-    * priorTo: Time prior which documents are to be retrieved. If not specified, it'll return all documents
+    * filing_type: The type of document you want. i.e. 10-K, S-8, 8-K. If not specified, it'll return all documents
+    * prior_to: Time prior to which documents are to be retrieved. If not specified, it'll return all documents
     * ownership: defaults to include. Options are include, exclude, only.
-    * noOfEntries: defaults to 100. Returns the number of entries to be returned. Maximum is 100.
+    * no_of_entries: The number of entries to return. Defaults to 100, which is also the maximum.
 
-##### getAllFilings
+##### get_all_filings
 Returns the HTML in the form of [lxml.html](http://lxml.de/lxmlhtml.html)
 * **Input**
-    * filingType: The type of document you want. i.e. 10-K, S-8, 8-K. If not specified, it'll return all documents
-    * priorTo: Time prior which documents are to be retrieved. If not specified, it'll return all documents
+    * filing_type: The type of document you want. i.e. 10-K, S-8, 8-K. If not specified, it'll return all documents
+    * prior_to: Time prior to which documents are to be retrieved. If not specified, it'll return all documents
     * ownership: defaults to include. Options are include, exclude, only.
-    * noOfEntries: defaults to 100. Returns the number of entries to be returned. Maximum is 100.
+    * no_of_entries: The number of entries to return. Defaults to 100, which is also the maximum.
 
 ### Edgar
 Gets all companies from EDGAR
-##### getCikByCompanyName
+##### get_cik_by_company_name
 * **Input**
     * name: name of the company
 
-##### getCompanyNameByCik
+##### get_company_name_by_cik
 * **Input**
     * cik: cik of the company
 
-##### findCompanyName
+##### find_company_name
 * **Input**
     * words: input words to search the company
 
-### getDocuments
+### get_documents
 Returns a list of strings, each string contains the body of the specified document from input
 * **Input**
     * tree: lxml.html form that is returned from Company.get_all_filings
-    * noOfDocuments: number of document returned. If it is 1, the returned result is just one string, instead of a list of strings. Defaults to 1.
-
-
-## Release Notes
-**0.3.0**
-* Added findCompanyName to Edgar
-
-**0.2.0**
-* Added Edgar
-
-**0.1.0**
-* First release
+    * no_of_documents: number of documents returned. If it is 1, the returned result is just one string, instead of a list of strings. Defaults to 1.
diff --git a/edgar/company.py b/edgar/company.py
@@ -9,59 +9,57 @@ def __init__(self, name, cik):
         self.name = name
         self.cik = cik
 
-    def _getFilingsUrl(self, filingType="", priorTo="", ownership="include", noOfEntries=100):
-        url = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=" + self.cik + "&type=" + filingType + "&dateb=" + priorTo + "&owner=" +  ownership + "&count=" + str(noOfEntries)
+    def _get_filings_url(self, filing_type="", prior_to="", ownership="include", no_of_entries=100):
+        url = "https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=" + self.cik + "&type=" + filing_type + "&dateb=" + prior_to + "&owner=" +  ownership + "&count=" + str(no_of_entries)
         return url
 
-    def getAllFilings(self, filingType="", priorTo="", ownership="include", noOfEntries=100):
-        url = self._getFilingsUrl(filingType, priorTo, ownership, noOfEntries)
+    def get_all_filings(self, filing_type="", prior_to="", ownership="include", no_of_entries=100):
+        url = self._get_filings_url(filing_type, prior_to, ownership, no_of_entries)
         page = requests.get(url)
         return html.fromstring(page.content)
 
-    def get10Ks(self, noOfDocuments=1):
-      tree = self.getAllFilings(filingType="10-K")
-      elems = tree.xpath('//*[@id="documentsbutton"]')[:noOfDocuments]
+    def get_10Ks(self, no_of_documents=1):
+      tree = self.get_all_filings(filing_type="10-K")
+      elems = tree.xpath('//*[@id="documentsbutton"]')[:no_of_documents]
       result = []
       for elem in elems:
           url = BASE_URL + elem.attrib["href"]
-          contentPage = getRequest(url)
-          table = contentPage.find_class("tableFile")[0]
-          lastRow = table.getchildren()[-1]
-          href = lastRow.getchildren()[2].getchildren()[0].attrib["href"]
+          content_page = get_request(url)
+          table = content_page.find_class("tableFile")[0]
+          last_row = table.getchildren()[-1]
+          href = last_row.getchildren()[2].getchildren()[0].attrib["href"]
           href = BASE_URL + href
-          doc = getRequest(href)
+          doc = get_request(href)
           result.append(doc)
       return result
 
-    def get10K(self):
-      return self.get10Ks(noOfDocuments=1)[0]
+    def get_10K(self):
+      return self.get_10Ks(no_of_documents=1)[0]
 
 
-def getRequest(href):
+def get_request(href):
     page = requests.get(href)
     return html.fromstring(page.content)
 
-def getDocuments(tree, noOfDocuments=1):
+def get_documents(tree, no_of_documents=1):
     BASE_URL = "https://www.sec.gov"
-    elems = tree.xpath('//*[@id="documentsbutton"]')[:noOfDocuments]
+    elems = tree.xpath('//*[@id="documentsbutton"]')[:no_of_documents]
     result = []
     for elem in elems:
         url = BASE_URL + elem.attrib["href"]
-        contentPage = getRequest(url)
-        url = BASE_URL + contentPage.xpath('//*[@id="formDiv"]/div/table/tr[2]/td[3]/a')[0].attrib["href"]
-        filing = getRequest(url)
+        content_page = get_request(url)
+        url = BASE_URL + content_page.xpath('//*[@id="formDiv"]/div/table/tr[2]/td[3]/a')[0].attrib["href"]
+        filing = get_request(url)
         result.append(filing.body.text_content())
 
     if len(result) == 1:
         return result[0]
     return result
 
-def getCIKFromCompany(companyName):
-    tree = getRequest("https://www.sec.gov/cgi-bin/browse-edgar?company=" + companyName)
+def get_CIK_from_company(company_name):
+    tree = get_request("https://www.sec.gov/cgi-bin/browse-edgar?company=" + company_name)
     CIKList = tree.xpath('//*[@id="seriesDiv"]/table/tr[*]/td[1]/a/text()')
-    namesList = []
+    names_list = []
     for elem in tree.xpath('//*[@id="seriesDiv"]/table/tr[*]/td[2]'):
-        namesList.append(elem.text_content())
+        names_list.append(elem.text_content())
     return list(zip(CIKList, namesList))
-
-
diff --git a/edgar/edgar.py b/edgar/edgar.py
@@ -18,21 +18,21 @@ def __init__(self):
         self.all_companies_dict = dict(all_companies_array)
         self.all_companies_dict_rev = dict(all_companies_array_rev)
 
-    def getCikByCompanyName(self, name):
+    def get_cik_by_company_name(self, name):
         return self.all_companies_dict[name]
 
-    def getCompanyNameByCik(self, cik):
+    def get_company_name_by_cik(self, cik):
         return self.all_companies_dict_rev[cik]
 
-    def findCompanyName(self, words):
-        possibleCompanies = []
+    def find_company_name(self, words):
+        possible_companies = []
         words = words.lower()
         for company in self.all_companies_dict:
             if all(word in company.lower() for word in words.split(" ")):
-                possibleCompanies.append(company)
-        return possibleCompanies
+                possible_companies.append(company)
+        return possible_companies
 
 def test():
     com = Company("Oracle Corp", "0001341439")
-    tree = com.getAllFilings(filingType = "10-K")
-    return getDocuments(tree)
+    tree = com.get_all_filings(filingType = "10-K")
+    return get_documents(tree)
diff --git a/edgar/txtml.py b/edgar/txtml.py
@@ -5,11 +5,11 @@ def _clean_text_(cls, text):
     return text.replace('\n', '')
 
   @classmethod
-  def getDocumentType(cls, document):
+  def get_document_type(cls, document):
     return document.getchildren()[0].text
 
   @classmethod
-  def getHTMLFromDocument(cls, document):
+  def get_HTML_from_document(cls, document):
     properties = {}
 
     while document.tag != 'text':
@@ -19,12 +19,12 @@ def getHTMLFromDocument(cls, document):
     return document, properties
 
   @classmethod
-  def parseFull10K(cls, doc):
+  def parse_full_10K(cls, doc):
     text = ""
     for child in doc.getchildren():
       if child.tag == 'sec-header':
           continue
-      html, properties = TXTML.getHTMLFromDocument(child)
+      html, properties = TXTML.get_HTML_from_document(child)
       if properties['type'] == '10-K':
         text = text + html.text_content()
     return text