diff --git a/pycorenlp/__init__.py b/pycorenlp/__init__.py
index b031cf7..8d7744b 100644
--- a/pycorenlp/__init__.py
+++ b/pycorenlp/__init__.py
@@ -1 +1 @@
-from pycorenlp.corenlp import StanfordCoreNLP
+from pycorenlp.corenlp import StanfordCoreNLP, StanfordCoreNLPError, NoStanfordCoreNLPServer
diff --git a/pycorenlp/corenlp.py b/pycorenlp/corenlp.py
index 6eb2175..51147f3 100644
--- a/pycorenlp/corenlp.py
+++ b/pycorenlp/corenlp.py
@@ -1,7 +1,33 @@
 import json, requests
 
-class StanfordCoreNLP:
+class NoStanfordCoreNLPServer(Exception):
+    """Raised when no CoreNLP server answers at ``server_url``."""
+
+    def __init__(self, server_url):
+        # Pass the argument on to Exception so that ``args`` is populated;
+        # otherwise the exception cannot be re-created when unpickled.
+        super(NoStanfordCoreNLPServer, self).__init__(server_url)
+        self.server_url = server_url
+
+    def __str__(self):
+        return ('Cannot connect to <%s>.\nPlease start the CoreNLP server, e.g.:\n'
+                '$ cd stanford-corenlp-full-2015-12-09/\n'
+                '$ java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer'
+                % (self.server_url))
 
+class StanfordCoreNLPError(Exception):
+    """Raised when the CoreNLP server replies with a non-OK HTTP status."""
+
+    def __init__(self, reason, message):
+        # Populate ``args`` as well, so the exception survives pickling.
+        super(StanfordCoreNLPError, self).__init__(reason, message)
+        self.reason = reason
+        self.message = message
+
+    def __str__(self):
+        return "%s(%s): %s" % (self.__class__.__name__, self.reason, self.message)
+
+class StanfordCoreNLP:
     def __init__(self, server_url):
         if server_url[-1] == '/':
             server_url = server_url[:-1]
@@ -18,23 +44,18 @@ def annotate(self, text, properties=None):
         try:
             requests.get(self.server_url)
         except requests.exceptions.ConnectionError:
-            raise Exception('Check whether you have started the CoreNLP server e.g.\n'
-            '$ cd stanford-corenlp-full-2015-12-09/ \n'
-            '$ java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer')
+            raise NoStanfordCoreNLPServer(self.server_url)
 
-        data = text.encode()
         r = requests.post(
             self.server_url, params={
                 'properties': str(properties)
-            }, data=data, headers={'Connection': 'close'})
-        output = r.text
-        if ('outputFormat' in properties
-             and properties['outputFormat'] == 'json'):
-            try:
-                output = json.loads(output, encoding='utf-8', strict=True)
-            except:
-                pass
-        return output
+            }, data=text.encode(), headers={'Connection': 'close'})
+        # Fail loudly on an HTTP error instead of returning the error body.
+        if not r.ok:
+            raise StanfordCoreNLPError(r.reason, r.text)
+        if properties.get('outputFormat') == 'json':
+            return json.loads(r.text)
+        return r.text
 
     def tokensregex(self, text, pattern, filter):
         return self.regex('/tokensregex', text, pattern, filter)
@@ -48,9 +69,7 @@ def regex(self, endpoint, text, pattern, filter):
                 'pattern': pattern,
                 'filter': filter
             }, data=text)
-        output = r.text
-        try:
-            output = json.loads(r.text)
-        except:
-            pass
-        return output
+        # Fail loudly on an HTTP error instead of returning the error body.
+        if not r.ok:
+            raise StanfordCoreNLPError(r.reason, r.text)
+        return json.loads(r.text)