From a01f6630da4e85e9c550d1e6f458136137244186 Mon Sep 17 00:00:00 2001 From: Sander Date: Tue, 14 Jun 2016 11:37:22 +0200 Subject: [PATCH 1/5] ADD: A holder for WHOIS responses. It contains information about the success of the retrieval. --- pythonwhois/net.py | 44 +++++++++++++++++++++----- pythonwhois/response/__init__.py | 0 pythonwhois/response/whois_response.py | 16 ++++++++++ 3 files changed, 52 insertions(+), 8 deletions(-) create mode 100644 pythonwhois/response/__init__.py create mode 100644 pythonwhois/response/whois_response.py diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 5c7f2c2..0d77014 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -7,6 +7,9 @@ from pythonwhois.caching.whois_server_cache import server_cache from pythonwhois.ratelimit.cool_down import CoolDown +from pythonwhois.response.whois_response import WhoisResponse + +incomplete_result_message = "THE_WHOIS_ORACLE_INCOMPLETE_RESULT" cool_down_tracker = CoolDown() @@ -47,7 +50,8 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals target_server = get_target_server(domain, previous, server) query = prepare_query(target_server, domain) - response = query_server(target_server, query) + whois_response = query_server(target_server, query) + response = whois_response.response if never_cut: # If the caller has requested to 'never cut' responses, he will get the original response from the server (this is @@ -64,8 +68,15 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals if re.search("Domain Name: %s\n" % domain.upper(), record): response = record break - if never_cut == False: + if not never_cut: new_list = [response] + previous + + if whois_response.server_is_dead: + return build_return_value(with_server_list, new_list, server_list) + elif whois_response.request_failure or whois_response.cool_down_failure: + new_list = [incomplete_result_message] + previous + return build_return_value(with_server_list, new_list, 
server_list) + server_list.append(target_server) # Ignore redirects from registries who publish the registrar data themselves @@ -82,10 +93,25 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals return get_whois_raw(domain, referal_server, new_list, server_list=server_list, with_server_list=with_server_list) + return build_return_value(with_server_list, new_list, server_list) + + +def build_return_value(with_server_list, responses, server_list): + """ + Create a return value + :param with_server_list: Whether the server list should be returned as well + :param responses: The list of responses + :param server_list: The server list + :return: A list of responses without the empty ones, plus possibly a server list + """ + non_empty_responses = filter((lambda text: text is not ''), responses) + if len(non_empty_responses) == 0: + non_empty_responses = [''] + if with_server_list: - return (new_list, server_list) + return non_empty_responses, server_list else: - return new_list + return non_empty_responses def query_server(whois_server, query): @@ -99,7 +125,7 @@ def query_server(whois_server, query): if whois_server and cool_down_tracker.try_to_use_server(whois_server): return whois_request(query, whois_server) else: - return "" + return WhoisResponse(cool_down_failure=True) def prepare_query(whois_server, domain): @@ -169,7 +195,7 @@ def get_tld(domain): def get_root_server(domain): - data = whois_request(domain, "whois.iana.org") + data = whois_request(domain, "whois.iana.org").response or "" for line in [x.strip() for x in data.splitlines()]: match = re.match("refer:\s*([^\s]+)", line) if match is None: @@ -181,6 +207,7 @@ def get_root_server(domain): def whois_request(domain, server, port=43): try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(10) sock.connect((server, port)) sock.send(("%s\r\n" % domain).encode("utf-8")) buff = b"" @@ -189,6 +216,7 @@ def whois_request(domain, server, port=43): if 
len(data) == 0: break buff += data - return buff.decode("utf-8", "replace") + return WhoisResponse(buff.decode("utf-8", "replace")) except Exception: - return "" + server_is_dead = not server_is_alive(server) + return WhoisResponse(request_failure=True, server_is_dead=server_is_dead) diff --git a/pythonwhois/response/__init__.py b/pythonwhois/response/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/pythonwhois/response/whois_response.py b/pythonwhois/response/whois_response.py new file mode 100644 index 0000000..5268797 --- /dev/null +++ b/pythonwhois/response/whois_response.py @@ -0,0 +1,16 @@ +class WhoisResponse: + """ + Holder class for WHOIS responses. Is capable of marking the retrieval as a failure. + """ + + def __init__(self, response=None, request_failure=False, cool_down_failure=False, server_is_dead=False): + """ + Hold the WHOIS response + :param response: The received response, if any + :param request_failure: If the request was a failure + :param cool_down_failure: Whether the server was unavailable due to a cool down or not + """ + self.response = response + self.request_failure = request_failure + self.cool_down_failure = cool_down_failure + self.server_is_dead = server_is_dead From d9aa27c6bc8007fb909bdfcde5577ad61fbd3f33 Mon Sep 17 00:00:00 2001 From: Sander Date: Wed, 15 Jun 2016 13:27:22 +0200 Subject: [PATCH 2/5] REF: Increased default cool down length from 1 second to 2 REF: Extracted the starting of a cool down to a separate method ADD: Can now check whether a rate limit has been exceeded or not FIX: If there are no results at all, parse.py simply returns an empty dictionary instead of crashing --- pythonwhois/__init__.py | 6 +++- pythonwhois/net.py | 38 +++++++++++++++------- pythonwhois/parse.py | 2 ++ pythonwhois/ratelimit/cool_down.py | 11 ++++++- pythonwhois/ratelimit/cool_down_tracker.py | 14 ++++++++ pythonwhois/response/whois_response.py | 19 +++++++++-- setup.py | 2 +- 7 files changed, 76 insertions(+), 16 
deletions(-) diff --git a/pythonwhois/__init__.py b/pythonwhois/__init__.py index f8729e4..c56c784 100644 --- a/pythonwhois/__init__.py +++ b/pythonwhois/__init__.py @@ -6,8 +6,12 @@ def get_whois(domain, normalized=[]): # Unlisted handles will be looked up on the last WHOIS server that was queried. This may be changed to also query # other servers in the future, if it turns out that there are cases where the last WHOIS server in the chain doesn't # actually hold the handle contact details, but another WHOIS server in the chain does. + if len(server_list) > 0: + handle_server = server_list[-1] + else: + handle_server = "" return parse.parse_raw_whois(raw_data, normalized=normalized, never_query_handles=False, - handle_server=server_list[-1]) + handle_server=handle_server) def set_persistent_cache(path_to_cache): diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 0d77014..3c01132 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -7,7 +7,7 @@ from pythonwhois.caching.whois_server_cache import server_cache from pythonwhois.ratelimit.cool_down import CoolDown -from pythonwhois.response.whois_response import WhoisResponse +from pythonwhois.response.whois_response import RawWhoisResponse incomplete_result_message = "THE_WHOIS_ORACLE_INCOMPLETE_RESULT" @@ -72,8 +72,14 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals new_list = [response] + previous if whois_response.server_is_dead: + # That's probably as far as we can go, the road ends here return build_return_value(with_server_list, new_list, server_list) - elif whois_response.request_failure or whois_response.cool_down_failure: + elif whois_response.request_failure: + # Mark this result as incomplete, so we can try again later + new_list = [incomplete_result_message] + previous + cool_down_tracker.warn_limit_exceeded(target_server) + return build_return_value(with_server_list, new_list, server_list) + elif whois_response.cool_down_failure: new_list = 
[incomplete_result_message] + previous return build_return_value(with_server_list, new_list, server_list) @@ -104,9 +110,7 @@ def build_return_value(with_server_list, responses, server_list): :param server_list: The server list :return: A list of responses without the empty ones, plus possibly a server list """ - non_empty_responses = filter((lambda text: text is not ''), responses) - if len(non_empty_responses) == 0: - non_empty_responses = [''] + non_empty_responses = filter((lambda text: text is not '' and text is not None), responses) if with_server_list: return non_empty_responses, server_list @@ -117,15 +121,15 @@ def build_return_value(with_server_list, responses, server_list): def query_server(whois_server, query): """ Send out the query, if the server is available. if the server is still in cool down, - return an empty string + return a RawWhoisResponse instance describing the failure :param whois_server: The WHOIS server to query :param query: The query to send - :return: The result, or an empty string if the server is unavailable + :return: A RawWhoisResponse containing either the response or the reason of failure """ if whois_server and cool_down_tracker.try_to_use_server(whois_server): return whois_request(query, whois_server) else: - return WhoisResponse(cool_down_failure=True) + return RawWhoisResponse(cool_down_failure=True) def prepare_query(whois_server, domain): @@ -151,7 +155,7 @@ def get_target_server(domain, previous_results, given_server): :param domain: The domain to get the server for :param previous_results: The previously acquired results, as a result of referrals :param given_server: - :return: + :return: The server to use """ if len(previous_results) == 0 and given_server == "": # Root query @@ -195,6 +199,11 @@ def get_tld(domain): def get_root_server(domain): + """ + Find the WHOIS server for a given domain + :param domain: The domain to find a WHOIS server for + :return: The WHOIS server, or an empty string if no server is found + 
""" data = whois_request(domain, "whois.iana.org").response or "" for line in [x.strip() for x in data.splitlines()]: match = re.match("refer:\s*([^\s]+)", line) @@ -205,6 +214,13 @@ def get_root_server(domain): def whois_request(domain, server, port=43): + """ + Request WHOIS information. Has a timeout of 10 seconds + :param domain: The domain to request WHOIS information for + :param server: The WHOIS server to use + :param port: The port to use, 43 by default + :return: A WHOIS response containing either the result, or containing information about the failure + """ try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.settimeout(10) @@ -216,7 +232,7 @@ def whois_request(domain, server, port=43): if len(data) == 0: break buff += data - return WhoisResponse(buff.decode("utf-8", "replace")) + return RawWhoisResponse(buff.decode("utf-8", "replace")) except Exception: server_is_dead = not server_is_alive(server) - return WhoisResponse(request_failure=True, server_is_dead=server_is_dead) + return RawWhoisResponse(request_failure=True, server_is_dead=server_is_dead) diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index 025211c..1125587 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -702,6 +702,8 @@ def filter_characters(string, delete_characters): def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_server=""): + if len(raw_data) == 0: + return {} normalized = normalized or [] data = {} diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index a323a26..98c5929 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -14,7 +14,7 @@ def __init__(self): Creates a dictionary for storing cool downs. 
""" self.servers_on_cool_down = {} - self.default_cool_down_length = 1.0 + self.default_cool_down_length = 2.0 self.last_request_time = datetime.datetime.now() def can_use_server(self, whois_server): @@ -51,6 +51,15 @@ def decrement_cool_downs(self): for server, cool_down_tracker in self.servers_on_cool_down.iteritems(): cool_down_tracker.decrement_cool_down(time_diff) + def warn_limit_exceeded(self, whois_server): + """ + Warn the CoolDown instance of an exceeded limit for a WHOIS server. + The CoolDown instance will then make sure that the cool down for the WHOIS server + will be longer next time + :param whois_server: The WHOIS server the limit has been exceeded for + """ + self.servers_on_cool_down[whois_server].double_cool_down() + def get_time_difference(self): """ Get the difference in time between te last time this was called diff --git a/pythonwhois/ratelimit/cool_down_tracker.py b/pythonwhois/ratelimit/cool_down_tracker.py index 98ceca8..cf42927 100644 --- a/pythonwhois/ratelimit/cool_down_tracker.py +++ b/pythonwhois/ratelimit/cool_down_tracker.py @@ -26,6 +26,12 @@ def use_whois_server(self): It will set the cool down, based on the amount of requests that already have been made """ self.request_count += 1 + self.start_cool_down() + + def start_cool_down(self): + """ + Start a new cool_down + """ if self.max_requests_reached(self.max_requests_day): self.current_cool_down = 86400 elif self.max_requests_reached(self.max_requests_hour): @@ -50,3 +56,11 @@ def max_requests_reached(self, limit): :return: True if the limit has been reached, false if not """ return limit is not None and self.request_count % limit == 0 + + def double_cool_down(self): + """ + Double the cool down length, as in, the cool down length that is always used, + not the current cool down that is going on. 
+ """ + self.cool_down_length *= 2 + self.start_cool_down() diff --git a/pythonwhois/response/whois_response.py b/pythonwhois/response/whois_response.py index 5268797..8c0942f 100644 --- a/pythonwhois/response/whois_response.py +++ b/pythonwhois/response/whois_response.py @@ -1,9 +1,9 @@ -class WhoisResponse: +class RawWhoisResponse: """ Holder class for WHOIS responses. Is capable of marking the retrieval as a failure. """ - def __init__(self, response=None, request_failure=False, cool_down_failure=False, server_is_dead=False): + def __init__(self, response="", request_failure=False, cool_down_failure=False, server_is_dead=False): """ Hold the WHOIS response :param response: The received response, if any @@ -14,3 +14,18 @@ def __init__(self, response=None, request_failure=False, cool_down_failure=False self.request_failure = request_failure self.cool_down_failure = cool_down_failure self.server_is_dead = server_is_dead + + if len(response) > 0: + self.request_failure = self.check_for_exceeded_limit() + + def check_for_exceeded_limit(self): + """ + Check whether the limit has been exceeded. This is done by + looking at the size of the response. If it has less than 4 lines, + it is probably not a useful response and most likely a message about spamming + the WHOIS server + :return: True if the message is really short, false if not + """ + if self.response is not None and len(self.response.splitlines()) < 4: + return True + return False diff --git a/setup.py b/setup.py index 11b4f08..b02bc83 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ description='Module for retrieving and parsing the WHOIS data for a domain. Supports most domains. 
No dependencies.', author='Sander ten Hoor, original by Sven Slootweg', url='https://github.com/MasterFenrir/whois-oracle', - packages=['pythonwhois', 'pythonwhois.caching', 'pythonwhois.ratelimit'], + packages=['pythonwhois', 'pythonwhois.caching', 'pythonwhois.ratelimit', 'pythonwhois.response'], package_data={"pythonwhois": ["*.dat"]}, install_requires=['argparse'], provides=['pythonwhois'], From 744ee349b0f7161af9e7aa2cc067616a78066be9 Mon Sep 17 00:00:00 2001 From: Sander Date: Wed, 15 Jun 2016 14:05:33 +0200 Subject: [PATCH 3/5] REF: Renamed whois_response.py to raw_whois_response.py REF: Made timeout an argument, but with a default value --- pythonwhois/net.py | 11 ++++++----- .../{whois_response.py => raw_whois_response.py} | 0 2 files changed, 6 insertions(+), 5 deletions(-) rename pythonwhois/response/{whois_response.py => raw_whois_response.py} (100%) diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 3c01132..83ccdee 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -7,7 +7,7 @@ from pythonwhois.caching.whois_server_cache import server_cache from pythonwhois.ratelimit.cool_down import CoolDown -from pythonwhois.response.whois_response import RawWhoisResponse +from pythonwhois.response.raw_whois_response import RawWhoisResponse incomplete_result_message = "THE_WHOIS_ORACLE_INCOMPLETE_RESULT" @@ -75,7 +75,7 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals # That's probably as far as we can go, the road ends here return build_return_value(with_server_list, new_list, server_list) elif whois_response.request_failure: - # Mark this result as incomplete, so we can try again later + # Mark this result as incomplete, so we can try again later but still use the data if we have any new_list = [incomplete_result_message] + previous cool_down_tracker.warn_limit_exceeded(target_server) return build_return_value(with_server_list, new_list, server_list) @@ -213,17 +213,18 @@ def get_root_server(domain): return "" 
-def whois_request(domain, server, port=43): +def whois_request(domain, server, port=43, timeout=10): """ - Request WHOIS information. Has a timeout of 10 seconds + Request WHOIS information. :param domain: The domain to request WHOIS information for :param server: The WHOIS server to use :param port: The port to use, 43 by default + :param timeout: The length of the time out, 10 seconds by default :return: A WHOIS response containing either the result, or containing information about the failure """ try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - sock.settimeout(10) + sock.settimeout(timeout) sock.connect((server, port)) sock.send(("%s\r\n" % domain).encode("utf-8")) buff = b"" diff --git a/pythonwhois/response/whois_response.py b/pythonwhois/response/raw_whois_response.py similarity index 100% rename from pythonwhois/response/whois_response.py rename to pythonwhois/response/raw_whois_response.py From 2d5e2cac71462307ef240c7e2cabf589a7dcb57c Mon Sep 17 00:00:00 2001 From: Sander Date: Wed, 15 Jun 2016 17:37:30 +0200 Subject: [PATCH 4/5] REF: Removed the fix for parsing empty responses and moved it to the whois application that uses this REF: Addressed Wytse's comments --- pythonwhois/net.py | 6 +++--- pythonwhois/parse.py | 2 -- pythonwhois/ratelimit/cool_down.py | 6 +++--- pythonwhois/response/raw_whois_response.py | 10 ++++------ 4 files changed, 10 insertions(+), 14 deletions(-) diff --git a/pythonwhois/net.py b/pythonwhois/net.py index 83ccdee..4e516a1 100644 --- a/pythonwhois/net.py +++ b/pythonwhois/net.py @@ -79,7 +79,7 @@ def get_whois_raw(domain, server="", previous=None, rfc3490=True, never_cut=Fals new_list = [incomplete_result_message] + previous cool_down_tracker.warn_limit_exceeded(target_server) return build_return_value(with_server_list, new_list, server_list) - elif whois_response.cool_down_failure: + elif whois_response.still_in_cool_down: new_list = [incomplete_result_message] + previous return 
build_return_value(with_server_list, new_list, server_list) @@ -110,7 +110,7 @@ def build_return_value(with_server_list, responses, server_list): :param server_list: The server list :return: A list of responses without the empty ones, plus possibly a server list """ - non_empty_responses = filter((lambda text: text is not '' and text is not None), responses) + non_empty_responses = filter((lambda text: text), responses) if with_server_list: return non_empty_responses, server_list @@ -129,7 +129,7 @@ def query_server(whois_server, query): if whois_server and cool_down_tracker.try_to_use_server(whois_server): return whois_request(query, whois_server) else: - return RawWhoisResponse(cool_down_failure=True) + return RawWhoisResponse(still_in_cool_down=True) def prepare_query(whois_server, domain): diff --git a/pythonwhois/parse.py b/pythonwhois/parse.py index 1125587..025211c 100644 --- a/pythonwhois/parse.py +++ b/pythonwhois/parse.py @@ -702,8 +702,6 @@ def filter_characters(string, delete_characters): def parse_raw_whois(raw_data, normalized=None, never_query_handles=True, handle_server=""): - if len(raw_data) == 0: - return {} normalized = normalized or [] data = {} diff --git a/pythonwhois/ratelimit/cool_down.py b/pythonwhois/ratelimit/cool_down.py index 98c5929..b658864 100644 --- a/pythonwhois/ratelimit/cool_down.py +++ b/pythonwhois/ratelimit/cool_down.py @@ -14,7 +14,7 @@ def __init__(self): Creates a dictionary for storing cool downs. 
""" self.servers_on_cool_down = {} - self.default_cool_down_length = 2.0 + self.default_cool_down_seconds = 2.0 self.last_request_time = datetime.datetime.now() def can_use_server(self, whois_server): @@ -39,7 +39,7 @@ def try_to_use_server(self, whois_server): return False if whois_server not in self.servers_on_cool_down: - self.servers_on_cool_down[whois_server] = CoolDownTracker(self.default_cool_down_length) + self.servers_on_cool_down[whois_server] = CoolDownTracker(self.default_cool_down_seconds) self.servers_on_cool_down[whois_server].use_whois_server() return True @@ -78,6 +78,6 @@ def set_cool_down_config(self, path_to_file): the cool down dictionary. :param path_to_file: The path to the configuration file """ - cool_down_config = CoolDownConfig(path_to_file, self.default_cool_down_length) + cool_down_config = CoolDownConfig(path_to_file, self.default_cool_down_seconds) for whois_server in cool_down_config.get_sections(): self.servers_on_cool_down[whois_server] = cool_down_config.get_cool_down_tracker_for_server(whois_server) diff --git a/pythonwhois/response/raw_whois_response.py b/pythonwhois/response/raw_whois_response.py index 8c0942f..e0c3a50 100644 --- a/pythonwhois/response/raw_whois_response.py +++ b/pythonwhois/response/raw_whois_response.py @@ -3,16 +3,16 @@ class RawWhoisResponse: Holder class for WHOIS responses. Is capable of marking the retrieval as a failure. 
""" - def __init__(self, response="", request_failure=False, cool_down_failure=False, server_is_dead=False): + def __init__(self, response="", request_failure=False, still_in_cool_down=False, server_is_dead=False): """ Hold the WHOIS response :param response: The received response, if any :param request_failure: If the request was a failure - :param cool_down_failure: Whether the server was unavailable due to a cool down or not + :param still_in_cool_down: Whether the server was unavailable due to a cool down or not """ self.response = response self.request_failure = request_failure - self.cool_down_failure = cool_down_failure + self.still_in_cool_down = still_in_cool_down self.server_is_dead = server_is_dead if len(response) > 0: @@ -26,6 +26,4 @@ def check_for_exceeded_limit(self): the WHOIS server :return: True if the message is really short, false if not """ - if self.response is not None and len(self.response.splitlines()) < 4: - return True - return False + return self.response is not None and len(self.response.splitlines()) < 4 From b0f201e9797179e61c31076009d9750d1ee4253e Mon Sep 17 00:00:00 2001 From: Sander Date: Thu, 16 Jun 2016 10:01:18 +0200 Subject: [PATCH 5/5] ENH: Wording in a comment --- pythonwhois/ratelimit/cool_down_tracker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pythonwhois/ratelimit/cool_down_tracker.py b/pythonwhois/ratelimit/cool_down_tracker.py index cf42927..44a5650 100644 --- a/pythonwhois/ratelimit/cool_down_tracker.py +++ b/pythonwhois/ratelimit/cool_down_tracker.py @@ -60,7 +60,7 @@ def max_requests_reached(self, limit): def double_cool_down(self): """ Double the cool down length, as in, the cool down length that is always used, - not the current cool down that is going on. + not the current cool down that happening. """ self.cool_down_length *= 2 self.start_cool_down()