Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make it work with python 3.x #4

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 41 additions & 32 deletions pythonwhois/parse.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
import re, datetime
from __future__ import print_function
import re, sys, datetime

# Python 2/3 compatibility shim: expose a single ``iteritems(d)`` helper that
# lazily iterates over a mapping's (key, value) pairs on either major version.
if sys.version_info[0] >= 3:
    def iteritems(d):
        """Return an iterator over the (key, value) pairs of mapping ``d``.

        On Python 3, ``dict.items()`` already returns a lightweight view, so
        it is simply wrapped in ``iter()`` to match the Python 2 behaviour of
        returning a one-shot iterator.
        """
        return iter(d.items())
else:
    def iteritems(d):
        """Return an iterator over the (key, value) pairs of mapping ``d``.

        On Python 2, ``dict.iteritems()`` is used so no intermediate list of
        pairs is built.
        """
        return d.iteritems()


grammar = {
"_data": {
Expand Down Expand Up @@ -142,8 +151,8 @@ def parse_raw_whois(raw_data, normalized=[]):
raw_data = [segment.replace("\r", "") for segment in raw_data] # Carriage returns are the devil

for segment in raw_data:
for rule_key, rule_regexes in grammar['_data'].iteritems():
if data.has_key(rule_key) == False:
for rule_key, rule_regexes in iteritems(grammar['_data']):
if rule_key not in data:
for line in segment.splitlines():
for regex in rule_regexes:
result = re.search(regex, line, re.IGNORECASE)
Expand All @@ -153,7 +162,7 @@ def parse_raw_whois(raw_data, normalized=[]):
if val != "":
try:
data[rule_key].append(val)
except KeyError, e:
except KeyError as e:
data[rule_key] = [val]

# Whois.com is a bit special... Fabulous.com also seems to use this format.
Expand All @@ -163,7 +172,7 @@ def parse_raw_whois(raw_data, normalized=[]):
for match in re.findall("[ ]+(.+)\n", chunk):
try:
data["nameservers"].append(match.strip())
except KeyError, e:
except KeyError as e:
data["nameservers"] = [match.strip()]
# Nominet also needs some special attention
match = re.search(" Registrar:\n (.+)\n", segment)
Expand All @@ -176,7 +185,7 @@ def parse_raw_whois(raw_data, normalized=[]):
match = match.split()[0]
try:
data["nameservers"].append(match.strip())
except KeyError, e:
except KeyError as e:
data["nameservers"] = [match.strip()]
# .am plays the same game
match = re.search(" DNS servers:([\s\S]*?\n)\n", segment)
Expand All @@ -186,7 +195,7 @@ def parse_raw_whois(raw_data, normalized=[]):
match = match.split()[0]
try:
data["nameservers"].append(match.strip())
except KeyError, e:
except KeyError as e:
data["nameservers"] = [match.strip()]
# SIDN isn't very standard either.
match = re.search("Registrar:\n\s+(\S.*)", segment)
Expand All @@ -199,7 +208,7 @@ def parse_raw_whois(raw_data, normalized=[]):
match = match.split()[0]
try:
data["nameservers"].append(match.strip())
except KeyError, e:
except KeyError as e:
data["nameservers"] = [match.strip()]
# The .ie WHOIS server puts ambiguous status information in an unhelpful order
match = re.search('ren-status:\s*(.+)', segment)
Expand All @@ -212,34 +221,34 @@ def parse_raw_whois(raw_data, normalized=[]):
try:
data['expiration_date'] = remove_duplicates(data['expiration_date'])
data['expiration_date'] = parse_dates(data['expiration_date'])
except KeyError, e:
except KeyError as e:
pass # Not present

try:
data['creation_date'] = remove_duplicates(data['creation_date'])
data['creation_date'] = parse_dates(data['creation_date'])
except KeyError, e:
except KeyError as e:
pass # Not present

try:
data['updated_date'] = remove_duplicates(data['updated_date'])
data['updated_date'] = parse_dates(data['updated_date'])
except KeyError, e:
except KeyError as e:
pass # Not present

try:
data['nameservers'] = remove_duplicates([ns.rstrip(".") for ns in data['nameservers']])
except KeyError, e:
except KeyError as e:
pass # Not present

try:
data['emails'] = remove_duplicates(data['emails'])
except KeyError, e:
except KeyError as e:
pass # Not present

try:
data['registrar'] = remove_duplicates(data['registrar'])
except KeyError, e:
except KeyError as e:
pass # Not present

# Remove e-mail addresses if they are already listed for any of the contacts
Expand All @@ -248,11 +257,11 @@ def parse_raw_whois(raw_data, normalized=[]):
if data["contacts"][contact] is not None:
try:
known_emails.append(data["contacts"][contact]["email"])
except KeyError, e:
except KeyError as e:
pass # No e-mail recorded for this contact...
try:
data['emails'] = [email for email in data["emails"] if email not in known_emails]
except KeyError, e:
except KeyError as e:
pass # Not present

for key in data.keys():
Expand Down Expand Up @@ -281,7 +290,7 @@ def normalize_data(data, normalized):
else:
data[key] = [normalize_name(item, abbreviation_threshold=threshold, length_threshold=1) for item in data[key]]

for contact_type, contact in data['contacts'].iteritems():
for contact_type, contact in iteritems(data['contacts']):
if contact is not None:
for key in ("email",):
if key in contact and contact[key] is not None and (normalized == True or key in normalized):
Expand All @@ -301,7 +310,7 @@ def normalize_data(data, normalized):
for key in contact.keys():
try:
contact[key] = contact[key].strip(", ")
except AttributeError, e:
except AttributeError as e:
pass # Not a string
return data

Expand Down Expand Up @@ -368,54 +377,54 @@ def parse_dates(dates):
# This will require some more guesswork - some WHOIS servers present the name of the month
try:
month = int(result.group("month"))
except ValueError, e:
except ValueError as e:
# Apparently not a number. Look up the corresponding number.
try:
month = grammar['_months'][result.group("month").lower()]
except KeyError, e:
except KeyError as e:
# Unknown month name, default to 0
month = 0

try:
hour = int(result.group("hour"))
except IndexError, e:
except IndexError as e:
hour = 0
except TypeError, e:
except TypeError as e:
hour = 0

try:
minute = int(result.group("minute"))
except IndexError, e:
except IndexError as e:
minute = 0
except TypeError, e:
except TypeError as e:
minute = 0

try:
second = int(result.group("second"))
except IndexError, e:
except IndexError as e:
second = 0
except TypeError, e:
except TypeError as e:
second = 0

break
except ValueError, e:
except ValueError as e:
# Something went horribly wrong, maybe there is no valid date present?
year = 0
month = 0
day = 0
hour = 0
minute = 0
second = 0
print e.message
print(e.message)
try:
if year > 0:
try:
parsed_dates.append(datetime.datetime(year, month, day, hour, minute, second))
except ValueError, e:
except ValueError as e:
# We might have gotten the day and month the wrong way around, let's try it the other way around
# If you're not using an ISO-standard date format, you're an evil registrar!
parsed_dates.append(datetime.datetime(year, day, month, hour, minute, second))
except UnboundLocalError, e:
except UnboundLocalError as e:
pass

if len(parsed_dates) > 0:
Expand Down Expand Up @@ -623,7 +632,7 @@ def parse_registrants(data):
admin_contact = data_reference
break

# Post-processing
# Post-processing
for obj in (registrant, tech_contact, billing_contact, admin_contact):
if obj is not None:
for key in obj.keys():
Expand All @@ -642,7 +651,7 @@ def parse_registrants(data):
try:
street_items.append(obj["street%d" % i])
del obj["street%d" % i]
except KeyError, e:
except KeyError as e:
break
i += 1
obj["street"] = "\n".join(street_items)
Expand Down