Skip to content

Commit 6e37589

Browse files
committed
PERF: leverage tzlocal package to provide 2000x speedup for dateutil.tz.tzlocal operations
1 parent e3b0950 commit 6e37589

17 files changed

+1139
-11
lines changed

LICENSES/TZLOCAL_LICENSE

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
Copyright 2011-2017 Lennart Regebro
2+
3+
Permission is hereby granted, free of charge, to any person obtaining a copy
4+
of this software and associated documentation files (the "Software"), to deal
5+
in the Software without restriction, including without limitation the rights
6+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7+
copies of the Software, and to permit persons to whom the Software is
8+
furnished to do so, subject to the following conditions:
9+
10+
The above copyright notice and this permission notice shall be included in
11+
all copies or substantial portions of the Software.
12+
13+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
19+
SOFTWARE.

asv_bench/benchmarks/timeseries.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ def time_to_pydatetime(self, index_type):
6262

6363
class TzLocalize(object):
6464

65-
params = [None, 'US/Eastern', 'UTC', dateutil.tz.tzutc()]
65+
params = [None, 'US/Eastern', 'UTC', dateutil.tz.tzutc(),
66+
dateutil.tz.tzlocal()]
6667
param_names = 'tz'
6768

6869
def setup(self, tz):
@@ -394,7 +395,8 @@ def time_dup_string_tzoffset_dates(self, cache):
394395

395396
class DatetimeAccessor(object):
396397

397-
params = [None, 'US/Eastern', 'UTC', dateutil.tz.tzutc()]
398+
params = [None, 'US/Eastern', 'UTC', dateutil.tz.tzutc(),
399+
dateutil.tz.tzlocal()]
398400
param_names = 'tz'
399401

400402
def setup(self, tz):

pandas/_libs/src/__init__.py

Whitespace-only changes.

pandas/_libs/src/tzlocal/__init__.py

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import sys
2+
if sys.platform == 'win32':
3+
from pandas._libs.src.tzlocal.win32 import get_localzone, reload_localzone
4+
else:
5+
from pandas._libs.src.tzlocal.unix import get_localzone, reload_localzone

pandas/_libs/src/tzlocal/unix.py

+164
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
import os
2+
import pytz
3+
import re
4+
5+
from pandas._libs.src.tzlocal import utils
6+
7+
_cache_tz = None
8+
9+
10+
def _tz_from_env(tzenv):
    """Build a tzinfo object from the value of the ``TZ`` environment variable.

    Parameters
    ----------
    tzenv : str
        Value of ``TZ``. A single leading ``:`` is stripped. The remainder
        is tried first as a path to an existing file (read as a tzfile) and
        then as a zone name passed to ``pytz.timezone``.

    Returns
    -------
    tzinfo
        The object built by ``pytz.tzfile.build_tzinfo`` (named ``'local'``)
        or returned by ``pytz.timezone``.

    Raises
    ------
    pytz.UnknownTimeZoneError
        If the value is neither an existing file nor a zone pytz knows.
    """
    # startswith() rather than tzenv[0] so an empty value cannot raise
    # IndexError before we get the chance to report it properly.
    if tzenv.startswith(':'):
        tzenv = tzenv[1:]

    # TZ specifies a file
    if os.path.exists(tzenv):
        with open(tzenv, 'rb') as tzfile:
            return pytz.tzfile.build_tzinfo('local', tzfile)

    # TZ specifies a zoneinfo zone.
    try:
        return pytz.timezone(tzenv)
    except pytz.UnknownTimeZoneError:
        # Interpolate the offending value; the message carries a %s
        # placeholder that must be filled in to be useful.
        raise pytz.UnknownTimeZoneError(
            "tzlocal() does not support non-zoneinfo timezones like %s. \n"
            "Please use a timezone in the form of Continent/City" % tzenv)
28+
29+
30+
def _try_tz_from_env():
    """Return the timezone described by ``TZ``, or None.

    None is returned when ``TZ`` is unset or empty, and also when
    ``_tz_from_env`` raises ``pytz.UnknownTimeZoneError`` for its value.
    """
    env_value = os.environ.get('TZ')
    if not env_value:
        return None

    try:
        zone = _tz_from_env(env_value)
    except pytz.UnknownTimeZoneError:
        # Unusable TZ value: report "nothing found" to the caller.
        zone = None
    return zone
37+
38+
39+
def _get_localzone(_root='/'):
    """Tries to find the local timezone configuration.

    This method prefers finding the timezone name and passing that to pytz,
    over passing in the localtime file, as in the latter case the zoneinfo
    name is unknown.

    The parameter _root makes the function look for files like /etc/localtime
    beneath the _root directory. This is primarily used by the tests.
    In normal usage you call the function without parameters.

    Lookup order: the TZ environment variable, name-bearing config files
    (etc/timezone, var/db/zoneinfo), the ZONE/TIMEZONE setting in
    etc/sysconfig/clock or etc/conf.d/clock, the target of the etc/localtime
    symlink, the Android ``getprop`` tool, and finally the raw localtime
    tzfile.

    Raises pytz.UnknownTimeZoneError when none of these yields a timezone.
    """

    tzenv = _try_tz_from_env()
    if tzenv:
        return tzenv

    # Now look for distribution specific configuration files
    # that contain the timezone name.
    for configfile in ('etc/timezone', 'var/db/zoneinfo'):
        tzpath = os.path.join(_root, configfile)
        try:
            with open(tzpath, 'rb') as tzfile:
                data = tzfile.read()

                # Issue #3 was that /etc/timezone was a zoneinfo file.
                # That's a misconfiguration, but we need to handle it
                # gracefully:
                if data[:5] == b'TZif2':
                    continue

                etctz = data.strip().decode()
                if not etctz:
                    # Empty file, skip
                    continue
                for etctz in data.decode().splitlines():
                    # Get rid of host definitions and comments:
                    if ' ' in etctz:
                        etctz = etctz.split(' ', 1)[0]
                    if '#' in etctz:
                        etctz = etctz.split('#', 1)[0]
                    if not etctz:
                        continue
                    return pytz.timezone(etctz.replace(' ', '_'))
        except IOError:
            # File doesn't exist or is a directory
            continue

    # CentOS has a ZONE setting in /etc/sysconfig/clock,
    # OpenSUSE has a TIMEZONE setting in /etc/sysconfig/clock and
    # Gentoo has a TIMEZONE setting in /etc/conf.d/clock
    # We look through these files for a timezone:

    zone_re = re.compile(r'\s*ZONE\s*=\s*\"')
    timezone_re = re.compile(r'\s*TIMEZONE\s*=\s*\"')
    end_re = re.compile('\"')

    for filename in ('etc/sysconfig/clock', 'etc/conf.d/clock'):
        tzpath = os.path.join(_root, filename)
        try:
            with open(tzpath, 'rt') as tzfile:
                data = tzfile.readlines()

            for line in data:
                # Look for the ZONE= setting.
                match = zone_re.match(line)
                if match is None:
                    # No ZONE= setting. Look for the TIMEZONE= setting.
                    match = timezone_re.match(line)
                if match is not None:
                    # Some setting existed
                    line = line[match.end():]
                    etctz = line[:end_re.search(line).start()]

                    # We found a timezone
                    return pytz.timezone(etctz.replace(' ', '_'))
        except IOError:
            # File doesn't exist or is a directory
            continue

    # systemd distributions use symlinks that include the zone name,
    # see manpage of localtime(5) and timedatectl(1)
    tzpath = os.path.join(_root, 'etc/localtime')
    if os.path.exists(tzpath) and os.path.islink(tzpath):
        tzpath = os.path.realpath(tzpath)
        # Strip one leading path component at a time until what remains is
        # a name pytz recognises. find() returns -1 when no '/' is left,
        # making start 0 and ending the loop. Compare by value: identity
        # comparison against an int literal is not guaranteed to work.
        start = tzpath.find("/") + 1
        while start != 0:
            tzpath = tzpath[start:]
            try:
                return pytz.timezone(tzpath)
            except pytz.UnknownTimeZoneError:
                pass
            start = tzpath.find("/") + 1

    # Are we under Termux on Android? It's not officially supported, because
    # there is no reasonable way to run tests for this, but let's make an
    # effort.
    if os.path.exists('/system/bin/getprop'):
        import subprocess
        androidtz = subprocess.check_output(['getprop', 'persist.sys.timezone'])
        return pytz.timezone(androidtz.strip().decode())

    # No explicit setting existed. Use localtime
    for filename in ('etc/localtime', 'usr/local/etc/localtime'):
        tzpath = os.path.join(_root, filename)

        if not os.path.exists(tzpath):
            continue
        with open(tzpath, 'rb') as tzfile:
            return pytz.tzfile.build_tzinfo('local', tzfile)

    raise pytz.UnknownTimeZoneError('Can not find any timezone configuration')
147+
148+
149+
def get_localzone():
    """Return the local timezone, looking it up only on the first call.

    The result of ``_get_localzone()`` is stored in the module-level
    ``_cache_tz``. On every call the zone is passed to
    ``utils.assert_tz_offset`` before being returned.
    """
    global _cache_tz
    zone = _cache_tz
    if zone is None:
        zone = _get_localzone()
        _cache_tz = zone

    utils.assert_tz_offset(zone)
    return zone
157+
158+
159+
def reload_localzone():
    """Look the local timezone up again and replace the cached value.

    Call this after the timezone has changed. The fresh zone is stored in
    ``_cache_tz``, passed to ``utils.assert_tz_offset`` and returned.
    """
    global _cache_tz
    fresh_zone = _get_localzone()
    _cache_tz = fresh_zone
    utils.assert_tz_offset(fresh_zone)
    return fresh_zone

pandas/_libs/src/tzlocal/utils.py

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# -*- coding: utf-8 -*-
2+
import datetime
3+
4+
5+
def get_system_offset():
    """Return the system's current UTC offset in seconds, via ``time``.

    The ``time`` module's timezone constants (altzone, daylight, timezone,
    tzname) reflect the rules in effect at module load time or at the last
    tzset() call, so the result may be wrong for times in the past.

    ``time`` is imported inside the function on purpose, to keep
    compatibility with Windows.
    """
    import time
    dst_in_effect = time.daylight and time.localtime().tm_isdst > 0
    seconds_west = time.altzone if dst_in_effect else time.timezone
    # The time module counts seconds west of UTC; negate to get the offset.
    return -seconds_west
19+
20+
21+
def get_tz_offset(tz):
    """Return the current UTC offset of ``tz`` as a whole number of seconds.

    The offset is taken from ``datetime.datetime.now(tz).utcoffset()`` and
    truncated to an int.
    """
    current_offset = datetime.datetime.now(tz).utcoffset()
    return int(current_offset.total_seconds())
24+
25+
26+
def assert_tz_offset(tz):
    """Check that ``tz`` agrees with the system's current UTC offset.

    A mismatch probably means a misconfiguration, for example an incorrect
    timezone set in the /etc/timezone file on systemd distributions.

    Raises ValueError when ``get_tz_offset(tz)`` differs from
    ``get_system_offset()``; returns None otherwise.
    """
    found = get_tz_offset(tz)
    expected = get_system_offset()
    if found == expected:
        return
    raise ValueError(
        'Timezone offset does not match system offset: {0} != {1}. '
        'Please, check your config files.'.format(found, expected))

pandas/_libs/src/tzlocal/win32.py

+104
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
try:
2+
import _winreg as winreg
3+
except ImportError:
4+
import winreg
5+
6+
import pytz
7+
8+
from pandas._libs.src.tzlocal.windows_tz import win_tz
9+
from pandas._libs.src.tzlocal import utils
10+
11+
_cache_tz = None
12+
13+
14+
def valuestodict(key):
    """Convert a registry key's values to a dictionary.

    ``key`` is an open registry key accepted by ``winreg.QueryInfoKey`` and
    ``winreg.EnumValue``. The second item of ``QueryInfoKey(key)`` is used
    as the number of values to enumerate. Returns a dict mapping the first
    item of each ``EnumValue`` result to its second item.
    """
    # Named `values`, not `dict`, so the builtin is not shadowed.
    values = {}
    value_count = winreg.QueryInfoKey(key)[1]
    for index in range(value_count):
        entry = winreg.EnumValue(key, index)
        values[entry[0]] = entry[1]
    return values
22+
23+
24+
def get_localzone_name():
    """Return the zone name mapped from the Windows timezone configuration.

    The Windows timezone key name is read from the registry and translated
    through the ``win_tz`` mapping.

    Raises LookupError when no Windows timezone key name can be determined,
    and pytz.UnknownTimeZoneError when ``win_tz`` has no entry for it.
    """
    # Windows is special. It has unique time zone names (in several
    # meanings of the word) available, but unfortunately, they can be
    # translated to the language of the operating system, so we need to
    # do a backwards lookup, by going through all time zones and see which
    # one matches.
    handle = winreg.ConnectRegistry(None, winreg.HKEY_LOCAL_MACHINE)
    # Every registry handle is closed in a finally clause so it is released
    # on all paths, including when a lookup raises.
    try:
        TZLOCALKEYNAME = r"SYSTEM\CurrentControlSet\Control\TimeZoneInformation"
        localtz = winreg.OpenKey(handle, TZLOCALKEYNAME)
        try:
            keyvalues = valuestodict(localtz)
        finally:
            localtz.Close()

        if 'TimeZoneKeyName' in keyvalues:
            # Windows 7 (and Vista?)

            # For some reason this returns a string with loads of NUL bytes
            # at least on some systems. I don't know if this is a bug
            # somewhere, I just work around it.
            tzkeyname = keyvalues['TimeZoneKeyName'].split('\x00', 1)[0]
        else:
            # Windows 2000 or XP

            # This is the localized name:
            tzwin = keyvalues['StandardName']

            # Open the list of timezones to look up the real name:
            TZKEYNAME = r"SOFTWARE\Microsoft\Windows NT\CurrentVersion\Time Zones"
            tzkey = winreg.OpenKey(handle, TZKEYNAME)
            try:
                # Now, match this value to Time Zone information
                tzkeyname = None
                for i in range(winreg.QueryInfoKey(tzkey)[0]):
                    subkey = winreg.EnumKey(tzkey, i)
                    sub = winreg.OpenKey(tzkey, subkey)
                    try:
                        data = valuestodict(sub)
                    finally:
                        sub.Close()
                    try:
                        if data['Std'] == tzwin:
                            tzkeyname = subkey
                            break
                    except KeyError:
                        # This timezone didn't have proper configuration.
                        # Ignore it.
                        pass
            finally:
                tzkey.Close()
    finally:
        handle.Close()

    if tzkeyname is None:
        raise LookupError('Can not find Windows timezone configuration')

    timezone = win_tz.get(tzkeyname)
    if timezone is None:
        # Nope, that didn't work. Try adding "Standard Time",
        # it seems to work a lot of times:
        timezone = win_tz.get(tzkeyname + " Standard Time")

    # Return what we have.
    if timezone is None:
        raise pytz.UnknownTimeZoneError('Can not find timezone ' + tzkeyname)

    return timezone
87+
88+
89+
def get_localzone():
    """Return the pytz tzinfo matching the Windows-configured timezone.

    The zone is resolved once via ``get_localzone_name()`` and stored in
    the module-level ``_cache_tz``. On every call it is passed to
    ``utils.assert_tz_offset`` before being returned.
    """
    global _cache_tz
    zone = _cache_tz
    if zone is None:
        zone = pytz.timezone(get_localzone_name())
        _cache_tz = zone

    utils.assert_tz_offset(zone)
    return zone
97+
98+
99+
def reload_localzone():
    """Resolve the Windows timezone again and replace the cached value.

    Call this after the timezone has changed. The fresh zone is stored in
    ``_cache_tz``, passed to ``utils.assert_tz_offset`` and returned.
    """
    global _cache_tz
    fresh_zone = pytz.timezone(get_localzone_name())
    _cache_tz = fresh_zone
    utils.assert_tz_offset(fresh_zone)
    return fresh_zone

0 commit comments

Comments
 (0)