datatracker/tzparse.py
2019-06-27 14:40:54 +00:00

186 lines
7 KiB
Python
Executable file

# Copyright The IETF Trust 2012-2019, All Rights Reserved
#!/usr/bin/env python
"""
NAME
tzparse
SYNOPSIS
>>> tzparse("2008-09-08 14:40:35 +0200", "%Y-%m-%d %H:%M:%S %Z")
datetime.datetime(2008, 9, 8, 14, 40, 35, tzinfo=pytz.FixedOffset(120))
>>> print(tzparse("14:40:35 CEST, 08 Sep 2008", "%H:%M:%S %Z, %d %b %Y"))
2008-09-08 14:40:35+02:00
DESCRIPTION
This describes the python 'tzparse' module. It exports only one function: tzparse().
tzparse() parses a string according to a specified format, exactly as time.strptime()
does, but with the added capability to parse most common timezone specifications,
such as 'UTC', the standard timezones ('NST', 'EST', 'CST', 'MST', 'PST', 'HNY'
[North America], 'WET', 'CET', 'EET', 'MSK' [Europe], and more), the summer timezones
('CEST', 'EEST', 'EDT', 'PDT' etc.), military timezones ('A' .. 'Z') and numeric
timezone indications ('+0200', '-0700', '-03:30' etc.).
The time zone specification may be placed anywhere, not only at the end.
tzparse() calls time.strptime() to parse everything except the timezone. To parse
the timezone, it first tries to use the pytz module, but if that doesn't give
any joy, it falls back to a hardcoded list of common time zone abbreviations and
their offset from UTC.
BUGS
* tzparse() cannot parse all valid RFC 3339 formats: it doesn't extract
fractional seconds, and the underlying time.strptime() doesn't parse fractional
seconds.
* Parsing according to format specifications using the generic %c, %x and %X
specifiers will only succeed if there are explicit delimiting characters
between the %Z specifier and the %c, %x or %X part.
COPYRIGHT
Copyright 2009 Henrik Levkowetz
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import re
import time
from datetime import datetime as Datetime, timedelta as Timedelta # For re-export. pyflakes:ignore
import pytz
# Fallback table mapping timezone abbreviations (including the single-letter
# military zones 'A' .. 'Z') to a numeric UTC offset written as "+HHMM"/"-HHMM".
# tzparse() consults it when pytz does not recognise a timezone name.
#
# A dict holds exactly one value per key, so each abbreviation is listed once.
# The abbreviations CDT, CST, EDT, EST and WST are used for more than one
# offset in practice; the offsets chosen here are:
#   CDT -> -0500, CST -> -0600, EDT -> -0400, EST -> -0500, WST -> +0800
# Listing an abbreviation a second time would silently replace the earlier
# value, so add a new spelling instead of repeating a key.
tzdef = {
    "A": "+0100", "ACDT": "+1030", "ACST": "+0930", "ADT": "-0300",
    "AEDT": "+1100", "AEST": "+1000", "AKDT": "-0800", "AKST": "-0900",
    "AST": "-0400", "AWDT": "+0900", "AWST": "+0800", "B": "+0200",
    "BST": "+0100", "C": "+0300", "CDT": "-0500", "CEDT": "+0200",
    "CEST": "+0200", "CET": "+0100", "CST": "-0600", "CXT": "+0700",
    "D": "+0400", "E": "+0500", "EDT": "-0400", "EEDT": "+0300",
    "EEST": "+0300", "EET": "+0200", "EST": "-0500", "F": "+0600",
    "G": "+0700", "GMT": "+0000", "H": "+0800", "HAA": "-0300",
    "HAC": "-0500", "HADT": "-0900", "HAE": "-0400", "HAP": "-0700",
    "HAR": "-0600", "HAST": "-1000", "HAT": "-0230", "HAY": "-0800",
    "HNA": "-0400", "HNC": "-0600", "HNE": "-0500", "HNP": "-0800",
    "HNR": "-0700", "HNT": "-0330", "HNY": "-0900", "I": "+0900",
    "IST": "+0100", "K": "+1000", "L": "+1100", "M": "+1200",
    "MDT": "-0600", "MESZ": "+0200", "MEZ": "+0100", "MSD": "+0400",
    "MSK": "+0300", "MST": "-0700", "N": "-0100", "NDT": "-0230",
    "NFT": "+1130", "NST": "-0330", "O": "-0200", "P": "-0300",
    "PDT": "-0700", "PST": "-0800", "Q": "-0400", "R": "-0500",
    "S": "-0600", "T": "-0700", "U": "-0800", "UTC": "+0000",
    "V": "-0900", "W": "-1000", "WDT": "+0900", "WEDT": "+0100",
    "WEST": "+0100", "WET": "+0000", "WST": "+0800", "X": "-1100",
    "Y": "-1200", "Z": "+0000",
}
def tzparse(string, format):
    # It's surprising that there's no tz parsing capability in the python standard
    # library...
    """
    Given a time specification string and a format, tzparse() returns a localized
    datetime.datetime.

    'format' is a time.strptime() format in which a single %Z marks the position
    of the timezone.  Everything except the timezone is parsed by
    time.strptime().  The timezone text is first looked up with
    pytz.timezone(); if pytz does not know it, it is translated through the
    'tzdef' abbreviation table and/or interpreted as a numeric offset
    ('+0200', '-03:30').

    If the format contains no %Z, or the timezone text turns out to be empty,
    the local standard timezone name (time.tzname[0]) is used instead.  The
    last example below therefore assumes a machine whose local standard
    offset is +01:00.

    Literal text in the format (including regular expression metacharacters
    such as parentheses or dots) is matched literally, and '%%' stands for a
    literal percent sign.

    Raises ValueError if the string does not match the format, if the format
    contains more than one %Z, or if the timezone cannot be resolved.

    >>> print(tzparse("9 Oct 2009 CEST 13:58", "%d %b %Y %Z %H:%M"))
    2009-10-09 13:58:00+02:00
    >>> print(tzparse("9 Oct 2009 13:58:00 Europe/Stockholm", "%d %b %Y %H:%M:%S %Z"))
    2009-10-09 13:58:00+02:00
    >>> print(tzparse("9 Oct 2009 13:58:00 +0200", "%d %b %Y %H:%M:%S %Z"))
    2009-10-09 13:58:00+02:00
    >>> print(tzparse("Fri, 9 Oct 2009 13:58:00 +0200", "%a, %d %b %Y %H:%M:%S %Z"))
    2009-10-09 13:58:00+02:00
    >>> print(tzparse("2009-10-09 13:58:00 EST", '%Y-%m-%d %H:%M:%S %Z'))
    2009-10-09 13:58:00-05:00
    >>> print(tzparse("2009-10-09 13:58:00+02:00", "%Y-%m-%d %H:%M:%S%Z"))
    2009-10-09 13:58:00+02:00
    >>> print(tzparse("1985-04-12T23:20:50Z", "%Y-%m-%dT%H:%M:%S%Z"))
    1985-04-12 23:20:50+00:00
    >>> print(tzparse("1996-12-19T16:39:57-08:00", "%Y-%m-%dT%H:%M:%S%Z"))
    1996-12-19 16:39:57-08:00
    >>> print(tzparse("13:58 (CEST) 9 Oct 2009", "%H:%M (%Z) %d %b %Y"))
    2009-10-09 13:58:00+02:00
    >>> print(tzparse("1996-12-19T16:39:57", "%Y-%m-%dT%H:%M:%S"))
    1996-12-19 16:39:57+01:00
    """
    directive_re = re.compile(r"(%.)", re.S)

    def tokenize(fmt):
        # Splitting on a capturing group yields an alternating list
        # [literal, directive, literal, ...]: even slots hold literal text
        # (possibly empty), odd slots hold two-character '%x' directives.
        # Tokenizing first means '%%Z' is seen as '%%' + 'Z', not as '%Z'.
        return directive_re.split(fmt)

    def fmt2pat(fmt):
        # Build a regular expression matching the text a strptime() format
        # (without %Z) would consume.  Literal text is escaped so characters
        # like '(' or '.' in the format cannot break or widen the pattern.
        pieces = []
        for index, token in enumerate(tokenize(fmt)):
            if index % 2 == 0:
                pieces.append(re.escape(token))
            elif token == "%%":
                pieces.append("%")
            elif token[1] in "dHIjmMSUwWyY":
                pieces.append(r"\d+")
            elif token[1] in "aAbBp":
                pieces.append(r"\w+")
            elif token[1] in "cxX":
                pieces.append(".+")
            else:
                # Directive we have no pattern for: keep it as literal text.
                pieces.append(re.escape(token))
        return "".join(pieces)

    tokens = tokenize(format)
    tz_slots = [i for i, tok in enumerate(tokens) if i % 2 and tok == "%Z"]

    if not tz_slots:
        timetuple = time.strptime(string, format)
        tzstr = time.tzname[0]
    else:
        if len(tz_slots) > 1:
            raise ValueError(
                "Format '%s' contains more than one %%Z directive" % format)
        slot = tz_slots[0]
        # Split the format around %Z and match each side against the string;
        # whatever lies between the two matches is the timezone text.
        frontfmt = "".join(tokens[:slot])
        backfmt = "".join(tokens[slot + 1:])
        front = re.search("^" + fmt2pat(frontfmt), string)
        back = re.search(fmt2pat(backfmt) + "$", string)
        frontstr = front.group(0) if front else ""
        backstr = back.group(0) if back else ""
        # Slice by match position rather than str.replace(): replace() would
        # also strip any later repetition of the prefix/suffix text.
        tz_start = front.end() if front else 0
        tz_end = back.start() if back else len(string)
        tzstr = string[tz_start:tz_end]
        timetuple = time.strptime(frontstr + backstr, frontfmt + backfmt)

    dt = Datetime(*timetuple[:6])
    if not tzstr:
        # %Z was present in the format but matched nothing: fall back to the
        # local standard timezone name instead of failing.
        tzstr = time.tzname[0]
    try:
        tz = pytz.timezone(tzstr)
    except KeyError:
        # pytz signals an unknown zone name with a KeyError subclass.
        if tzstr in tzdef:
            # if we know the offset of the abbreviation, fall back to that
            tzstr = tzdef[tzstr]
        offset = re.search("^([+-])([0-9][0-9]):?([0-9][0-9])$", tzstr)
        if not offset:
            raise ValueError("Unknown timezone '%s'" % tzstr)
        # convert numeric timezone to minutes
        sign, hours, minutes = offset.groups()
        total = int(hours) * 60 + int(minutes)
        if sign == "-":
            total = -total
        tz = pytz.FixedOffset(total)
    dt = tz.localize(dt)
    return dt
if __name__ == "__main__":
    # Script entry point: with exactly two command-line arguments, treat them
    # as (time string, format) and print the parsed result; with any other
    # argument count, run the module's doctests and print their summary.
    import sys
    cli_args = sys.argv[1:]
    if len(cli_args) != 2:
        print("Running module tests:\n")
        import doctest
        print(doctest.testmod())
    else:
        timestring, timeformat = cli_args
        print(tzparse(timestring, timeformat))