Added a module which handles parsing of time strings which include timezone information, tzparse.py.
- Legacy-Id: 4975
This commit is contained in:
parent
8aa4922978
commit
1a887c1f7f
185
tzparse.py
Executable file
185
tzparse.py
Executable file
|
@ -0,0 +1,185 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
NAME
|
||||
tzparse
|
||||
|
||||
SYNOPSIS
|
||||
>>> tzparse("2008-09-08 14:40:35 +0200", "%Y-%m-%d %H:%M:%S %Z")
|
||||
datetime.datetime(2008, 9, 8, 14, 40, 35, tzinfo=pytz.FixedOffset(120))
|
||||
|
||||
>>> print tzparse("14:40:35 CEST, 08 Sep 2008", "%H:%M:%S %Z, %d %b %Y")
|
||||
2008-09-08 14:40:35+02:00
|
||||
|
||||
DESCRIPTION
|
||||
This describes the python 'tzparse' module. It exports only one function: tzparse().
|
||||
|
||||
tzparse() parses a string according to a specified format, exactly as time.strptime()
|
||||
does, but with the added capability to parse most common timezone specifications,
|
||||
such as 'UTC', the standard timezones ('NST', 'EST', 'CST', 'MST', 'PST', 'HNY'
|
||||
[North America], 'WET', 'CET', 'EET', 'MSK' [Europe], and more), the summer timezones
|
||||
('CEST', 'EEST', 'EDT', PDT' etc.), military timezones ('A' .. 'Z') and numeric
|
||||
timezone indications ('+0200', '-0700', '-03:30' etc.).
|
||||
|
||||
The time zone specification may be placed anywhere, not only at the end.
|
||||
|
||||
tzparse() calls time.strptime() to parse everything except the timezone. To parse
|
||||
the timezone, it first tries to use the pytz module, but if that doesn't give
|
||||
any joy, it falls back to a hardcoded list of common time zone abbreviations and
|
||||
their offset from UTC.
|
||||
|
||||
BUGS
|
||||
|
||||
* tzparse() cannot parse all valid RFC 3339 formats: it doesn't extract
|
||||
fractional seconds, and the underlying time.strptime() doesn't parse fractional
|
||||
seconds.
|
||||
|
||||
* Parsing according to format specifications using the generic %c, %x and %X
|
||||
specifiers will only succeed if there are explicit delimiting characters
|
||||
between the %Z specifier and the %c, %x or %X part.
|
||||
|
||||
COPYRIGHT
|
||||
Copyright 2009 Henrik Levkowetz
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
"""
|
||||
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime as Datetime, timedelta as Timedelta
|
||||
import pytz
|
||||
|
||||
tzdef = {
|
||||
"A": "+0100", "ACDT": "+1030", "ACST": "+0930", "ADT": "-0300",
|
||||
"AEDT": "+1100", "AEST": "+1000", "AKDT": "-0800", "AKST": "-0900",
|
||||
"AST": "-0400", "AWDT": "+0900", "AWST": "+0800", "B": "+0200",
|
||||
"BST": "+0100", "C": "+0300", "CDT": "+1030", "CDT": "-0500",
|
||||
"CEDT": "+0200", "CEST": "+0200", "CET": "+0100", "CST": "+1030",
|
||||
"CST": "+0930", "CST": "-0600", "CXT": "+0700", "D": "+0400",
|
||||
"E": "+0500", "EDT": "+1100", "EDT": "-0400", "EEDT": "+0300",
|
||||
"EEST": "+0300", "EET": "+0200", "EST": "+1100", "EST": "+1000",
|
||||
"EST": "-0500", "F": "+0600", "G": "+0700", "GMT": "+0000",
|
||||
"H": "+0800", "HAA": "-0300", "HAC": "-0500", "HADT": "-0900",
|
||||
"HAE": "-0400", "HAP": "-0700", "HAR": "-0600", "HAST": "-1000",
|
||||
"HAT": "-0230", "HAY": "-0800", "HNA": "-0400", "HNC": "-0600",
|
||||
"HNE": "-0500", "HNP": "-0800", "HNR": "-0700", "HNT": "-0330",
|
||||
"HNY": "-0900", "I": "+0900", "IST": "+0100", "K": "+1000",
|
||||
"L": "+1100", "M": "+1200", "MDT": "-0600", "MESZ": "+0200",
|
||||
"MEZ": "+0100", "MSD": "+0400", "MSK": "+0300", "MST": "-0700",
|
||||
"N": "-0100", "NDT": "-0230", "NFT": "+1130", "NST": "-0330",
|
||||
"O": "-0200", "P": "-0300", "PDT": "-0700", "PST": "-0800",
|
||||
"Q": "-0400", "R": "-0500", "S": "-0600", "T": "-0700",
|
||||
"U": "-0800", "UTC": "+0000", "V": "-0900", "W": "-1000",
|
||||
"WDT": "+0900", "WEDT": "+0100", "WEST": "+0100", "WET": "+0000",
|
||||
"WST": "+0900", "WST": "+0800", "X": "-1100", "Y": "-1200",
|
||||
"Z": "+0000",
|
||||
}
|
||||
|
||||
|
||||
def tzparse(string, format):
|
||||
# It's surprising that there's no tz parsing capability in the python standard
|
||||
# library...
|
||||
|
||||
"""
|
||||
Given a time specification string and a format, tzparse() returns a localized
|
||||
datetime.datetime.
|
||||
|
||||
>>> print tzparse("9 Oct 2009 CEST 13:58", "%d %b %Y %Z %H:%M")
|
||||
2009-10-09 13:58:00+02:00
|
||||
|
||||
>>> print tzparse("9 Oct 2009 13:58:00 Europe/Stockholm", "%d %b %Y %H:%M:%S %Z")
|
||||
2009-10-09 13:58:00+02:00
|
||||
|
||||
>>> print tzparse("9 Oct 2009 13:58:00 +0200", "%d %b %Y %H:%M:%S %Z")
|
||||
2009-10-09 13:58:00+02:00
|
||||
|
||||
>>> print tzparse("Fri, 9 Oct 2009 13:58:00 +0200", "%a, %d %b %Y %H:%M:%S %Z")
|
||||
2009-10-09 13:58:00+02:00
|
||||
|
||||
>>> print tzparse("2009-10-09 13:58:00 EST", '%Y-%m-%d %H:%M:%S %Z')
|
||||
2009-10-09 13:58:00-05:00
|
||||
|
||||
>>> print tzparse("2009-10-09 13:58:00+02:00", "%Y-%m-%d %H:%M:%S%Z")
|
||||
2009-10-09 13:58:00+02:00
|
||||
|
||||
>>> print tzparse("1985-04-12T23:20:50Z", "%Y-%m-%dT%H:%M:%S%Z")
|
||||
1985-04-12 23:20:50+00:00
|
||||
|
||||
>>> print tzparse("1996-12-19T16:39:57-08:00", "%Y-%m-%dT%H:%M:%S%Z")
|
||||
1996-12-19 16:39:57-08:00
|
||||
|
||||
>>> print tzparse("1996-12-19T16:39:57", "%Y-%m-%dT%H:%M:%S")
|
||||
1996-12-19 16:39:57+01:00
|
||||
|
||||
"""
|
||||
|
||||
if not "%Z" in format:
|
||||
timetuple = time.strptime(string, format)
|
||||
tzstr = time.tzname[0]
|
||||
else:
|
||||
# extract the %Z part from the format and build a pattern to extract it
|
||||
# from the string, too.
|
||||
|
||||
def fmt2pat(s):
|
||||
s = re.sub("%[dHIjmMSUwWyY]", "\d+", s)
|
||||
s = re.sub("%[aAbBp]", "\w+", s)
|
||||
s = re.sub("%[cxX]", ".+", s)
|
||||
s = s.replace("%%", "%")
|
||||
return s
|
||||
|
||||
frontfmt, backfmt = format.split("%Z")
|
||||
frontpat = "^" + fmt2pat(frontfmt)
|
||||
backpat = fmt2pat(backfmt) + "$"
|
||||
|
||||
|
||||
frontstr = re.search(frontpat, string) and re.search(frontpat, string).group(0) or ""
|
||||
backstr = re.search(backpat, string) and re.search(backpat, string).group(0) or ""
|
||||
tzstr = string.replace(frontstr, "").replace(backstr, "") # This will fail is backstr occurs twice
|
||||
|
||||
timetuple = time.strptime(frontstr+backstr, frontfmt+backfmt)
|
||||
dt = Datetime(*timetuple[:6])
|
||||
|
||||
if not tzstr:
|
||||
tzstr = time.tzname[0]
|
||||
#raise ValueError("No timezone string found in '%s', but format contained %Z: '%s'."%(string, format))
|
||||
try:
|
||||
tz = pytz.timezone(tzstr)
|
||||
except KeyError:
|
||||
if tzstr in tzdef:
|
||||
# if we know the offset of the abbreviation, fall back to that
|
||||
tzstr = tzdef[tzstr]
|
||||
if re.search("^[+-][0-9][0-9]:?[0-9][0-9]$", tzstr):
|
||||
if ":" in tzstr:
|
||||
tzstr = tzstr[:3]+tzstr[4:]
|
||||
# convert numeric timezone to minutes
|
||||
sign = tzstr[0]
|
||||
h = int(tzstr[1:3])
|
||||
m = h*60 + int(tzstr[3:5])
|
||||
if sign == "-":
|
||||
m = -m
|
||||
tz = pytz.FixedOffset(m)
|
||||
else:
|
||||
raise ValueError("Unknown timezone '%s'" % tzstr)
|
||||
dt = tz.localize(dt)
|
||||
|
||||
return dt
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
if len(sys.argv[1:]) == 2:
|
||||
print tzparse(sys.argv[1], sys.argv[2])
|
||||
else:
|
||||
print "Running module tests:\n"
|
||||
import doctest
|
||||
print doctest.testmod()
|
||||
|
Loading…
Reference in a new issue