#!/usr/bin/env python

"""
NAME
    tzparse

SYNOPSIS
    >>> tzparse("2008-09-08 14:40:35 +0200", "%Y-%m-%d %H:%M:%S %Z")
    datetime.datetime(2008, 9, 8, 14, 40, 35, tzinfo=pytz.FixedOffset(120))

    >>> print(tzparse("14:40:35 CEST, 08 Sep 2008", "%H:%M:%S %Z, %d %b %Y"))
    2008-09-08 14:40:35+02:00

DESCRIPTION
    This describes the python 'tzparse' module.  It exports only one function:
    tzparse().

    tzparse() parses a string according to a specified format, exactly as
    time.strptime() does, but with the added capability to parse most common
    timezone specifications, such as 'UTC', the standard timezones ('NST',
    'EST', 'CST', 'MST', 'PST', 'HNY' [North America], 'WET', 'CET', 'EET',
    'MSK' [Europe], and more), the summer timezones ('CEST', 'EEST', 'EDT',
    'PDT' etc.), military timezones ('A' .. 'Z') and numeric timezone
    indications ('+0200', '-0700', '-03:30' etc.).

    The time zone specification may be placed anywhere, not only at the end.

    tzparse() calls time.strptime() to parse everything except the timezone.
    To parse the timezone, it first tries to use the pytz module, but if that
    doesn't give any joy, it falls back to a hardcoded list of common time
    zone abbreviations and their offset from UTC.

BUGS

    * tzparse() cannot parse all valid RFC 3339 formats: it doesn't extract
      fractional seconds, and the underlying time.strptime() doesn't parse
      fractional seconds.

    * Parsing according to format specifications using the generic %c, %x and
      %X specifiers will only succeed if there are explicit delimiting
      characters between the %Z specifier and the %c, %x or %X part.

COPYRIGHT
    Copyright 2009 Henrik Levkowetz

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
"""

import re
import time
from datetime import datetime as Datetime, timedelta as Timedelta
import pytz

# Fallback table: timezone abbreviation -> numeric UTC offset ("+HHMM").
# It is consulted only when pytz does not recognise the name.
#
# Several abbreviations are ambiguous between regions (e.g. 'CST', 'EST').
# Each key appears exactly once here; for the ambiguous ones the North
# American meaning is used for CDT/CST/EDT/EST and +0800 for WST.
tzdef = {
    "A": "+0100", "ACDT": "+1030", "ACST": "+0930", "ADT": "-0300",
    "AEDT": "+1100", "AEST": "+1000", "AKDT": "-0800", "AKST": "-0900",
    "AST": "-0400", "AWDT": "+0900", "AWST": "+0800", "B": "+0200",
    "BST": "+0100", "C": "+0300", "CDT": "-0500", "CEDT": "+0200",
    "CEST": "+0200", "CET": "+0100", "CST": "-0600", "CXT": "+0700",
    "D": "+0400", "E": "+0500", "EDT": "-0400", "EEDT": "+0300",
    "EEST": "+0300", "EET": "+0200", "EST": "-0500", "F": "+0600",
    "G": "+0700", "GMT": "+0000", "H": "+0800", "HAA": "-0300",
    "HAC": "-0500", "HADT": "-0900", "HAE": "-0400", "HAP": "-0700",
    "HAR": "-0600", "HAST": "-1000", "HAT": "-0230", "HAY": "-0800",
    "HNA": "-0400", "HNC": "-0600", "HNE": "-0500", "HNP": "-0800",
    "HNR": "-0700", "HNT": "-0330", "HNY": "-0900", "I": "+0900",
    "IST": "+0100", "K": "+1000", "L": "+1100", "M": "+1200",
    "MDT": "-0600", "MESZ": "+0200", "MEZ": "+0100", "MSD": "+0400",
    "MSK": "+0300", "MST": "-0700", "N": "-0100", "NDT": "-0230",
    "NFT": "+1130", "NST": "-0330", "O": "-0200", "P": "-0300",
    "PDT": "-0700", "PST": "-0800", "Q": "-0400", "R": "-0500",
    "S": "-0600", "T": "-0700", "U": "-0800", "UTC": "+0000",
    "V": "-0900", "W": "-1000", "WDT": "+0900", "WEDT": "+0100",
    "WEST": "+0100", "WET": "+0000", "WST": "+0800", "X": "-1100",
    "Y": "-1200", "Z": "+0000",
}

# A format string is split into two-character '%x' directives, runs of
# literal text, and (defensively) a lone trailing '%'.
_FORMAT_TOKEN = re.compile(r"%.|[^%]+|%", re.DOTALL)

# Directive classes.  The patterns built from them are deliberately loose:
# they only locate the timezone text, time.strptime() does the strict parsing.
_NUMERIC_DIRECTIVES = frozenset("dHIjmMSUwWyY")
_NAME_DIRECTIVES = frozenset("aAbBp")
_COMPOUND_DIRECTIVES = frozenset("cxX")

# '+HHMM', '-HHMM', '+HH:MM' or '-HH:MM'.
_NUMERIC_TZ = re.compile("^[+-][0-9][0-9]:?[0-9][0-9]$")


def _token_pattern(token):
    """Return the regular expression fragment matching one format token."""
    if len(token) == 2 and token[0] == "%":
        code = token[1]
        if code in _NUMERIC_DIRECTIVES:
            return r"\d+"
        if code in _NAME_DIRECTIVES:
            return r"\w+"
        if code in _COMPOUND_DIRECTIVES:
            return ".+"
        if code == "%":
            return re.escape("%")
    # Literal text (or a directive we have no pattern for): match it verbatim
    # so that characters such as '(' or '.' are not treated as regex syntax.
    return re.escape(token)


def _split_timezone(string, front_tokens, back_tokens):
    """Separate the timezone text from the rest of *string*.

    *front_tokens* / *back_tokens* are the format tokens before / after %Z.
    Returns ``(timestr, tzstr)``: the input with the timezone text cut out,
    and the timezone text itself.
    """
    front_pat = "^" + "".join([_token_pattern(t) for t in front_tokens])
    back_pat = "".join([_token_pattern(t) for t in back_tokens]) + "$"

    front_match = re.search(front_pat, string)
    front_str = front_match.group(0) if front_match else ""

    # Only look for the trailing part in what is left after the leading part,
    # and cut by position rather than with str.replace(): the leading or
    # trailing text may well occur a second time inside the timezone
    # (e.g. "02:00+02:00").
    remainder = string[len(front_str):]
    back_match = re.search(back_pat, remainder)
    if back_match:
        back_str = back_match.group(0)
        tz_str = remainder[:back_match.start()]
    else:
        back_str = ""
        tz_str = remainder

    return front_str + back_str, tz_str


def _resolve_timezone(tzstr):
    """Turn a timezone name, abbreviation or numeric offset into a tzinfo.

    Raises ValueError if *tzstr* is neither known to pytz, nor listed in
    tzdef, nor a numeric offset.
    """
    try:
        return pytz.timezone(tzstr)
    except KeyError:
        # pytz.UnknownTimeZoneError is a KeyError subclass.
        pass

    # If we know the offset of the abbreviation, fall back to that.
    offset = tzdef.get(tzstr, tzstr)
    if not _NUMERIC_TZ.search(offset):
        raise ValueError("Unknown timezone '%s'" % tzstr)

    # Convert the numeric timezone to minutes east of UTC.
    offset = offset.replace(":", "")
    minutes = int(offset[1:3]) * 60 + int(offset[3:5])
    if offset[0] == "-":
        minutes = -minutes
    return pytz.FixedOffset(minutes)


def tzparse(string, format):
    # It's surprising that there's no tz parsing capability in the python
    # standard library...

    """
    Given a time specification string and a format, tzparse() returns a
    localized datetime.datetime.

    string -- the text to parse.
    format -- a time.strptime() format, optionally containing one %Z
              specifier marking where the timezone is found.  The timezone
              may be a name known to pytz, an abbreviation listed in tzdef,
              or a numeric offset ('+0200', '-03:30').

    If the format has no %Z, or no timezone text is found, the timezone
    named by time.tzname[0] on the machine running the code is used.

    Raises ValueError if the string does not match the format, if the
    format contains more than one %Z, or if the timezone is unknown.

    >>> print(tzparse("9 Oct 2009 CEST 13:58", "%d %b %Y %Z %H:%M"))
    2009-10-09 13:58:00+02:00

    >>> print(tzparse("9 Oct 2009 13:58:00 Europe/Stockholm", "%d %b %Y %H:%M:%S %Z"))
    2009-10-09 13:58:00+02:00

    >>> print(tzparse("9 Oct 2009 13:58:00 +0200", "%d %b %Y %H:%M:%S %Z"))
    2009-10-09 13:58:00+02:00

    >>> print(tzparse("Fri, 9 Oct 2009 13:58:00 +0200", "%a, %d %b %Y %H:%M:%S %Z"))
    2009-10-09 13:58:00+02:00

    >>> print(tzparse("2009-10-09 13:58:00 EST", '%Y-%m-%d %H:%M:%S %Z'))
    2009-10-09 13:58:00-05:00

    >>> print(tzparse("2009-10-09 13:58:00+02:00", "%Y-%m-%d %H:%M:%S%Z"))
    2009-10-09 13:58:00+02:00

    >>> print(tzparse("1985-04-12T23:20:50Z", "%Y-%m-%dT%H:%M:%S%Z"))
    1985-04-12 23:20:50+00:00

    >>> print(tzparse("1996-12-19T16:39:57-08:00", "%Y-%m-%dT%H:%M:%S%Z"))
    1996-12-19 16:39:57-08:00

    >>> print(tzparse("02:00+02:00", "%H:%M%Z"))
    1900-01-01 02:00:00+02:00

    The result of the next example depends on the local timezone of the
    machine (shown here for Central European Time), so it is not run as a
    test:

    >>> print(tzparse("1996-12-19T16:39:57", "%Y-%m-%dT%H:%M:%S"))  # doctest: +SKIP
    1996-12-19 16:39:57+01:00

    """

    tokens = _FORMAT_TOKEN.findall(format)
    tz_positions = [i for i, token in enumerate(tokens) if token == "%Z"]

    if not tz_positions:
        timetuple = time.strptime(string, format)
        tzstr = ""
    else:
        if len(tz_positions) > 1:
            raise ValueError(
                "Format may contain only one %%Z specifier: '%s'" % format)
        # Extract the %Z part from the format, cut the matching text out of
        # the string, and let time.strptime() parse what is left.
        front_tokens = tokens[:tz_positions[0]]
        back_tokens = tokens[tz_positions[0] + 1:]
        timestr, tzstr = _split_timezone(string, front_tokens, back_tokens)
        timetuple = time.strptime(timestr, "".join(front_tokens + back_tokens))

    dt = Datetime(*timetuple[:6])

    if not tzstr:
        # A missing timezone is deliberately not an error: assume local time.
        tzstr = time.tzname[0]

    return _resolve_timezone(tzstr).localize(dt)


if __name__ == "__main__":
    import sys
    if len(sys.argv[1:]) == 2:
        print(tzparse(sys.argv[1], sys.argv[2]))
    else:
        print("Running module tests:\n")
        import doctest
        print(doctest.testmod())