# -*- coding: utf-8 -*-
# NOTE: ctypes is no longer used by encodex (ord() does the same job); the
# star import is kept so the set of names visible in this module is unchanged.
from ctypes import *

# Copyright Michael Anthony Puls II, http://shadow2531.com/
# Distributed under the Boost Software License, Version 1.0.
# http://boost.org/LICENSE_1_0.txt .
# Newest versions: http://shadow2531.com/opera/testcases/mailto/MailtoURIParserPack.zip

# This is a python (more or less a direct convert) version of the c++ MailtoURIParser at http://shadow2531.com/cpp/mailto_funcs11.zip
# It also uses the exact same test cases.
#
# The code below sticks to constructs that behave the same on Python 2 and
# Python 3. Decoded values are "byte strings": on Python 2 a plain str, on
# Python 3 a str in which every character stands for one byte (U+0000-U+00FF).

# Upper-case hex digits, indexed by nibble value.
_HEXITS = "0123456789ABCDEF"

# Characters that encodex() copies through without percent-encoding.
_NOENCODE = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.!*()"


# Make sure all newline pairs or single newlines are represented in the string as \n
def normalizeNewlines(s):
    """Return *s* with every "\\r\\n" pair and every lone "\\r" replaced by "\\n"."""
    return s.replace("\r\n", "\n").replace("\r", "\n")


# Decode %HH. Treat invalid %HH literally.
# The result is a string of raw UTF-8 sequences
def safe_decode(s):
    """Percent-decode *s* and normalize its newlines.

    A "%" followed by two hex digits (either case) becomes the character with
    that byte value. A "%" that is not followed by two hex digits, or that sits
    within the last two characters of the string, is copied literally.
    The decoded text is passed through normalizeNewlines() before returning.
    """
    pieces = []
    i = 0
    size = len(s)
    while i < size:
        c = s[i]
        # Both hex digits must exist, hence i + 2 < size.
        if c == "%" and i + 2 < size:
            high = _HEXITS.find(s[i + 1].upper())
            low = _HEXITS.find(s[i + 2].upper())
            if high != -1 and low != -1:
                pieces.append(chr(high * 16 + low))
                i += 3
                continue
        pieces.append(c)
        i += 1
    return normalizeNewlines("".join(pieces))


# encode hvalue data
def encodex(s):
    """Percent-encode a decoded hvalue.

    - "\\n" is written as "%0D%0A".
    - Letters, digits and "-_.!*()" are copied unchanged.
    - Every other character is written as "%HH" (upper-case hex) of its byte
      value. A character above U+00FF cannot be a single byte, so it is
      encoded as the "%HH" sequence of its UTF-8 bytes instead.
    """
    pieces = []
    for ch in s:
        if ch == "\n":
            pieces.append("%0D%0A")
        elif ch in _NOENCODE:
            pieces.append(ch)
        else:
            code = ord(ch)
            if code <= 0xFF:
                pieces.append("%%%02X" % code)
            else:
                # bytearray yields ints on both Python 2 and Python 3.
                for byte in bytearray(ch.encode("utf-8")):
                    pieces.append("%%%02X" % byte)
    return "".join(pieces)


class MailtoURIParser:
    """Parse a mailto: URI into to / subject / body / cc / bcc values.

    Each value is available both percent-encoded (getEncodedXXX) and decoded
    (getDecodedXXX). The encoded values are regenerated from the decoded ones,
    so they are normalized rather than copied from the input. Passing anything
    that does not start with "mailto:" (case-insensitive) resets all values.
    """

    # Private variables
    __uri = __nuri = __to = __dto = __subject = __dsubject = __body = __dbody = __cc = __dcc = __bcc = __dbcc = ""

    def __init__(self, inuri):
        self.__eval(inuri)

    def setURI(self, inuri):
        """Parse *inuri* and replace all stored values."""
        self.__eval(inuri)

    def getURI(self):
        """Return the URI as given, or "mailto:" if it was not a mailto URI."""
        return self.__uri

    def getNormalizedURI(self):
        """Return the rebuilt mailto URI with one hname of each basic kind."""
        return self.__nuri

    def getEncodedTO(self):
        return self.__to

    def getDecodedTO(self):
        return self.__dto

    def getEncodedSubject(self):
        return self.__subject

    def getDecodedSubject(self):
        return self.__dsubject

    def getEncodedBody(self):
        return self.__body

    def getDecodedBody(self):
        return self.__dbody

    def getEncodedCC(self):
        return self.__cc

    def getDecodedCC(self):
        return self.__dcc

    def getEncodedBCC(self):
        return self.__bcc

    def getDecodedBCC(self):
        return self.__dbcc

    # %w = original uri
    # %n = normalized uri (mailto URI with just the basic hnames with no duplicate hnames)
    # %t = decoded TO value
    # %T = encoded TO value
    # %s = decoded Subject value
    # %S = encoded Subject value
    # %m = decoded Body value
    # %M = encoded Body value
    # %c = decoded CC value
    # %C = encoded CC value
    # %b = decoded BCC value
    # %B = encoded BCC value
    # %% = %
    # An invalid %key or a % at the end of the string is treated literally.
    # By example:
    # resolveCommandFormatString("%T") would return the value of the to string.
    # resolveCommandFormatString("%%") would return %.
    def resolveCommandFormatString(self, s):
        """Return *s* with every %key replaced by the matching stored value."""
        ident = "%"
        substitutions = {
            ident: ident,
            "w": self.__uri,
            "n": self.__nuri,
            "T": self.__to,
            "t": self.__dto,
            "S": self.__subject,
            "s": self.__dsubject,
            "M": self.__body,
            "m": self.__dbody,
            "C": self.__cc,
            "c": self.__dcc,
            "B": self.__bcc,
            "b": self.__dbcc,
        }
        pieces = []
        i = 0
        size = len(s)
        while i < size:
            c = s[i]
            if c == ident and i + 1 < size:
                key = s[i + 1]
                # Unknown keys are emitted unchanged ("%" plus the key).
                pieces.append(substitutions.get(key, c + key))
                i += 2
            else:
                pieces.append(c)
                i += 1
        return "".join(pieces)

    # Split the parsable data and store it. Rules are from http://shadow2531.com/opera/testcases/mailto/rfc2368-3.html
    # For to, cc and bcc: Join all non-empty hvalues by %2C%20 and decode
    # For body: Join the first non-empty hvalue and all hvalues (even if they're empty) after that with %0D%0A and decode
    # For subject: Use only the last subject hvalue (even if it's empty and even if a previous one is not) and decode
    def __storeData(self, sdata):
        """Parse "hname=hvalue" pairs separated by "&" and store the results.

        Pairs without "=" and pairs with an hname other than to, cc, bcc,
        subject or body (compared case-insensitively) are ignored.
        """
        recipients = {"to": [], "cc": [], "bcc": []}
        subject = ""
        body_parts = []
        for field in sdata.split("&"):
            hname, separator, value = field.partition("=")
            if not separator:
                continue
            hname = hname.lower()
            if hname in recipients:
                if value:
                    recipients[hname].append(value)
            elif hname == "subject":
                subject = value
            elif hname == "body":
                # Leading empty values are dropped; once a non-empty value has
                # been seen, every later value (even empty) adds a line.
                if body_parts or value:
                    body_parts.append(value)

        self.__dto = safe_decode("%2C%20".join(recipients["to"]))
        self.__dcc = safe_decode("%2C%20".join(recipients["cc"]))
        self.__dbcc = safe_decode("%2C%20".join(recipients["bcc"]))
        self.__dsubject = safe_decode(subject)
        self.__dbody = safe_decode("%0D%0A".join(body_parts))

        # Recreate the encoded values from the decoded ones to fix any chars that should not be encoded or fix ones that should, but are not.
        self.__to = encodex(self.__dto)
        self.__subject = encodex(self.__dsubject)
        self.__body = encodex(self.__dbody)
        self.__cc = encodex(self.__dcc)
        self.__bcc = encodex(self.__dbcc)

    # Whenever a URI is set, if it's a mailto URI, parse and store the data.
    # If it's not a mailto URI, reset the data. setURI("") can be used to
    # clear the data.
    def __eval(self, inuri):
        if inuri.lower().startswith("mailto:"):
            # Dropping "mail" leaves "to:<addr>?<hfields>". Turning the first
            # ":" into "=" and the first "?" into "&" makes the address part
            # look like an ordinary "to=<addr>" hfield.
            parseable = inuri[4:].replace(":", "=", 1).replace("?", "&", 1)
            self.__storeData(parseable)
            self.__uri = inuri
            # Create a normalized mailto URI with just the basic hnames and no duplicate hnames (for mail clients that can't handle duplicate hnames)
            self.__nuri = "".join([
                "mailto:", self.__to,
                "?subject=", self.__subject,
                "&body=", self.__body,
                "&cc=", self.__cc,
                "&bcc=", self.__bcc,
            ])
        else:
            self.__uri = self.__nuri = "mailto:"
            self.__to = self.__dto = self.__subject = self.__dsubject = self.__body = self.__dbody = self.__cc = self.__dcc = self.__bcc = self.__dbcc = ""


def test(t, inc, out):
    """Print "test <t> = pass" if inc == out, otherwise a failure report.

    On failure the actual value *inc* is printed between "before_" and
    "_after" so that leading/trailing whitespace is visible.
    """
    if inc == out:
        print("test %s = pass" % (t,))
    else:
        print("test %s = fail" % (t,))
        print("before_" + inc + "_after")


# This is a reporting helper, not a unit test; keep test collectors that pick
# up "test*" functions (e.g. pytest) from trying to run it.
test.__test__ = False


# Test cases to show usage
x = MailtoURIParser("")
test(1, x.getEncodedTO(), "")
x.setURI("mailto:")
test(2, x.getEncodedTO(), "")
x.setURI("mailto:?")
test(3, x.getEncodedTO(), "")
x.setURI("mailto:email%40site.com")
test(4, x.getEncodedTO(), "email%40site.com")
x.setURI("mailto:?to=email%40site.com")
test(5, x.getEncodedTO(), "email%40site.com")
x.setURI("mailto:email1%40site.com%2C%20email2%40site.com?to=email3%40site.com%2C%20email4%40site.com")
test(6, x.getEncodedTO(), "email1%40site.com%2C%20email2%40site.com%2C%20email3%40site.com%2C%20email4%40site.com")
x.setURI("mailto:?to=email1%40site.com&to=email2%40site.com&to=email3%40site.com")
test(7, x.getEncodedTO(), "email1%40site.com%2C%20email2%40site.com%2C%20email3%40site.com")
x.setURI("mailto:?to=&to=&to=&to=&to=&to=&to=&to=&to=&to=&to=&to=&to=")
test(8, x.getEncodedTO(), "")
x.setURI("mailto:?to=&to=email1%40site.com&to=&to=email2%40site.com&to=")
test(9, x.getEncodedTO(), "email1%40site.com%2C%20email2%40site.com")
x.setURI("mailto:?subject=bark%20bark")
test(10, x.getEncodedSubject(), "bark%20bark")
x.setURI("mailto:?subject=")
test(11, x.getEncodedSubject(), "")
x.setURI("mailto:?subject=&subject=")
test(12, x.getEncodedSubject(), "")
x.setURI("mailto:?subject=1&subject=2&subject=last%20one")
test(13, x.getEncodedSubject(), "last%20one")
x.setURI("mailto:?subject=1&subject=")
test(14, x.getEncodedSubject(), "")
x.setURI("mailto:?body=line1%0D%0Aline2")
test(15, x.getEncodedBody(), "line1%0D%0Aline2")
x.setURI("mailto:?body=line1&body=line2")
test(16, x.getEncodedBody(), "line1%0D%0Aline2")
x.setURI("mailto:?body=&body=&body=line1")
test(17, x.getEncodedBody(), "line1")
x.setURI("mailto:?body=&body=&body=line1&body=&body=&body=line4&body=&body=")
test(18, x.getEncodedBody(), "line1%0D%0A%0D%0A%0D%0Aline4%0D%0A%0D%0A")
x.setURI("mailto:?body=line1&body=")
test(19, x.getEncodedBody(), "line1%0D%0A")
x.setURI("mailto:email1%40site.com?to=&to=email2%40site.com&subject=1&subject=&subject=2&subject=&body=&body=&body=line1&body=&body=&body=line4&body=&cc=zam%40site.com&bcc=bam%40site.com")
test(20, x.getNormalizedURI(), "mailto:email1%40site.com%2C%20email2%40site.com?subject=&body=line1%0D%0A%0D%0A%0D%0Aline4%0D%0A&cc=zam%40site.com&bcc=bam%40site.com")
big = "mailto:%22a%5C%5Cb%22%20%3Cemail1%40site.com%3E%2C%20email2%40site.com?to=&to=email3%40site.com%2C%20email4%40site.com&to=email5%40site.com%2C%20email6%40site.com&to=&to=&subject=You%20should%20not%20see%20me%2E&subject=last%20call%20for%20cats%20%26%20dogs%2E&body=&body=&body=&body=line1%0D%0Aline2&body=&body=&body=&body=line6&body=line7%0D%0A1+2+3+4+5%0D%0AColumn1A%09Column1B%09Column1C%09Column1D%0D%0A%E2%88%9A&cc=%22a%5C%5Cb%22%20%3Csomeone1%40site.com%3E%2C%20someone2%40site.com&cc=&cc=someone3%40site.com%2C%20someone4%40site.com%2C%20%22foo%20%5C%22bar%5C%22%22%20%3Cfoo%40bar%2Ecom%3E&cc=&cc=&bcc=%22a%5C%5Cb%22%20%3Csomeoneelse1%40site.com%3E%2C%20someoneelse2%40site.com&bcc=&bcc=someoneelse3%40site.com%2C%20someoneelse4%40site.com&bcc=&bcc="
x.setURI(big)
test(21, x.getNormalizedURI(), "mailto:%22a%5C%5Cb%22%20%3Cemail1%40site.com%3E%2C%20email2%40site.com%2C%20email3%40site.com%2C%20email4%40site.com%2C%20email5%40site.com%2C%20email6%40site.com?subject=last%20call%20for%20cats%20%26%20dogs.&body=line1%0D%0Aline2%0D%0A%0D%0A%0D%0A%0D%0Aline6%0D%0Aline7%0D%0A1%2B2%2B3%2B4%2B5%0D%0AColumn1A%09Column1B%09Column1C%09Column1D%0D%0A%E2%88%9A&cc=%22a%5C%5Cb%22%20%3Csomeone1%40site.com%3E%2C%20someone2%40site.com%2C%20someone3%40site.com%2C%20someone4%40site.com%2C%20%22foo%20%5C%22bar%5C%22%22%20%3Cfoo%40bar.com%3E&bcc=%22a%5C%5Cb%22%20%3Csomeoneelse1%40site.com%3E%2C%20someoneelse2%40site.com%2C%20someoneelse3%40site.com%2C%20someoneelse4%40site.com")
test(22, x.resolveCommandFormatString("\"c:\\program files\\program\\program.exe\" \"%T\" \"%S\" \"%M\" \"%C\" \"%B\""), "\"c:\\program files\\program\\program.exe\" \"%22a%5C%5Cb%22%20%3Cemail1%40site.com%3E%2C%20email2%40site.com%2C%20email3%40site.com%2C%20email4%40site.com%2C%20email5%40site.com%2C%20email6%40site.com\" \"last%20call%20for%20cats%20%26%20dogs.\" \"line1%0D%0Aline2%0D%0A%0D%0A%0D%0A%0D%0Aline6%0D%0Aline7%0D%0A1%2B2%2B3%2B4%2B5%0D%0AColumn1A%09Column1B%09Column1C%09Column1D%0D%0A%E2%88%9A\" \"%22a%5C%5Cb%22%20%3Csomeone1%40site.com%3E%2C%20someone2%40site.com%2C%20someone3%40site.com%2C%20someone4%40site.com%2C%20%22foo%20%5C%22bar%5C%22%22%20%3Cfoo%40bar.com%3E\" \"%22a%5C%5Cb%22%20%3Csomeoneelse1%40site.com%3E%2C%20someoneelse2%40site.com%2C%20someoneelse3%40site.com%2C%20someoneelse4%40site.com\"")
test(23, x.resolveCommandFormatString("%%T"), "%T")
x.setURI("mailto:?subject=%E2%88%9A")
test(24, x.getDecodedSubject(), "\xE2\x88\x9A")
x.setURI("mailto:?subject=1+2+3+4+5")
test(25, x.getDecodedSubject(), "1+2+3+4+5")
x.setURI("mailto:?subject=%G3")
test(26, x.getDecodedSubject(), "%G3")
x.setURI("mailto:?subject=%%%")
test(27, x.getDecodedSubject(), "%%%")
x.setURI("mailto:?subject=%")
test(28, x.getDecodedSubject(), "%")
x.setURI("mailto:?body=%0A%0D%0A%0D")
test(29, x.getDecodedBody(), "\n\n\n")
x.setURI("mailto:?&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&body=line1")
test(30, x.getEncodedBody(), "line1")
x.setURI("")
test(31, x.getEncodedCC(), "")
x.setURI("mailto:")
test(32, x.getEncodedCC(), "")
x.setURI("mailto:?")
test(33, x.getEncodedCC(), "")
x.setURI("mailto:?cc=email%40site.com")
test(34, x.getEncodedCC(), "email%40site.com")
x.setURI("mailto:?cc=email1%40site.com%2C%20email2%40site.com&cc=email3%40site.com%2C%20email4%40site.com")
test(35, x.getEncodedCC(), "email1%40site.com%2C%20email2%40site.com%2C%20email3%40site.com%2C%20email4%40site.com")
x.setURI("mailto:?cc=email1%40site.com&cc=email2%40site.com&cc=email3%40site.com")
test(36, x.getEncodedCC(), "email1%40site.com%2C%20email2%40site.com%2C%20email3%40site.com")
x.setURI("mailto:?cc=&cc=&cc=&cc=&cc=&cc=&cc=&cc=&cc=&cc=&cc=&cc=&cc=")
test(37, x.getEncodedCC(), "")
x.setURI("mailto:?cc=&cc=email1%40site.com&cc=&cc=email2%40site.com&cc=")
test(38, x.getEncodedCC(), "email1%40site.com%2C%20email2%40site.com")
x.setURI("")
test(39, x.getEncodedBCC(), "")
x.setURI("mailto:")
test(40, x.getEncodedBCC(), "")
x.setURI("mailto:?")
test(41, x.getEncodedBCC(), "")
x.setURI("mailto:?bcc=email%40site.com")
test(42, x.getEncodedBCC(), "email%40site.com")
x.setURI("mailto:?bcc=email1%40site.com%2C%20email2%40site.com&bcc=email3%40site.com%2C%20email4%40site.com")
test(43, x.getEncodedBCC(), "email1%40site.com%2C%20email2%40site.com%2C%20email3%40site.com%2C%20email4%40site.com")
x.setURI("mailto:?bcc=email1%40site.com&bcc=email2%40site.com&bcc=email3%40site.com")
test(44, x.getEncodedBCC(), "email1%40site.com%2C%20email2%40site.com%2C%20email3%40site.com")
x.setURI("mailto:?bcc=&bcc=&bcc=&bcc=&bcc=&bcc=&bcc=&bcc=&bcc=&bcc=&bcc=&bcc=&bcc=")
test(45, x.getEncodedBCC(), "")
x.setURI("mailto:?bcc=&bcc=email1%40site.com&bcc=&bcc=email2%40site.com&bcc=")
test(46, x.getEncodedBCC(), "email1%40site.com%2C%20email2%40site.com")
x.setURI("mailto:?subject=m%26m%09bob")
test(47, x.getDecodedSubject(), "m&m\tbob")
x.setURI("mailto:email%40site.com")
test(48, x.resolveCommandFormatString("%%%%%t"), "%%email@site.com")
x.setURI("mailto:?cc=email%40site.com")
test(49, x.getDecodedCC(), "email@site.com")
x.setURI("mailto:?to=email%40site.com")
test(50, x.getDecodedTO(), "email@site.com")
x.setURI("MaIlTo:?BcC=email%40site.com")
test(51, x.getDecodedBCC(), "email@site.com")
x.setURI(big)
fmt = "program_that_wants_decoded_values -to \"%t\" -subj \"%s\" -cc \"%c\" -bcc \"%b\" -body \"%m\""
# The <address> parts below are what decoding %3C...%3E in `big` produces.
test(52, x.resolveCommandFormatString(fmt), "program_that_wants_decoded_values -to \"\"a\\\\b\" <email1@site.com>, email2@site.com, email3@site.com, email4@site.com, email5@site.com, email6@site.com\" -subj \"last call for cats & dogs.\" -cc \"\"a\\\\b\" <someone1@site.com>, someone2@site.com, someone3@site.com, someone4@site.com, \"foo \\\"bar\\\"\" <foo@bar.com>\" -bcc \"\"a\\\\b\" <someoneelse1@site.com>, someoneelse2@site.com, someoneelse3@site.com, someoneelse4@site.com\" -body \"line1\nline2\n\n\n\nline6\nline7\n1+2+3+4+5\nColumn1A\tColumn1B\tColumn1C\tColumn1D\n\xE2\x88\x9A\"")
x.setURI("")
test(53, x.getEncodedTO(), "")
test(54, x.getEncodedBCC(), "")
test(55, x.getEncodedCC(), "")
test(56, x.getEncodedBody(), "")
test(57, x.getEncodedSubject(), "")
test(58, x.getURI(), "mailto:")
test(59, x.getNormalizedURI(), "mailto:")
test(60, x.getDecodedTO(), "")
test(61, x.getDecodedBCC(), "")
test(62, x.getDecodedCC(), "")
test(63, x.getDecodedBody(), "")
test(64, x.getDecodedSubject(), "")
x.setURI("mailto:?body=+++")
test(65, x.getEncodedBody(), "%2B%2B%2B")
test(66, x.getDecodedBody(), "+++")