/* _Example_ mailto uri parser (This is junk, see mailto_funcs1.zip and mailto_funcs2.zip instead)*/
// Newest versions: http://shadow2531.com/opera/testcases/mailto/MailtoURIParserPack.zip

#include <algorithm>
#include <cctype>
#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include <vector>

using namespace std;

/*
    Note: this code counts on a pretty perfect mailto uri. Possibly some extra error handling needed.
    The bloated code is for example purposes.
*/

/*
    Returns a lowercase _copy_ of a string.

    Each char is routed through unsigned char before tolower() because passing
    a negative char value to the <cctype> functions is undefined behaviour.
*/
string toLowerCase( const string& s )
{
    string temp( s );
    for ( string::size_type i = 0; i < temp.size(); ++i )
    {
        temp[i] = static_cast<char>( tolower( static_cast<unsigned char>( temp[i] ) ) );
    }
    return temp;
}

/*
    Returns a _copy_ of the mailto string with some modifications so it can be
    tokenized properly:
      - the leading 'mailto:' (the first 7 characters, not verified here) is
        replaced by 'to='
      - raw spaces become %20
      - every '?' becomes '&'
    A string shorter than 7 characters yields just "to=".
*/
string prepare( const string& s )
{
    /* number of characters in "mailto:" */
    const string::size_type scheme_size = 7;

    /* first, need to change 'mailto:' to 'to=' */
    string temp = "to=";
    for ( string::size_type i = scheme_size; i < s.size(); ++i )
    {
        if ( s[i] == ' ' )
        {
            /* Just in case there are any raw spaces. ( Like if passing from the
               command line where things might not get resolved ) */
            temp += "%20";
        }
        else if ( s[i] == '?' )
        {
            /* only the first ? should be changed to & as there shouldn't be
               any more ?, but change them all */
            temp.push_back( '&' );
        }
        else
        {
            temp.push_back( s[i] );
        }
    }
    return temp;
}

/*
    My tokenizer function for splitting by & (or any other separator).

    s    - the string to split
    f    - the separator; if it is empty the result is a single entry holding s
    keep - when true, a copy of the separator is added to the vector as its own
           entry after each token that precedes a separator

    Adjacent separators produce empty entries, and the result always holds at
    least one entry (an empty s gives one empty string).
*/
std::vector<std::string> split( const std::string& s, const std::string& f, const bool keep = false )
{
    std::vector<std::string> temp;
    if ( f.empty() )
    {
        temp.push_back( s );
        return temp;
    }
    typedef std::string::const_iterator iter;
    const std::string::difference_type f_size( std::distance( f.begin(), f.end() ) );
    iter i( s.begin() );
    for ( iter pos; ( pos = std::search( i, s.end(), f.begin(), f.end() ) ) != s.end(); )
    {
        temp.push_back( std::string( i, pos ) );
        /* step over the separator that was just found */
        std::advance( pos, f_size );
        i = pos;
        if ( keep )
        {
            temp.push_back( f );
        }
    }
    /* whatever follows the last separator (possibly nothing) */
    temp.push_back( std::string( i, s.end() ) );
    return temp;
}

/*
    Returns an unescaped version of a string.
    If there's an invalid %HH, it is treated literally.
*/
string decode( const string& s )
{
    const string hex_digits( "0123456789ABCDEF" );
    string n;
    n.reserve( s.size() );
    for ( string::size_type i = 0; i < s.size(); ++i )
    {
        if ( s[i] == '%' && i + 2 < s.size() )
        {
            /* unsigned char keeps toupper() defined for bytes >= 0x80 */
            const string::size_type hi = hex_digits.find(
                static_cast<char>( toupper( static_cast<unsigned char>( s[ i + 1 ] ) ) ) );
            const string::size_type lo = hex_digits.find(
                static_cast<char>( toupper( static_cast<unsigned char>( s[ i + 2 ] ) ) ) );
            if ( hi != string::npos && lo != string::npos )
            {
                n.push_back( static_cast<char>( hi * 16 + lo ) );
                i += 2;
                continue;
            }
        }
        n.push_back( s[i] );
    }
    return n;
}

/*
    Joins values with separator and returns the result.
    When skip_empty is true, empty values are left out so they cannot produce a
    leading, trailing or doubled separator.
*/
static string joinValues( const vector<string>& values, const string& separator, const bool skip_empty )
{
    string joined;
    bool first = true;
    for ( vector<string>::const_iterator v = values.begin(); v != values.end(); ++v )
    {
        if ( skip_empty && v->empty() )
        {
            continue;
        }
        if ( !first )
        {
            joined += separator;
        }
        joined += *v;
        first = false;
    }
    return joined;
}

int main()
{
    /* all this stuff belongs in a function etc., but it's not for example purposes */

    /* the test mailto uri */
    const string test( "mailto:test1@site.com,test2@site.com?to=test3@site.com,test4@site.com&to=test5@site.com,test6@site.com&body=line1%0D%0Aline2&body=line3&body=line4&subject=first&subject=second&subject=last%20m%26m&cc=fred1@site.com,fred2@site.com&cc=fred3@site.com,fred4@site.com&bcc=bob1@site.com,bob2@site.com&bcc=bob3@site.com,bob4@site.com" );

    /* if the uri doesn't start with mailto:, we don't want it
       (only the first 7 characters need to be looked at) */
    if ( toLowerCase( test.substr( 0, 7 ) ) == "mailto:" )
    {
        const string href( prepare( test ) );

        /* keys stores multiple occurrences of key */
        map<string, vector<string> > keys;

        /* split the uri up by & so it can be examined */
        const vector<string> parts( split( href, "&", false ) );

        /* check out each part to see if it contains a key=value that we want.
           If it does, add it to the keys map. */
        for ( vector<string>::size_type i = 0; i < parts.size(); ++i )
        {
            const string::size_type eq = parts[i].find( '=' );
            if ( eq == string::npos )
            {
                /* no '=' at all, so this is not a key=value pair */
                continue;
            }
            const string k = toLowerCase( parts[i].substr( 0, eq ) );
            if ( k == "to" || k == "body" || k == "subject" || k == "cc" || k == "bcc" )
            {
                keys[ k ].push_back( parts[i].substr( eq + 1 ) );
            }
        }

        /* a map to contain the final encoded keyword=value pairs.
           Using a map for ease of use with the loop below. */
        map<string, string> fin;
        fin["to"] = "";
        fin["subject"] = "";
        fin["body"] = "";
        fin["cc"] = "";
        fin["bcc"] = "";

        /* run through the keys map and accumulate the values of all occurrences
           of the key and store the string in the fin map */
        for ( map<string, vector<string> >::const_iterator i = keys.begin(); i != keys.end(); ++i )
        {
            if ( i->first == "to" || i->first == "cc" || i->first == "bcc" )
            {
                /* to, cc and bcc are a comma-separated list of all email
                   addresses from all occurrences of the key */
                fin[i->first] = joinValues( i->second, ",", true );
            }
            else if ( i->first == "subject" )
            {
                /* only the last subject keyword counts (a vector stored in
                   keys always holds at least one value) */
                fin["subject"] = i->second.back();
            }
            else if ( i->first == "body" )
            {
                /* If there are multiple body keywords, they are joined by %0D%0A */
                fin["body"] = joinValues( i->second, "%0D%0A", false );
            }
        }

        /* Create the decoded strings. Not decoding the original mailto URI
           because that wouldn't make sense */
        const string d_to( decode( fin["to"] ) );
        const string d_subject( decode( fin["subject"] ) );
        const string d_body( decode( fin["body"] ) );
        const string d_cc( decode( fin["cc"] ) );
        const string d_bcc( decode( fin["bcc"] ) );

        /* print out the encoded and decoded versions of the keys */
        cout << fin["to"] << "\n\n";
        cout << d_to << "\n\n";
        cout << fin["subject"] << "\n\n";
        cout << d_subject << "\n\n";
        cout << fin["body"] << "\n\n";
        cout << d_body << "\n\n";
        cout << fin["cc"] << "\n\n";
        cout << d_cc << "\n\n";
        cout << fin["bcc"] << "\n\n";
        cout << d_bcc << "\n\n";

        /* the original encoded mailto URI */
        cout << test << "\n\n";

        /* _example_ extra stuff that could be done.
           The following is out of sequence from the above code. */

        /* split up the comma-separated lists of email addresses so each email
           address is separate. Then, each email address _could_ be checked for
           errors and the offending ones removed */
        const vector<string> to_bank( split( fin["to"], ",", false ) );
        const vector<string> cc_bank( split( fin["cc"], ",", false ) );
        const vector<string> bcc_bank( split( fin["bcc"], ",", false ) );

        /* just printing them out for example */
        copy( to_bank.begin(), to_bank.end(), ostream_iterator<string>( cout, "\n" ) );
        copy( cc_bank.begin(), cc_bank.end(), ostream_iterator<string>( cout, "\n" ) );
        copy( bcc_bank.begin(), bcc_bank.end(), ostream_iterator<string>( cout, "\n" ) );
    }
}

/*
g++ -Wall -Wextra this.cpp -o this -O3 -msse3 -mtune=i686 -s
*/