# Generates urlencoded.txt from utf-8.txt
#
# urlencoded.txt is used by Tests_Formatting_Utf8UriEncode

import codecs
import re
import sys
import urllib

# quote() lives in urllib on Python 2 and in urllib.parse on Python 3.
try:
    _quote = urllib.quote
except AttributeError:
    from urllib.parse import quote as _quote

# uncapitalize pct-encoded values, leave the rest alone
# (quote() always emits two uppercase hex digits after '%')
capfix = re.compile(r"%([0-9A-F]{2})")


def fix(match):
    """Rewrite one percent-encoded octet matched by ``capfix``.

    Octets below 128 are ASCII characters that quote() escaped; they are
    turned back into the literal character.  Octets of 128 and above are
    kept percent-encoded, with the hex digits lowercased.

    :param match: a regex match whose group(1) is a two-digit hex octet.
    :returns: the replacement text for that match.
    """
    octet = match.group(1)
    intval = int(octet, 16)
    if intval < 128:
        # quote() never escapes ASCII letters, so for its output lower()
        # does not change the character; it is kept from the original.
        return chr(intval).lower()
    return '%' + octet.lower()


def urlencode(line):
    """Percent-encode each byte of non-ASCII unicode characters.

    The line is stripped of surrounding whitespace, encoded as UTF-8 and
    quoted; ASCII characters that quote() escaped are then restored, so
    only bytes >= 0x80 remain encoded, as lowercase ``%xx``.

    :param line: a unicode text line.
    :returns: the encoded line as a native ``str``.
    """
    line = _quote(line.strip().encode("utf-8"))
    line = capfix.sub(fix, line)
    return line


def main(argv=None):
    """Read UTF-8 lines from stdin and write the encoded lines to stdout.

    :param argv: command-line arguments without the program name;
        defaults to ``sys.argv[1:]``.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if args and args[0] in ("-h", "--help"):
        sys.stdout.write(
            "Usage: python urlencode.py < utf-8.txt > urlencoded.txt\n")
        sys.exit(2)

    # The codecs reader/writer need byte streams: on Python 3 those are
    # exposed as .buffer, on Python 2 the standard streams already are bytes.
    reader = codecs.getreader("utf-8")(getattr(sys.stdin, "buffer", sys.stdin))
    writer = codecs.getwriter("ascii")(getattr(sys.stdout, "buffer", sys.stdout))

    lines = reader.readlines()
    writer.write("\n".join(map(urlencode, lines)))


if __name__ == "__main__":
    main()