# windows1252.py (765 B)
  1. # Generates test data for functions converting between
  2. # dodgy windows-1252-only values and their unicode counterparts
  3. unichars = ["201A", "0192", "201E", "2026", "2020", "2021",
  4. "02C6", "2030", "0160", "2039", "0152", "2018",
  5. "2019", "201C", "201D", "2022", "2013", "2014",
  6. "02DC", "2122", "0161", "203A", "0153", "0178"];
  7. winpoints = []
  8. unipoints = []
  9. for char in unichars:
  10. char = unichr(int(char, 16))
  11. dec = ord(char)
  12. win = ord(char.encode("windows-1252"))
  13. unipoints.append(dec)
  14. winpoints.append(win)
  15. def entitize(s):
  16. return "&#%s;" % s
  17. winpoints = map(entitize, winpoints)
  18. unipoints = map(entitize, unipoints)
  19. print "".join(winpoints), "".join(unipoints)