escsm.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261
######################## BEGIN LICENSE BLOCK ########################
# The Original Code is mozilla.org code.
#
# The Initial Developer of the Original Code is
# Netscape Communications Corporation.
# Portions created by the Initial Developer are Copyright (C) 1998
# the Initial Developer. All Rights Reserved.
#
# Contributor(s):
#   Mark Pilgrim - port to Python
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
# 02110-1301  USA
######################### END LICENSE BLOCK #########################
  27. from .codingstatemachinedict import CodingStateMachineDict
  28. from .enums import MachineState
  29. # fmt: off
  30. HZ_CLS = (
  31. 1, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
  32. 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f
  33. 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
  34. 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f
  35. 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27
  36. 0, 0, 0, 0, 0, 0, 0, 0, # 28 - 2f
  37. 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
  38. 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
  39. 0, 0, 0, 0, 0, 0, 0, 0, # 40 - 47
  40. 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f
  41. 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
  42. 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
  43. 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
  44. 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
  45. 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
  46. 0, 0, 0, 4, 0, 5, 2, 0, # 78 - 7f
  47. 1, 1, 1, 1, 1, 1, 1, 1, # 80 - 87
  48. 1, 1, 1, 1, 1, 1, 1, 1, # 88 - 8f
  49. 1, 1, 1, 1, 1, 1, 1, 1, # 90 - 97
  50. 1, 1, 1, 1, 1, 1, 1, 1, # 98 - 9f
  51. 1, 1, 1, 1, 1, 1, 1, 1, # a0 - a7
  52. 1, 1, 1, 1, 1, 1, 1, 1, # a8 - af
  53. 1, 1, 1, 1, 1, 1, 1, 1, # b0 - b7
  54. 1, 1, 1, 1, 1, 1, 1, 1, # b8 - bf
  55. 1, 1, 1, 1, 1, 1, 1, 1, # c0 - c7
  56. 1, 1, 1, 1, 1, 1, 1, 1, # c8 - cf
  57. 1, 1, 1, 1, 1, 1, 1, 1, # d0 - d7
  58. 1, 1, 1, 1, 1, 1, 1, 1, # d8 - df
  59. 1, 1, 1, 1, 1, 1, 1, 1, # e0 - e7
  60. 1, 1, 1, 1, 1, 1, 1, 1, # e8 - ef
  61. 1, 1, 1, 1, 1, 1, 1, 1, # f0 - f7
  62. 1, 1, 1, 1, 1, 1, 1, 1, # f8 - ff
  63. )
  64. HZ_ST = (
  65. MachineState.START, MachineState.ERROR, 3, MachineState.START, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, # 00-07
  66. MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 08-0f
  67. MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.START, MachineState.START, 4, MachineState.ERROR, # 10-17
  68. 5, MachineState.ERROR, 6, MachineState.ERROR, 5, 5, 4, MachineState.ERROR, # 18-1f
  69. 4, MachineState.ERROR, 4, 4, 4, MachineState.ERROR, 4, MachineState.ERROR, # 20-27
  70. 4, MachineState.ITS_ME, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 28-2f
  71. )
  72. # fmt: on
  73. HZ_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
  74. HZ_SM_MODEL: CodingStateMachineDict = {
  75. "class_table": HZ_CLS,
  76. "class_factor": 6,
  77. "state_table": HZ_ST,
  78. "char_len_table": HZ_CHAR_LEN_TABLE,
  79. "name": "HZ-GB-2312",
  80. "language": "Chinese",
  81. }
  82. # fmt: off
  83. ISO2022CN_CLS = (
  84. 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
  85. 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f
  86. 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
  87. 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f
  88. 0, 0, 0, 0, 0, 0, 0, 0, # 20 - 27
  89. 0, 3, 0, 0, 0, 0, 0, 0, # 28 - 2f
  90. 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
  91. 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
  92. 0, 0, 0, 4, 0, 0, 0, 0, # 40 - 47
  93. 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f
  94. 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
  95. 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
  96. 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
  97. 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
  98. 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
  99. 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f
  100. 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87
  101. 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f
  102. 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97
  103. 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f
  104. 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
  105. 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af
  106. 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
  107. 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
  108. 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
  109. 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf
  110. 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
  111. 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
  112. 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7
  113. 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef
  114. 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7
  115. 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff
  116. )
  117. ISO2022CN_ST = (
  118. MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 00-07
  119. MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 08-0f
  120. MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 10-17
  121. MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 4, MachineState.ERROR, # 18-1f
  122. MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 20-27
  123. 5, 6, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 28-2f
  124. MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 30-37
  125. MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.START, # 38-3f
  126. )
  127. # fmt: on
  128. ISO2022CN_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0)
  129. ISO2022CN_SM_MODEL: CodingStateMachineDict = {
  130. "class_table": ISO2022CN_CLS,
  131. "class_factor": 9,
  132. "state_table": ISO2022CN_ST,
  133. "char_len_table": ISO2022CN_CHAR_LEN_TABLE,
  134. "name": "ISO-2022-CN",
  135. "language": "Chinese",
  136. }
  137. # fmt: off
  138. ISO2022JP_CLS = (
  139. 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
  140. 0, 0, 0, 0, 0, 0, 2, 2, # 08 - 0f
  141. 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
  142. 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f
  143. 0, 0, 0, 0, 7, 0, 0, 0, # 20 - 27
  144. 3, 0, 0, 0, 0, 0, 0, 0, # 28 - 2f
  145. 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
  146. 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
  147. 6, 0, 4, 0, 8, 0, 0, 0, # 40 - 47
  148. 0, 9, 5, 0, 0, 0, 0, 0, # 48 - 4f
  149. 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
  150. 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
  151. 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
  152. 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
  153. 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
  154. 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f
  155. 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87
  156. 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f
  157. 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97
  158. 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f
  159. 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
  160. 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af
  161. 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
  162. 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
  163. 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
  164. 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf
  165. 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
  166. 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
  167. 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7
  168. 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef
  169. 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7
  170. 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff
  171. )
  172. ISO2022JP_ST = (
  173. MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 00-07
  174. MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 08-0f
  175. MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 10-17
  176. MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, # 18-1f
  177. MachineState.ERROR, 5, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 4, MachineState.ERROR, MachineState.ERROR, # 20-27
  178. MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 6, MachineState.ITS_ME, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, # 28-2f
  179. MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, # 30-37
  180. MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 38-3f
  181. MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ERROR, MachineState.START, MachineState.START, # 40-47
  182. )
  183. # fmt: on
  184. ISO2022JP_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0, 0, 0, 0, 0)
  185. ISO2022JP_SM_MODEL: CodingStateMachineDict = {
  186. "class_table": ISO2022JP_CLS,
  187. "class_factor": 10,
  188. "state_table": ISO2022JP_ST,
  189. "char_len_table": ISO2022JP_CHAR_LEN_TABLE,
  190. "name": "ISO-2022-JP",
  191. "language": "Japanese",
  192. }
  193. # fmt: off
  194. ISO2022KR_CLS = (
  195. 2, 0, 0, 0, 0, 0, 0, 0, # 00 - 07
  196. 0, 0, 0, 0, 0, 0, 0, 0, # 08 - 0f
  197. 0, 0, 0, 0, 0, 0, 0, 0, # 10 - 17
  198. 0, 0, 0, 1, 0, 0, 0, 0, # 18 - 1f
  199. 0, 0, 0, 0, 3, 0, 0, 0, # 20 - 27
  200. 0, 4, 0, 0, 0, 0, 0, 0, # 28 - 2f
  201. 0, 0, 0, 0, 0, 0, 0, 0, # 30 - 37
  202. 0, 0, 0, 0, 0, 0, 0, 0, # 38 - 3f
  203. 0, 0, 0, 5, 0, 0, 0, 0, # 40 - 47
  204. 0, 0, 0, 0, 0, 0, 0, 0, # 48 - 4f
  205. 0, 0, 0, 0, 0, 0, 0, 0, # 50 - 57
  206. 0, 0, 0, 0, 0, 0, 0, 0, # 58 - 5f
  207. 0, 0, 0, 0, 0, 0, 0, 0, # 60 - 67
  208. 0, 0, 0, 0, 0, 0, 0, 0, # 68 - 6f
  209. 0, 0, 0, 0, 0, 0, 0, 0, # 70 - 77
  210. 0, 0, 0, 0, 0, 0, 0, 0, # 78 - 7f
  211. 2, 2, 2, 2, 2, 2, 2, 2, # 80 - 87
  212. 2, 2, 2, 2, 2, 2, 2, 2, # 88 - 8f
  213. 2, 2, 2, 2, 2, 2, 2, 2, # 90 - 97
  214. 2, 2, 2, 2, 2, 2, 2, 2, # 98 - 9f
  215. 2, 2, 2, 2, 2, 2, 2, 2, # a0 - a7
  216. 2, 2, 2, 2, 2, 2, 2, 2, # a8 - af
  217. 2, 2, 2, 2, 2, 2, 2, 2, # b0 - b7
  218. 2, 2, 2, 2, 2, 2, 2, 2, # b8 - bf
  219. 2, 2, 2, 2, 2, 2, 2, 2, # c0 - c7
  220. 2, 2, 2, 2, 2, 2, 2, 2, # c8 - cf
  221. 2, 2, 2, 2, 2, 2, 2, 2, # d0 - d7
  222. 2, 2, 2, 2, 2, 2, 2, 2, # d8 - df
  223. 2, 2, 2, 2, 2, 2, 2, 2, # e0 - e7
  224. 2, 2, 2, 2, 2, 2, 2, 2, # e8 - ef
  225. 2, 2, 2, 2, 2, 2, 2, 2, # f0 - f7
  226. 2, 2, 2, 2, 2, 2, 2, 2, # f8 - ff
  227. )
  228. ISO2022KR_ST = (
  229. MachineState.START, 3, MachineState.ERROR, MachineState.START, MachineState.START, MachineState.START, MachineState.ERROR, MachineState.ERROR, # 00-07
  230. MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ITS_ME, # 08-0f
  231. MachineState.ITS_ME, MachineState.ITS_ME, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 4, MachineState.ERROR, MachineState.ERROR, # 10-17
  232. MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, 5, MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, # 18-1f
  233. MachineState.ERROR, MachineState.ERROR, MachineState.ERROR, MachineState.ITS_ME, MachineState.START, MachineState.START, MachineState.START, MachineState.START, # 20-27
  234. )
  235. # fmt: on
  236. ISO2022KR_CHAR_LEN_TABLE = (0, 0, 0, 0, 0, 0)
  237. ISO2022KR_SM_MODEL: CodingStateMachineDict = {
  238. "class_table": ISO2022KR_CLS,
  239. "class_factor": 6,
  240. "state_table": ISO2022KR_ST,
  241. "char_len_table": ISO2022KR_CHAR_LEN_TABLE,
  242. "name": "ISO-2022-KR",
  243. "language": "Korean",
  244. }