# enums.py
"""
All of the Enums that are used throughout the chardet package.

:author: Dan Blanchard (dan.blanchard@gmail.com)
"""

from enum import Enum, Flag


  6. class InputState:
  7. """
  8. This enum represents the different states a universal detector can be in.
  9. """
  10. PURE_ASCII = 0
  11. ESC_ASCII = 1
  12. HIGH_BYTE = 2
  13. class LanguageFilter(Flag):
  14. """
  15. This enum represents the different language filters we can apply to a
  16. ``UniversalDetector``.
  17. """
  18. NONE = 0x00
  19. CHINESE_SIMPLIFIED = 0x01
  20. CHINESE_TRADITIONAL = 0x02
  21. JAPANESE = 0x04
  22. KOREAN = 0x08
  23. NON_CJK = 0x10
  24. ALL = 0x1F
  25. CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL
  26. CJK = CHINESE | JAPANESE | KOREAN
  27. class ProbingState(Enum):
  28. """
  29. This enum represents the different states a prober can be in.
  30. """
  31. DETECTING = 0
  32. FOUND_IT = 1
  33. NOT_ME = 2
  34. class MachineState:
  35. """
  36. This enum represents the different states a state machine can be in.
  37. """
  38. START = 0
  39. ERROR = 1
  40. ITS_ME = 2
  41. class SequenceLikelihood:
  42. """
  43. This enum represents the likelihood of a character following the previous one.
  44. """
  45. NEGATIVE = 0
  46. UNLIKELY = 1
  47. LIKELY = 2
  48. POSITIVE = 3
  49. @classmethod
  50. def get_num_categories(cls) -> int:
  51. """:returns: The number of likelihood categories in the enum."""
  52. return 4
  53. class CharacterCategory:
  54. """
  55. This enum represents the different categories language models for
  56. ``SingleByteCharsetProber`` put characters into.
  57. Anything less than CONTROL is considered a letter.
  58. """
  59. UNDEFINED = 255
  60. LINE_BREAK = 254
  61. SYMBOL = 253
  62. DIGIT = 252
  63. CONTROL = 251