summary refs log tree commit diff stats
path: root/lib/chardet/enums.py
diff options
context:
space:
mode:
author xiubuzhe <xiubuzhe@sina.com> 2023-10-08 20:59:00 +0800
committer xiubuzhe <xiubuzhe@sina.com> 2023-10-08 20:59:00 +0800
commit 1dac2263372df2b85db5d029a45721fa158a5c9d (patch)
tree 0365f9c57df04178a726d7584ca6a6b955a7ce6a /lib/chardet/enums.py
parent b494be364bb39e1de128ada7dc576a729d99907e (diff)
download sunhpc-1dac2263372df2b85db5d029a45721fa158a5c9d.tar.gz
sunhpc-1dac2263372df2b85db5d029a45721fa158a5c9d.tar.bz2
sunhpc-1dac2263372df2b85db5d029a45721fa158a5c9d.zip
first add files
Diffstat (limited to 'lib/chardet/enums.py')
-rw-r--r-- lib/chardet/enums.py 82
1 files changed, 82 insertions, 0 deletions
diff --git a/lib/chardet/enums.py b/lib/chardet/enums.py
new file mode 100644
index 0000000..32a77e7
--- /dev/null
+++ b/lib/chardet/enums.py
@@ -0,0 +1,82 @@
+"""
+All of the Enums that are used throughout the chardet package.
+
+:author: Dan Blanchard (dan.blanchard@gmail.com)
+"""
+
+
+class InputState:
+ """
+ The states a universal detector can be in; plain ``int`` constants, not ``enum.Enum``.
+ """
+
+ PURE_ASCII = 0
+ ESC_ASCII = 1
+ HIGH_BYTE = 2
+
+
+class LanguageFilter:
+ """
+ Bit-mask constants for the language filters we can apply to a
+ ``UniversalDetector``. Plain ``int`` values that can be combined with ``|``.
+ """
+
+ CHINESE_SIMPLIFIED = 0x01
+ CHINESE_TRADITIONAL = 0x02
+ JAPANESE = 0x04
+ KOREAN = 0x08
+ NON_CJK = 0x10
+ ALL = 0x1F  # the five single-bit filters above OR-ed together
+ CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL  # evaluates to 0x03
+ CJK = CHINESE | JAPANESE | KOREAN  # evaluates to 0x0F, i.e. ALL without NON_CJK
+
+
+class ProbingState:
+ """
+ The states a prober can be in; plain ``int`` constants, not ``enum.Enum``.
+ """
+
+ DETECTING = 0
+ FOUND_IT = 1
+ NOT_ME = 2
+
+
+class MachineState:
+ """
+ The states a state machine can be in; plain ``int`` constants, not ``enum.Enum``.
+ """
+
+ START = 0
+ ERROR = 1
+ ITS_ME = 2
+
+
+class SequenceLikelihood:
+ """
+ Likelihood of a character following the previous one, as ``int`` levels 0-3.
+ """
+
+ NEGATIVE = 0
+ UNLIKELY = 1
+ LIKELY = 2
+ POSITIVE = 3
+
+ @classmethod
+ def get_num_categories(cls):
+ """:returns: The number of likelihood categories defined on this class."""
+ return 4  # hard-coded count of NEGATIVE..POSITIVE; update if a level is added
+
+
+class CharacterCategory:
+ """
+ Category codes (plain ``int`` constants) that language models for
+ ``SingleByteCharsetProber`` put characters into.
+
+ Anything less than CONTROL (251) is considered a letter.
+ """
+
+ UNDEFINED = 255
+ LINE_BREAK = 254
+ SYMBOL = 253
+ DIGIT = 252
+ CONTROL = 251  # smallest category code defined here