diff options
author | xiubuzhe <xiubuzhe@sina.com> | 2023-10-08 20:59:00 +0800 |
---|---|---|
committer | xiubuzhe <xiubuzhe@sina.com> | 2023-10-08 20:59:00 +0800 |
commit | 1dac2263372df2b85db5d029a45721fa158a5c9d (patch) | |
tree | 0365f9c57df04178a726d7584ca6a6b955a7ce6a /lib/chardet/enums.py | |
parent | b494be364bb39e1de128ada7dc576a729d99907e (diff) | |
download | sunhpc-1dac2263372df2b85db5d029a45721fa158a5c9d.tar.gz sunhpc-1dac2263372df2b85db5d029a45721fa158a5c9d.tar.bz2 sunhpc-1dac2263372df2b85db5d029a45721fa158a5c9d.zip |
first add files
Diffstat (limited to 'lib/chardet/enums.py')
-rw-r--r-- | lib/chardet/enums.py | 82 |
1 files changed, 82 insertions, 0 deletions
diff --git a/lib/chardet/enums.py b/lib/chardet/enums.py new file mode 100644 index 0000000..32a77e7 --- /dev/null +++ b/lib/chardet/enums.py @@ -0,0 +1,82 @@ +""" +All of the Enums that are used throughout the chardet package. + +:author: Dan Blanchard (dan.blanchard@gmail.com) +""" + + +class InputState: + """ + This enum represents the different states a universal detector can be in. + """ + + PURE_ASCII = 0 + ESC_ASCII = 1 + HIGH_BYTE = 2 + + +class LanguageFilter: + """ + This enum represents the different language filters we can apply to a + ``UniversalDetector``. + """ + + CHINESE_SIMPLIFIED = 0x01 + CHINESE_TRADITIONAL = 0x02 + JAPANESE = 0x04 + KOREAN = 0x08 + NON_CJK = 0x10 + ALL = 0x1F + CHINESE = CHINESE_SIMPLIFIED | CHINESE_TRADITIONAL + CJK = CHINESE | JAPANESE | KOREAN + + +class ProbingState: + """ + This enum represents the different states a prober can be in. + """ + + DETECTING = 0 + FOUND_IT = 1 + NOT_ME = 2 + + +class MachineState: + """ + This enum represents the different states a state machine can be in. + """ + + START = 0 + ERROR = 1 + ITS_ME = 2 + + +class SequenceLikelihood: + """ + This enum represents the likelihood of a character following the previous one. + """ + + NEGATIVE = 0 + UNLIKELY = 1 + LIKELY = 2 + POSITIVE = 3 + + @classmethod + def get_num_categories(cls): + """:returns: The number of likelihood categories in the enum.""" + return 4 + + +class CharacterCategory: + """ + This enum represents the different categories language models for + ``SingleByteCharsetProber`` put characters into. + + Anything less than CONTROL is considered a letter. + """ + + UNDEFINED = 255 + LINE_BREAK = 254 + SYMBOL = 253 + DIGIT = 252 + CONTROL = 251 |