# Provenance: recovered from a git patch (cgit v1.2.3 export).
#   Commit:  1dac2263372df2b85db5d029a45721fa158a5c9d
#   Author:  xiubuzhe
#   Date:    Sun, 8 Oct 2023 20:59:00 +0800
#   Subject: first add files
#   Paths:   lib/chardet/cli/__init__.py (empty)
#            lib/chardet/cli/chardetect.py (this module)
"""
Script which takes one or more file paths and reports on their detected
encodings

Example::

    % chardetect somefile someotherfile
    somefile: windows-1252 with confidence 0.5
    someotherfile: ascii with confidence 1.0

If no paths are provided, it takes its input from stdin.

"""


import argparse
import sys

from .. import __version__
from ..universaldetector import UniversalDetector


def description_of(lines, name="stdin"):
    """
    Return a string describing the probable encoding of a file or
    list of strings.

    Each item of ``lines`` is fed to a fresh ``UniversalDetector`` until the
    input is exhausted or the detector reports that it is done.

    :param lines: The lines to get the encoding of.
    :type lines: Iterable of bytes
    :param name: Name of file or collection of lines
    :type name: str
    :returns: ``"<name>: <encoding> with confidence <confidence>"`` when an
              encoding was detected, otherwise ``"<name>: no result"``.
    :rtype: str
    """
    u = UniversalDetector()
    for line in lines:
        line = bytearray(line)
        u.feed(line)
        # shortcut out of the loop to save reading further - particularly
        # useful if we read a BOM.
        if u.done:
            break
    u.close()
    result = u.result
    if result["encoding"]:
        return f'{name}: {result["encoding"]} with confidence {result["confidence"]}'
    return f"{name}: no result"


def main(argv=None):
    """
    Handles command line arguments and gets things started.

    One line is printed to stdout per input, in the order given. Every file
    opened on behalf of the caller is closed once it has been processed;
    the process-wide stdin stream is left open.

    :param argv: List of arguments, as if specified on the command-line.
                 If None, ``sys.argv[1:]`` is used instead.
    :type argv: list of str
    """
    # Get command line arguments
    parser = argparse.ArgumentParser(
        description="Takes one or more file paths and reports their detected \
                     encodings"
    )
    parser.add_argument(
        "input",
        help="File whose encoding we would like to determine. \
                              (default: stdin)",
        type=argparse.FileType("rb"),
        nargs="*",
        default=[sys.stdin.buffer],
    )
    parser.add_argument(
        "--version", action="version", version=f"%(prog)s {__version__}"
    )
    args = parser.parse_args(argv)

    for f in args.input:
        try:
            if f.isatty():
                print(
                    "You are running chardetect interactively. Press "
                    "CTRL-D twice at the start of a blank line to signal the "
                    "end of your input. If you want help, run chardetect "
                    "--help\n",
                    file=sys.stderr,
                )
            print(description_of(f, f.name))
        finally:
            # argparse.FileType opens the files but never closes them, so
            # release each handle here. stdin (the default, or "-") belongs
            # to the process and must stay open.
            if f is not sys.stdin and f is not sys.stdin.buffer:
                f.close()


if __name__ == "__main__":
    main()