Merge branch 'main' into code

cfi-gb · Nov 23, 2024 · e98708b · e98708b
2 parents df4c1d2 + 07a4645
commit e98708b
Show file tree

Hide file tree

Showing 15 changed files with 2,101 additions and 115 deletions.
diff --git a/.github/workflows/codespell-private.yml b/.github/workflows/codespell-private.yml
@@ -14,7 +14,7 @@ jobs:
       REQUIRE_ASPELL: true
       RUFF_OUTPUT_FORMAT: github
     # Make sure we're using the latest aspell dictionary
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     timeout-minutes: 10
     strategy:
       fail-fast: false
@@ -25,6 +25,7 @@ jobs:
           - "3.10"
           - "3.11"
           - "3.12"
+          - "3.13"
         no-toml:
           - ""
         include:
@@ -39,6 +40,7 @@ jobs:
         uses: actions/setup-python@v5
         with:
           python-version: ${{ matrix.python-version }}
+          allow-prereleases: true
       - run: sudo apt-get install libaspell-dev aspell-en
       - name: Install dependencies
         run: |
@@ -50,7 +52,7 @@ jobs:
       - run: codespell --help
       - run: codespell --version
       - run: make check
-      - uses: codecov/codecov-action@v4
+      - uses: codecov/codecov-action@v5
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
       # tomli should not be required for the next two steps (and make sure it's not)

diff --git a/.github/workflows/codespell-windows.yml b/.github/workflows/codespell-windows.yml
@@ -25,6 +25,6 @@ jobs:
       - run: codespell --help
       - run: codespell --version
       - run: pytest codespell_lib
-      - uses: codecov/codecov-action@v4
+      - uses: codecov/codecov-action@v5
         with:
           token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -6,10 +6,10 @@ on:
     types: [published]
   push:
     branches:
-      - master
+      - main
   pull_request:
     branches:
-      - master
+      - main
 
 permissions:
   contents: read

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@ exclude: ^(\.[^/]*cache/.*)$
 repos:
   - repo: https://github.com/executablebooks/mdformat
     # Do this before other tools "fixing" the line endings
-    rev: 0.7.17
+    rev: 0.7.18
     hooks:
       - id: mdformat
         name: Format Markdown
@@ -26,10 +26,10 @@ repos:
     hooks:
       - id: rst-linter
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.6.0
+    rev: v5.0.0
     hooks:
       - id: no-commit-to-branch
-        args: [--branch, master]
+        args: [--branch, main]
       - id: check-yaml
         args: [--unsafe]
       - id: debug-statements
@@ -58,7 +58,7 @@ repos:
           - -d
           - "{extends: relaxed, rules: {line-length: {max: 90}}}"
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.4.9
+    rev: v0.7.3
     hooks:
       - id: ruff
       - id: ruff-format
@@ -75,11 +75,11 @@ repos:
         additional_dependencies:
           - tomli
   - repo: https://github.com/abravalheri/validate-pyproject
-    rev: v0.18
+    rev: v0.23
     hooks:
       - id: validate-pyproject
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.10.0
+    rev: v1.13.0
     hooks:
       - id: mypy
         args: ["--config-file", "pyproject.toml"]

diff --git a/README.rst b/README.rst
@@ -119,7 +119,7 @@ You can select the optional dictionaries with the ``--builtin`` option.
 Ignoring words
 --------------
 
-When ignoring false positives, note that spelling errors are *case-insensitive* but words to ignore are *case-sensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``, but to ignore it you must pass ``wrod``.
+When ignoring false positives, note that spelling errors are *case-insensitive* but words to ignore are *case-sensitive*. For example, the dictionary entry ``wrod`` will also match the typo ``Wrod``, but to ignore it you must pass ``Wrod``.
 
 The words to ignore can be passed in two ways:
 
@@ -161,7 +161,8 @@ Using a config file
 
 Command line options can also be specified in a config file.
 
-When running ``codespell``, it will check in the current directory for a file
+When running ``codespell``, it will check in the current directory for an
+`INI file <https://en.wikipedia.org/wiki/INI_file>`_
 named ``setup.cfg`` or ``.codespellrc`` (or a file specified via ``--config``),
 containing an entry named ``[codespell]``. Each command line argument can
 be specified in this file (without the preceding dashes), for example:
@@ -173,15 +174,16 @@ be specified in this file (without the preceding dashes), for example:
     count =
     quiet-level = 3
 
-The ``.codespellrc`` file is an `INI file <https://en.wikipedia.org/wiki/INI_file>`_,
-which is read using Python's
-`configparser <https://docs.python.org/3/library/configparser.html#supported-ini-file-structure>`_.
-For example, comments are possible using ``;`` or ``#`` as the first character.
+Python's
+`configparser <https://docs.python.org/3/library/configparser.html#supported-ini-file-structure>`_
+module defines the exact format of INI config files. For example,
+comments are possible using ``;`` or ``#`` as the first character.
 
 Codespell will also check in the current directory for a ``pyproject.toml``
-(or a path can be specified via ``--toml <filename>``) file, and the
-``[tool.codespell]`` entry will be used, but only if the tomli_ package
-is installed for versions of Python prior to 3.11. For example:
+file (or a file specified via ``--toml``), and the ``[tool.codespell]``
+entry will be used. For versions of Python prior to 3.11, this requires
+the tomli_ package. For example, here is the TOML equivalent of the
+previous config file:
 
 .. code-block:: toml
 
@@ -190,25 +192,40 @@ is installed for versions of Python prior to 3.11. For example:
     count = true
     quiet-level = 3
 
-These are both equivalent to running:
+The above INI and TOML files are equivalent to running:
 
 .. code-block:: sh
 
-    codespell --quiet-level 3 --count --skip "*.po,*.ts,./src/3rdParty,./src/Test"
+    codespell --skip "*.po,*.ts,./src/3rdParty,./src/Test" --count --quiet-level 3
 
 If several config files are present, they are read in the following order:
 
-#. ``pyproject.toml`` (only if the ``tomli`` library is available)
+#. ``pyproject.toml`` (on Python < 3.11, only if the ``tomli`` library is available)
 #. ``setup.cfg``
 #. ``.codespellrc``
 #. any additional file supplied via ``--config``
 
 If a codespell configuration is supplied in several of these files,
 the configuration from the most recently read file overwrites previously
-specified configurations.
+specified configurations. Any options specified in the command line will
+*override* options from the config files.
 
-Any options specified in the command line will *override* options from the
-config files.
+Values in a config file entry cannot start with a ``-`` character. If an
+item in a value begins with ``-``, place it after another item, like this:
+
+.. code-block:: ini
+
+    [codespell]
+    dictionary = mydict,-
+    ignore-words = bar,-foo
+
+instead of these invalid entries:
+
+.. code-block:: ini
+
+    [codespell]
+    dictionary = -,mydict
+    ignore-words = -foo,bar
 
 .. _tomli: https://pypi.org/project/tomli/
 
@@ -347,13 +364,13 @@ In the scenario where the user prefers not to follow the development version of
 
 .. code-block:: sh
 
-    wget https://raw.githubusercontent.com/codespell-project/codespell/master/codespell_lib/data/dictionary.txt
+    wget https://raw.githubusercontent.com/codespell-project/codespell/main/codespell_lib/data/dictionary.txt
     codespell -D dictionary.txt
 
 The above simply downloads the latest ``dictionary.txt`` file and then by utilizing the ``-D`` flag allows the user to specify the freshly downloaded ``dictionary.txt`` as the custom dictionary instead of the default one.
 
 You can also do the same thing for the other dictionaries listed here:
-    https://github.com/codespell-project/codespell/tree/master/codespell_lib/data
+    https://github.com/codespell-project/codespell/tree/main/codespell_lib/data
 
 License
 -------

diff --git a/codespell_lib/__init__.py b/codespell_lib/__init__.py
@@ -1,4 +1,4 @@
 from ._codespell import _script_main, main
 from ._version import __version__  # type: ignore[import-not-found]
 
-__all__ = ["_script_main", "main", "__version__"]
+__all__ = ["__version__", "_script_main", "main"]
diff --git a/codespell_lib/__main__.py b/codespell_lib/__main__.py
@@ -3,7 +3,4 @@
 from ._codespell import _script_main
 
 if __name__ == "__main__":
-    try:
-        sys.exit(_script_main())
-    except KeyboardInterrupt:
-        pass
+    sys.exit(_script_main())
diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
@@ -25,7 +25,6 @@
 import re
 import sys
 import textwrap
-from ctypes import wintypes
 from typing import (
     Any,
     Dict,
@@ -36,9 +35,16 @@
     Pattern,
     Sequence,
     Set,
+    TextIO,
     Tuple,
 )
 
+if sys.platform == "win32":
+    from ctypes import wintypes
+
+    ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004
+    STD_OUTPUT_HANDLE = wintypes.HANDLE(-11)
+
 from ._spellchecker import Misspelling, build_dict
 from ._text_util import fix_case
 
@@ -137,10 +143,6 @@
 EX_DATAERR = 65
 EX_CONFIG = 78
 
-# Windows specific constants
-ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004
-STD_OUTPUT_HANDLE = wintypes.HANDLE(-11)
-
 # OPTIONS:
 #
 # ARGUMENTS:
@@ -201,11 +203,17 @@ def __str__(self) -> str:
 
 
 class FileOpener:
-    def __init__(self, use_chardet: bool, quiet_level: int) -> None:
+    def __init__(
+        self,
+        use_chardet: bool,
+        quiet_level: int,
+        ignore_multiline_regex: Optional[Pattern[str]],
+    ) -> None:
         self.use_chardet = use_chardet
         if use_chardet:
             self.init_chardet()
         self.quiet_level = quiet_level
+        self.ignore_multiline_regex = ignore_multiline_regex
 
     def init_chardet(self) -> None:
         try:
@@ -247,7 +255,7 @@ def open_with_chardet(self, filename: str) -> Tuple[List[str], str]:
             )
             raise
         else:
-            lines = f.readlines()
+            lines = self.get_lines(f)
             f.close()
 
         return lines, f.encoding
@@ -262,7 +270,7 @@ def open_with_internal(self, filename: str) -> Tuple[List[str], str]:
                 print(f'WARNING: Trying next encoding "{encoding}"', file=sys.stderr)
             with open(filename, encoding=encoding, newline="") as f:
                 try:
-                    lines = f.readlines()
+                    lines = self.get_lines(f)
                 except UnicodeDecodeError:
                     if not self.quiet_level & QuietLevels.ENCODING:
                         print(
@@ -279,6 +287,22 @@ def open_with_internal(self, filename: str) -> Tuple[List[str], str]:
 
         return lines, encoding
 
+    def get_lines(self, f: TextIO) -> List[str]:
+        if self.ignore_multiline_regex:
+            text = f.read()
+            pos = 0
+            text2 = ""
+            for m in re.finditer(self.ignore_multiline_regex, text):
+                text2 += text[pos : m.start()]
+                # Replace with blank lines so line numbers are unchanged.
+                text2 += "\n" * m.group().count("\n")
+                pos = m.end()
+            text2 += text[pos:]
+            lines = text2.split("\n")
+        else:
+            lines = f.readlines()
+        return lines
+
 
 # -.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-.-:-.-:-.-:-.:-.-:-
 
@@ -411,6 +435,19 @@ def parse_options(
         'e.g., "\\bmatch\\b". Defaults to '
         "empty/disabled.",
     )
+    parser.add_argument(
+        "--ignore-multiline-regex",
+        action="store",
+        type=str,
+        help="regular expression that is used to ignore "
+        "text that may span multi-line regions. "
+        "The regex is run with re.DOTALL. For example to "
+        "allow skipping of regions of Python code using "
+        "begin/end comments one could use: "
+        "--ignore-multiline-regex "
+        "'# codespell:ignore-begin *\\n.*# codespell:ignore-end *\\n'. "
+        "Defaults to empty/disabled.",
+    )
     parser.add_argument(
         "-I",
         "--ignore-words",
@@ -1063,7 +1100,12 @@ def flatten_clean_comma_separated_arguments(
 
 def _script_main() -> int:
     """Wrap to main() for setuptools."""
-    return main(*sys.argv[1:])
+    try:
+        return main(*sys.argv[1:])
+    except KeyboardInterrupt:
+        # User has typed CTRL+C
+        sys.stdout.write("\n")
+        return 130
 
 
 def _usage_error(parser: argparse.ArgumentParser, message: str) -> int:
@@ -1090,6 +1132,9 @@ def main(*args: str) -> int:
         for ifile, cfg_file in enumerate(used_cfg_files, start=1):
             print(f"    {ifile}: {cfg_file}")
 
+    if options.interactive > 0:
+        options.write_changes = True
+
     if options.regex and options.write_changes:
         return _usage_error(
             parser,
@@ -1115,6 +1160,20 @@ def main(*args: str) -> int:
     else:
         ignore_word_regex = None
 
+    if options.ignore_multiline_regex:
+        try:
+            ignore_multiline_regex = re.compile(
+                options.ignore_multiline_regex, re.DOTALL
+            )
+        except re.error as e:
+            return _usage_error(
+                parser,
+                f"ERROR: invalid --ignore-multiline-regex "
+                f'"{options.ignore_multiline_regex}" ({e})',
+            )
+    else:
+        ignore_multiline_regex = None
+
     ignore_words, ignore_words_cased = parse_ignore_words_option(
         options.ignore_words_list
     )
@@ -1203,7 +1262,11 @@ def main(*args: str) -> int:
         for exclude_file in exclude_files:
             build_exclude_hashes(exclude_file, exclude_lines)
 
-    file_opener = FileOpener(options.hard_encoding_detection, options.quiet_level)
+    file_opener = FileOpener(
+        options.hard_encoding_detection,
+        options.quiet_level,
+        ignore_multiline_regex,
+    )
 
     glob_match = GlobMatch(
         flatten_clean_comma_separated_arguments(options.skip) if options.skip else []