From 7391f74cbcf24b9c09244a3376bbb2a0ca8929d4 Mon Sep 17 00:00:00 2001 From: Felix Williams <87987318+felix-cw@users.noreply.github.com> Date: Wed, 8 Nov 2023 02:32:40 +0000 Subject: [PATCH] Add hidden `--extension` to override inference of source type from file extension (#8373) ## Summary This PR addresses the incompatibility with `jupyterlab-lsp` + `python-lsp-ruff` arising from the inference of source type from file extension, raised in #6847. In particular it follows the suggestion in https://github.com/astral-sh/ruff/issues/6847#issuecomment-1765724679 to specify a mapping from file extension to source type. The source types are - python - pyi - ipynb Usage: ```sh ruff check --no-cache --stdin-filename Untitled.ipynb --extension ipynb:python ``` Unlike the original suggestion, `:` instead of `=` is used to associate file extensions to language since that is what is used with `--per-file-ignores` which is an existing option that accepts a mapping. ## Test Plan 2 tests added to `integration_test.rs` to ensure the override works as expected --------- Co-authored-by: Charlie Marsh --- crates/ruff_cli/src/args.rs | 12 ++- crates/ruff_cli/src/commands/check.rs | 2 + crates/ruff_cli/src/diagnostics.rs | 72 ++++++++----- crates/ruff_cli/tests/integration_test.rs | 113 ++++++++++++++++++++ crates/ruff_linter/src/settings/mod.rs | 4 +- crates/ruff_linter/src/settings/types.rs | 117 ++++++++++++++++++++- crates/ruff_workspace/src/configuration.rs | 10 +- 7 files changed, 296 insertions(+), 34 deletions(-) diff --git a/crates/ruff_cli/src/args.rs b/crates/ruff_cli/src/args.rs index 8d2756c082bae..d14c0a965870c 100644 --- a/crates/ruff_cli/src/args.rs +++ b/crates/ruff_cli/src/args.rs @@ -8,8 +8,8 @@ use ruff_linter::line_width::LineLength; use ruff_linter::logging::LogLevel; use ruff_linter::registry::Rule; use ruff_linter::settings::types::{ - FilePattern, PatternPrefixPair, PerFileIgnore, PreviewMode, PythonVersion, SerializationFormat, - UnsafeFixes, + ExtensionPair, FilePattern, PatternPrefixPair, PerFileIgnore, PreviewMode, PythonVersion, + SerializationFormat, UnsafeFixes, }; use ruff_linter::{RuleParser, RuleSelector, RuleSelectorParser}; use ruff_workspace::configuration::{Configuration, RuleSelection}; @@ -351,6 +351,9 @@ pub struct CheckCommand { conflicts_with = "watch", )] pub show_settings: bool, + /// List of mappings from file extension to language (one of ["python", "ipynb", "pyi"]). + #[arg(long, value_delimiter = ',', hide = true)] + pub extension: Option>, /// Dev-only argument to show fixes #[arg(long, hide = true)] pub ecosystem_ci: bool, @@ -535,6 +538,7 @@ impl CheckCommand { force_exclude: resolve_bool_arg(self.force_exclude, self.no_force_exclude), output_format: self.output_format, show_fixes: resolve_bool_arg(self.show_fixes, self.no_show_fixes), + extension: self.extension, }, ) } @@ -647,6 +651,7 @@ pub struct CliOverrides { pub force_exclude: Option, pub output_format: Option, pub show_fixes: Option, + pub extension: Option>, } impl ConfigurationTransformer for CliOverrides { @@ -731,6 +736,9 @@ impl ConfigurationTransformer for CliOverrides { if let Some(target_version) = &self.target_version { config.target_version = Some(*target_version); } + if let Some(extension) = &self.extension { + config.lint.extension = Some(extension.clone().into_iter().collect()); + } config } diff --git a/crates/ruff_cli/src/commands/check.rs b/crates/ruff_cli/src/commands/check.rs index 7d63e299f4fa1..c4e63f3e86b6e 100644 --- a/crates/ruff_cli/src/commands/check.rs +++ b/crates/ruff_cli/src/commands/check.rs @@ -30,6 +30,7 @@ use crate::diagnostics::Diagnostics; use crate::panic::catch_unwind; /// Run the linter over a collection of files. +#[allow(clippy::too_many_arguments)] pub(crate) fn check( files: &[PathBuf], pyproject_config: &PyprojectConfig, @@ -184,6 +185,7 @@ pub(crate) fn check( /// Wraps [`lint_path`](crate::diagnostics::lint_path) in a [`catch_unwind`](std::panic::catch_unwind) and emits /// a diagnostic if the linting the file panics. +#[allow(clippy::too_many_arguments)] fn lint_path( path: &Path, package: Option<&Path>, diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs index e7627a9c2a52d..076963fee53b1 100644 --- a/crates/ruff_cli/src/diagnostics.rs +++ b/crates/ruff_cli/src/diagnostics.rs @@ -17,13 +17,13 @@ use ruff_linter::logging::DisplayParseError; use ruff_linter::message::Message; use ruff_linter::pyproject_toml::lint_pyproject_toml; use ruff_linter::registry::AsRule; -use ruff_linter::settings::types::UnsafeFixes; +use ruff_linter::settings::types::{ExtensionMapping, UnsafeFixes}; use ruff_linter::settings::{flags, LinterSettings}; use ruff_linter::source_kind::{SourceError, SourceKind}; use ruff_linter::{fs, IOError, SyntaxError}; use ruff_notebook::{Notebook, NotebookError, NotebookIndex}; use ruff_python_ast::imports::ImportMap; -use ruff_python_ast::{SourceType, TomlSourceType}; +use ruff_python_ast::{PySourceType, SourceType, TomlSourceType}; use ruff_source_file::{LineIndex, SourceCode, SourceFileBuilder}; use ruff_text_size::{TextRange, TextSize}; use ruff_workspace::Settings; @@ -177,6 +177,11 @@ impl AddAssign for FixMap { } } +fn override_source_type(path: Option<&Path>, extension: &ExtensionMapping) -> Option { + let ext = path?.extension()?.to_str()?; + extension.get(ext).map(PySourceType::from) +} + /// Lint the source code at the given `Path`. pub(crate) fn lint_path( path: &Path, @@ -221,31 +226,35 @@ pub(crate) fn lint_path( debug!("Checking: {}", path.display()); - let source_type = match SourceType::from(path) { - SourceType::Toml(TomlSourceType::Pyproject) => { - let messages = if settings - .rules - .iter_enabled() - .any(|rule_code| rule_code.lint_source().is_pyproject_toml()) - { - let contents = match std::fs::read_to_string(path).map_err(SourceError::from) { - Ok(contents) => contents, - Err(err) => { - return Ok(Diagnostics::from_source_error(&err, Some(path), settings)); - } + let source_type = match override_source_type(Some(path), &settings.extension) { + Some(source_type) => source_type, + None => match SourceType::from(path) { + SourceType::Toml(TomlSourceType::Pyproject) => { + let messages = if settings + .rules + .iter_enabled() + .any(|rule_code| rule_code.lint_source().is_pyproject_toml()) + { + let contents = match std::fs::read_to_string(path).map_err(SourceError::from) { + Ok(contents) => contents, + Err(err) => { + return Ok(Diagnostics::from_source_error(&err, Some(path), settings)); + } + }; + let source_file = + SourceFileBuilder::new(path.to_string_lossy(), contents).finish(); + lint_pyproject_toml(source_file, settings) + } else { + vec![] }; - let source_file = SourceFileBuilder::new(path.to_string_lossy(), contents).finish(); - lint_pyproject_toml(source_file, settings) - } else { - vec![] - }; - return Ok(Diagnostics { - messages, - ..Diagnostics::default() - }); - } - SourceType::Toml(_) => return Ok(Diagnostics::default()), - SourceType::Python(source_type) => source_type, + return Ok(Diagnostics { + messages, + ..Diagnostics::default() + }); + } + SourceType::Toml(_) => return Ok(Diagnostics::default()), + SourceType::Python(source_type) => source_type, + }, }; // Extract the sources from the file. @@ -370,8 +379,15 @@ pub(crate) fn lint_stdin( fix_mode: flags::FixMode, ) -> Result { // TODO(charlie): Support `pyproject.toml`. - let SourceType::Python(source_type) = path.map(SourceType::from).unwrap_or_default() else { - return Ok(Diagnostics::default()); + let source_type = if let Some(source_type) = + override_source_type(path, &settings.linter.extension) + { + source_type + } else { + let SourceType::Python(source_type) = path.map(SourceType::from).unwrap_or_default() else { + return Ok(Diagnostics::default()); + }; + source_type }; // Extract the sources from the file. diff --git a/crates/ruff_cli/tests/integration_test.rs b/crates/ruff_cli/tests/integration_test.rs index 9153f3d0d5b0d..0cd9b9dd5ba4f 100644 --- a/crates/ruff_cli/tests/integration_test.rs +++ b/crates/ruff_cli/tests/integration_test.rs @@ -320,6 +320,119 @@ fn stdin_fix_jupyter() { Found 2 errors (2 fixed, 0 remaining). "###); } +#[test] +fn stdin_override_parser_ipynb() { + let args = ["--extension", "py:ipynb", "--stdin-filename", "Jupyter.py"]; + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .args(STDIN_BASE_OPTIONS) + .args(args) + .pass_stdin(r#"{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "dccc687c-96e2-4604-b957-a8a89b5bec06", + "metadata": {}, + "outputs": [], + "source": [ + "import os" + ] + }, + { + "cell_type": "markdown", + "id": "19e1b029-f516-4662-a9b9-623b93edac1a", + "metadata": {}, + "source": [ + "Foo" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "cdce7b92-b0fb-4c02-86f6-e233b26fa84f", + "metadata": {}, + "outputs": [], + "source": [ + "import sys" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "e40b33d2-7fe4-46c5-bdf0-8802f3052565", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1\n" + ] + } + ], + "source": [ + "print(1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1899bc8-d46f-4ec0-b1d1-e1ca0f04bf60", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}"#), @r###" + success: false + exit_code: 1 + ----- stdout ----- + Jupyter.py:cell 1:1:8: F401 [*] `os` imported but unused + Jupyter.py:cell 3:1:8: F401 [*] `sys` imported but unused + Found 2 errors. + [*] 2 fixable with the `--fix` option. + + ----- stderr ----- + "###); +} + +#[test] +fn stdin_override_parser_py() { + assert_cmd_snapshot!(Command::new(get_cargo_bin(BIN_NAME)) + .args(STDIN_BASE_OPTIONS) + .args(["--extension", "ipynb:python", "--stdin-filename", "F401.ipynb"]) + .pass_stdin("import os\n"), @r###" + success: false + exit_code: 1 + ----- stdout ----- + F401.ipynb:1:8: F401 [*] `os` imported but unused + Found 1 error. + [*] 1 fixable with the `--fix` option. + + ----- stderr ----- + "###); +} #[test] fn stdin_fix_when_not_fixable_should_still_print_contents() { diff --git a/crates/ruff_linter/src/settings/mod.rs b/crates/ruff_linter/src/settings/mod.rs index d8cf37eeb2f35..66214a3a65201 100644 --- a/crates/ruff_linter/src/settings/mod.rs +++ b/crates/ruff_linter/src/settings/mod.rs @@ -23,7 +23,7 @@ use crate::rules::{ flake8_tidy_imports, flake8_type_checking, flake8_unused_arguments, isort, mccabe, pep8_naming, pycodestyle, pydocstyle, pyflakes, pylint, pyupgrade, }; -use crate::settings::types::{FilePatternSet, PerFileIgnore, PythonVersion}; +use crate::settings::types::{ExtensionMapping, FilePatternSet, PerFileIgnore, PythonVersion}; use crate::{codes, RuleSelector}; use super::line_width::IndentWidth; @@ -50,6 +50,7 @@ pub struct LinterSettings { pub target_version: PythonVersion, pub preview: PreviewMode, pub explicit_preview_rules: bool, + pub extension: ExtensionMapping, // Rule-specific settings pub allowed_confusables: FxHashSet, @@ -187,6 +188,7 @@ impl LinterSettings { pyupgrade: pyupgrade::settings::Settings::default(), preview: PreviewMode::default(), explicit_preview_rules: false, + extension: ExtensionMapping::default(), } } diff --git a/crates/ruff_linter/src/settings/types.rs b/crates/ruff_linter/src/settings/types.rs index 02ecb314c1c85..8c8d84efb7f34 100644 --- a/crates/ruff_linter/src/settings/types.rs +++ b/crates/ruff_linter/src/settings/types.rs @@ -7,13 +7,15 @@ use std::string::ToString; use anyhow::{bail, Result}; use globset::{Glob, GlobSet, GlobSetBuilder}; use pep440_rs::{Version as Pep440Version, VersionSpecifiers}; -use ruff_diagnostics::Applicability; +use rustc_hash::FxHashMap; use serde::{de, Deserialize, Deserializer, Serialize}; use strum::IntoEnumIterator; use strum_macros::EnumIter; use ruff_cache::{CacheKey, CacheKeyHasher}; +use ruff_diagnostics::Applicability; use ruff_macros::CacheKey; +use ruff_python_ast::PySourceType; use crate::fs; use crate::registry::RuleSet; @@ -289,6 +291,119 @@ impl FromStr for PatternPrefixPair { } } +#[derive( + Clone, + Copy, + Debug, + PartialOrd, + Ord, + PartialEq, + Eq, + Default, + Serialize, + Deserialize, + CacheKey, + EnumIter, +)] +#[serde(rename_all = "lowercase")] +#[cfg_attr(feature = "clap", derive(clap::ValueEnum))] +#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] +pub enum Language { + #[default] + Python, + Pyi, + Ipynb, +} + +impl FromStr for Language { + type Err = anyhow::Error; + + fn from_str(s: &str) -> Result { + match s.to_ascii_lowercase().as_str() { + "python" => Ok(Self::Python), + "pyi" => Ok(Self::Pyi), + "ipynb" => Ok(Self::Ipynb), + _ => { + bail!("Unrecognized language: `{s}`. Expected one of `python`, `pyi`, or `ipynb`.") + } + } + } +} + +impl From for PySourceType { + fn from(value: Language) -> Self { + match value { + Language::Python => Self::Python, + Language::Ipynb => Self::Ipynb, + Language::Pyi => Self::Stub, + } + } +} + +#[derive(Clone, Debug, PartialEq, Eq)] +pub struct ExtensionPair { + pub extension: String, + pub language: Language, +} + +impl ExtensionPair { + const EXPECTED_PATTERN: &'static str = ": pattern"; +} + +impl FromStr for ExtensionPair { + type Err = anyhow::Error; + + fn from_str(s: &str) -> std::result::Result { + let (extension_str, language_str) = { + let tokens = s.split(':').collect::>(); + if tokens.len() != 2 { + bail!("Expected {}", Self::EXPECTED_PATTERN); + } + (tokens[0].trim(), tokens[1].trim()) + }; + let extension = extension_str.into(); + let language = Language::from_str(language_str)?; + Ok(Self { + extension, + language, + }) + } +} + +impl From for (String, Language) { + fn from(value: ExtensionPair) -> Self { + (value.extension, value.language) + } +} +#[derive(Debug, Clone, Default, CacheKey)] +pub struct ExtensionMapping { + mapping: FxHashMap, +} + +impl ExtensionMapping { + /// Return the [`Language`] for the given extension. + pub fn get(&self, extension: &str) -> Option { + self.mapping.get(extension).copied() + } +} + +impl From> for ExtensionMapping { + fn from(value: FxHashMap) -> Self { + Self { mapping: value } + } +} + +impl FromIterator for ExtensionMapping { + fn from_iter>(iter: T) -> Self { + Self { + mapping: iter + .into_iter() + .map(|pair| (pair.extension, pair.language)) + .collect(), + } + } +} + #[derive(Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Debug, Hash)] #[cfg_attr(feature = "clap", derive(clap::ValueEnum))] #[serde(rename_all = "kebab-case")] diff --git a/crates/ruff_workspace/src/configuration.rs b/crates/ruff_workspace/src/configuration.rs index 163d257b7b8f6..1937596dbbe21 100644 --- a/crates/ruff_workspace/src/configuration.rs +++ b/crates/ruff_workspace/src/configuration.rs @@ -25,8 +25,8 @@ use ruff_linter::rule_selector::{PreviewOptions, Specificity}; use ruff_linter::rules::pycodestyle; use ruff_linter::settings::rule_table::RuleTable; use ruff_linter::settings::types::{ - FilePattern, FilePatternSet, PerFileIgnore, PreviewMode, PythonVersion, SerializationFormat, - UnsafeFixes, Version, + ExtensionMapping, FilePattern, FilePatternSet, PerFileIgnore, PreviewMode, PythonVersion, + SerializationFormat, UnsafeFixes, Version, }; use ruff_linter::settings::{ resolve_per_file_ignores, LinterSettings, DEFAULT_SELECTORS, DUMMY_VARIABLE_RGX, TASK_TAGS, @@ -216,6 +216,7 @@ impl Configuration { linter: LinterSettings { rules: lint.as_rule_table(lint_preview), exclude: FilePatternSet::try_from_iter(lint.exclude.unwrap_or_default())?, + extension: lint.extension.unwrap_or_default(), preview: lint_preview, target_version, project_root: project_root.to_path_buf(), @@ -523,6 +524,7 @@ impl Configuration { pub struct LintConfiguration { pub exclude: Option>, pub preview: Option, + pub extension: Option, // Rule selection pub extend_per_file_ignores: Vec, @@ -589,6 +591,9 @@ impl LintConfiguration { .chain(options.common.extend_unfixable.into_iter().flatten()) .collect(); Ok(LintConfiguration { + // `--extension` is a hidden command-line argument that isn't supported in configuration + // files at present. + extension: None, exclude: options.exclude.map(|paths| { paths .into_iter() @@ -905,6 +910,7 @@ impl LintConfiguration { Self { exclude: self.exclude.or(config.exclude), preview: self.preview.or(config.preview), + extension: self.extension.or(config.extension), rule_selections: config .rule_selections .into_iter()