diff --git a/Cargo.lock b/Cargo.lock
index 829b025fe2..1f5801d3e6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -906,6 +906,7 @@ dependencies = [
  "qsc_doc_gen",
  "qsc_eval",
  "qsc_fir",
+ "qsc_formatter",
  "qsc_frontend",
  "qsc_hir",
  "qsc_passes",
@@ -994,6 +995,16 @@ dependencies = [
  "rustc-hash",
 ]
 
+[[package]]
+name = "qsc_formatter"
+version = "0.0.0"
+dependencies = [
+ "expect-test",
+ "indoc",
+ "qsc_data_structures",
+ "qsc_frontend",
+]
+
 [[package]]
 name = "qsc_frontend"
 version = "0.0.0"
@@ -1027,6 +1038,7 @@ version = "0.0.0"
 dependencies = [
  "enum-iterator",
  "expect-test",
+ "indoc",
  "miette",
  "num-bigint",
  "num-traits",
diff --git a/compiler/qsc/Cargo.toml b/compiler/qsc/Cargo.toml
index 35d09eb087..93c85bbdb3 100644
--- a/compiler/qsc/Cargo.toml
+++ b/compiler/qsc/Cargo.toml
@@ -19,6 +19,7 @@ num-complex = { workspace = true }
 qsc_codegen = { path = "../qsc_codegen" }
 qsc_data_structures = { path = "../qsc_data_structures" }
 qsc_doc_gen = { path = "../qsc_doc_gen" }
+qsc_formatter = { path = "../qsc_formatter" }
 qsc_eval = { path = "../qsc_eval" }
 qsc_frontend = { path = "../qsc_frontend" }
 qsc_ast = { path = "../qsc_ast" }
diff --git a/compiler/qsc/src/lib.rs b/compiler/qsc/src/lib.rs
index f275f522ef..f7920a772e 100644
--- a/compiler/qsc/src/lib.rs
+++ b/compiler/qsc/src/lib.rs
@@ -8,6 +8,8 @@ pub mod interpret;
 pub mod location;
 pub mod target;
 
+pub use qsc_formatter::formatter;
+
 pub use qsc_frontend::compile::{
     CompileUnit, PackageStore, RuntimeCapabilityFlags, SourceContents, SourceMap, SourceName,
 };
diff --git a/compiler/qsc_formatter/Cargo.toml b/compiler/qsc_formatter/Cargo.toml
new file mode 100644
index 0000000000..9bc0f7544f
--- /dev/null
+++ b/compiler/qsc_formatter/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "qsc_formatter"
+
+version.workspace = true
+authors.workspace = true
+homepage.workspace = true
+repository.workspace = true
+edition.workspace = true
+license.workspace = true
+
+[dependencies]
+qsc_data_structures = { path = "../qsc_data_structures" }
+qsc_frontend = { path = "../qsc_frontend" }
+
+[dev-dependencies]
+expect-test = { workspace = true }
+indoc = { workspace = true }
+
+[lib]
+doctest = false
diff --git a/compiler/qsc_formatter/src/formatter.rs b/compiler/qsc_formatter/src/formatter.rs
new file mode 100644
index 0000000000..df9572ed8d
--- /dev/null
+++ b/compiler/qsc_formatter/src/formatter.rs
@@ -0,0 +1,485 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT License.
+
+use qsc_data_structures::span::Span;
+use qsc_frontend::{
+    keyword::Keyword,
+    lex::{
+        concrete::{self, ConcreteToken, ConcreteTokenKind},
+        cooked::{StringToken, TokenKind},
+        Delim, InterpolatedEnding, InterpolatedStart,
+    },
+};
+
+#[cfg(test)]
+mod tests;
+
+/// A single formatting change: the source text covered by `span` is to be
+/// replaced with `new_text`. An empty `new_text` deletes the span, and a span
+/// with `lo == hi` is a pure insertion.
+#[derive(Debug)]
+pub struct TextEdit {
+    /// The text that replaces the contents of `span`.
+    pub new_text: String,
+    /// The byte range of the original source that is replaced.
+    pub span: Span,
+}
+
+impl TextEdit {
+    /// Creates an edit that replaces the bytes `lo..hi` of the source with `new_text`.
+    fn new(new_text: &str, lo: u32, hi: u32) -> Self {
+        Self {
+            new_text: new_text.to_string(),
+            span: Span { lo, hi },
+        }
+    }
+}
+
+/// Returns the whitespace for `level` levels of indentation, four spaces per level.
+fn make_indent_string(level: usize) -> String {
+    "    ".repeat(level)
+}
+
+/// Applies formatting rules to the given code str and returns
+/// the formatted string.
+pub fn format_str(code: &str) -> String {
+    let mut edits = calculate_format_edits(code);
+    // Order the edits by their span's `hi` value, from highest to lowest, so that
+    // applying an edit never shifts the offsets of an edit that is still pending.
+    // This is a stable ascending sort followed by a reversal (rather than a single
+    // descending sort) so that edits sharing the same `hi` are applied in the
+    // reverse of the order in which they were generated.
+    edits.sort_by_key(|edit| edit.span.hi);
+    edits.reverse();
+    let mut new_code = String::from(code);
+
+    for edit in edits {
+        let range = (edit.span.lo as usize)..(edit.span.hi as usize);
+        new_code.replace_range(range, &edit.new_text);
+    }
+
+    new_code
+}
+
+/// Applies formatting rules to the given code str, generating edits where
+/// the source code needs to be changed to comply with the format rules.
+pub fn calculate_format_edits(code: &str) -> Vec<TextEdit> {
+    let tokens = concrete::ConcreteTokenIterator::new(code);
+    let mut edits = vec![];
+
+    // Brace nesting depth at the first token of the current window.
+    let mut indent_level: usize = 0;
+
+    // The sliding window used is over three adjacent tokens. Each window formats
+    // the gap that follows `one`: either the whitespace token `two`, or the empty
+    // gap between `one` and `two` when they touch.
+    // The initial `None` in `one` is overwritten before it is ever read.
+    #[allow(unused_assignments)]
+    let mut one = None;
+    let mut two = None;
+    let mut three = None;
+
+    // NOTE(review): if the stream ends with two adjacent non-whitespace tokens and
+    // `ConcreteTokenIterator` emits no end-of-file token, the gap between those two
+    // tokens is never visited by this loop -- confirm against the iterator.
+    for token in tokens {
+        // Advance the token window
+        one = two;
+        two = three;
+        three = Some(token);
+
+        let edits_for_triple = match (&one, &two, &three) {
+            (Some(one), Some(two), Some(three)) => {
+                match one.kind {
+                    ConcreteTokenKind::Syntax(TokenKind::Open(Delim::Brace)) => indent_level += 1,
+                    ConcreteTokenKind::Syntax(TokenKind::Close(Delim::Brace)) => {
+                        indent_level = indent_level.saturating_sub(1)
+                    }
+                    // A window that starts on whitespace has nothing to format: that
+                    // gap was already handled by the window starting one token earlier.
+                    ConcreteTokenKind::WhiteSpace => continue,
+                    _ => {}
+                }
+
+                if matches!(two.kind, ConcreteTokenKind::WhiteSpace) {
+                    // whitespace in the middle
+                    apply_rules(
+                        one,
+                        get_token_contents(code, two),
+                        three,
+                        code,
+                        indent_level,
+                    )
+                } else {
+                    // one, two are adjacent tokens with no whitespace in the middle
+                    apply_rules(one, "", two, code, indent_level)
+                }
+            }
+            (None, None, Some(three)) => {
+                // Remove any whitespace at the start of a file
+                if matches!(three.kind, ConcreteTokenKind::WhiteSpace) {
+                    vec![TextEdit::new("", three.span.lo, three.span.hi)]
+                } else {
+                    vec![]
+                }
+            }
+            _ => {
+                // not enough tokens to apply a rule
+                continue;
+            }
+        };
+
+        edits.extend(edits_for_triple);
+    }
+
+    edits
+}
+
+/// Computes the edits needed for the gap between two non-whitespace tokens.
+///
+/// `whitespace` is the source text found between `left` and `right` (empty when
+/// the tokens touch), `code` is the full source, and `indent_level` is the brace
+/// nesting depth at `left`. The rules are tried in order and the first rule whose
+/// pattern matches decides the spacing, so earlier arms take precedence over
+/// later, more general ones.
+fn apply_rules(
+    left: &ConcreteToken,
+    whitespace: &str,
+    right: &ConcreteToken,
+    code: &str,
+    indent_level: usize,
+) -> Vec<TextEdit> {
+    let mut edits = vec![];
+    // when we get here, neither left nor right should be whitespace
+
+    // if the right is a close brace, the indent level should be one less
+    let indent_level = if let ConcreteTokenKind::Syntax(TokenKind::Close(Delim::Brace)) = right.kind
+    {
+        indent_level.saturating_sub(1)
+    } else {
+        indent_level
+    };
+
+    let new_line_in_spaces = whitespace.contains('\n');
+
+    // The glob imports below bring the `Keyword` and `ClosedBinOp` *variants* of
+    // `TokenKind` into this scope; the explicit imports keep the enum types of the
+    // same names reachable, since an explicit import shadows a glob import.
+    use qsc_frontend::keyword::Keyword;
+    use qsc_frontend::lex::cooked::ClosedBinOp;
+    use ConcreteTokenKind::*;
+    use TokenKind::*;
+    match (&left.kind, &right.kind) {
+        (Comment | Syntax(DocComment), _) => {
+            // remove whitespace at the ends of comments
+            effect_trim_comment(left, &mut edits, code);
+            effect_correct_indentation(left, whitespace, right, &mut edits, indent_level);
+        }
+        (_, Comment) => {
+            // A comment that shares a line with the preceding token is left where it
+            // is; only a comment that starts its own line is re-indented.
+            if new_line_in_spaces {
+                effect_correct_indentation(left, whitespace, right, &mut edits, indent_level);
+            }
+        }
+        (Syntax(cooked_left), Syntax(cooked_right)) => match (cooked_left, cooked_right) {
+            (ClosedBinOp(ClosedBinOp::Minus), _) | (_, ClosedBinOp(ClosedBinOp::Minus)) => {
+                // This case is used to ignore the spacing around a `-`.
+                // This is done because we currently don't have the architecture
+                // to be able to differentiate between the unary `-` and the binary `-`
+                // which would have different spacing rules.
+            }
+            (Gt, _) | (_, Gt) | (Lt, _) | (_, Lt) => {
+                // This case is used to ignore the spacing around a `<` and `>`.
+                // This is done because we currently don't have the architecture
+                // to be able to differentiate between the comparison operators
+                // and the type-parameter delimiters which would have different
+                // spacing rules.
+            }
+            (Semi, _) => {
+                effect_correct_indentation(left, whitespace, right, &mut edits, indent_level);
+            }
+            (_, Semi) => {
+                effect_no_space(left, whitespace, right, &mut edits);
+            }
+            (Open(l), Close(r)) if l == r => {
+                // close empty delimiter blocks, i.e. (), [], {}
+                effect_no_space(left, whitespace, right, &mut edits);
+            }
+            (At, Ident) => {
+                effect_no_space(left, whitespace, right, &mut edits);
+            }
+            (Keyword(Keyword::Internal), _) => {
+                effect_single_space(left, whitespace, right, &mut edits);
+            }
+            (Keyword(Keyword::Adjoint), Keyword(Keyword::Controlled))
+            | (Keyword(Keyword::Controlled), Keyword(Keyword::Adjoint)) => {
+                effect_single_space(left, whitespace, right, &mut edits);
+            }
+            (Open(Delim::Brace), _)
+            | (_, Close(Delim::Brace))
+            | (_, Keyword(Keyword::Internal))
+            | (_, Keyword(Keyword::Operation))
+            | (_, Keyword(Keyword::Function))
+            | (_, Keyword(Keyword::Newtype))
+            | (_, Keyword(Keyword::Namespace))
+            | (_, Keyword(Keyword::Open))
+            | (_, Keyword(Keyword::Body))
+            | (_, Keyword(Keyword::Adjoint))
+            | (_, Keyword(Keyword::Controlled))
+            | (_, Keyword(Keyword::Let))
+            | (_, Keyword(Keyword::Mutable))
+            | (_, Keyword(Keyword::Set))
+            | (_, Keyword(Keyword::Use))
+            | (_, Keyword(Keyword::Borrow))
+            | (_, Keyword(Keyword::Fixup))
+            | (_, At) => {
+                effect_correct_indentation(left, whitespace, right, &mut edits, indent_level);
+            }
+            (_, TokenKind::Keyword(Keyword::Until))
+            | (_, TokenKind::Keyword(Keyword::In))
+            | (_, TokenKind::Keyword(Keyword::As))
+            | (_, TokenKind::Keyword(Keyword::Elif))
+            | (_, TokenKind::Keyword(Keyword::Else))
+            | (_, TokenKind::Keyword(Keyword::Apply)) => {
+                effect_single_space(left, whitespace, right, &mut edits);
+            }
+            (_, TokenKind::Keyword(Keyword::Auto))
+            | (_, TokenKind::Keyword(Keyword::Distribute))
+            | (_, TokenKind::Keyword(Keyword::Intrinsic))
+            | (_, TokenKind::Keyword(Keyword::Invert))
+            | (_, TokenKind::Keyword(Keyword::Slf)) => {
+                effect_single_space(left, whitespace, right, &mut edits);
+            }
+            (_, _) if new_line_in_spaces => {
+                effect_trim_whitespace(left, whitespace, right, &mut edits);
+                // Ignore the rest of the cases if the user has a newline in the whitespace.
+                // This is done because we don't currently have logic for determining when
+                // lines are too long and require newlines, and we don't have logic
+                // for determining what the correct indentation should be in these cases,
+                // so apart from trimming trailing whitespace we leave the user's line
+                // breaks and indentation unchanged.
+            }
+            (String(StringToken::Interpolated(_, InterpolatedEnding::LBrace)), _)
+            | (_, String(StringToken::Interpolated(InterpolatedStart::RBrace, _))) => {
+                effect_no_space(left, whitespace, right, &mut edits);
+            }
+            (Open(Delim::Bracket | Delim::Paren), _)
+            | (_, Close(Delim::Bracket | Delim::Paren)) => {
+                effect_no_space(left, whitespace, right, &mut edits);
+            }
+            (_, Open(Delim::Bracket | Delim::Paren)) => {
+                if is_value_token_left(cooked_left) || is_prefix(cooked_left) {
+                    // e.g. foo() or { foo }[3]
+                    effect_no_space(left, whitespace, right, &mut edits);
+                } else {
+                    // e.g. let x = (1, 2, 3);
+                    effect_single_space(left, whitespace, right, &mut edits);
+                }
+            }
+            (_, TokenKind::DotDotDot) => {
+                if is_value_token_left(cooked_left) {
+                    effect_no_space(left, whitespace, right, &mut edits);
+                } else {
+                    effect_single_space(left, whitespace, right, &mut edits);
+                }
+            }
+            (TokenKind::DotDotDot, TokenKind::Open(Delim::Brace)) => {
+                // Special case: `... {}`
+                effect_single_space(left, whitespace, right, &mut edits);
+            }
+            (_, TokenKind::Keyword(Keyword::Is))
+            | (_, TokenKind::Keyword(Keyword::For))
+            | (_, TokenKind::Keyword(Keyword::While))
+            | (_, TokenKind::Keyword(Keyword::Repeat))
+            | (_, TokenKind::Keyword(Keyword::If))
+            | (_, TokenKind::Keyword(Keyword::Within))
+            | (_, TokenKind::Keyword(Keyword::Return))
+            | (_, TokenKind::Keyword(Keyword::Fail)) => {
+                effect_single_space(left, whitespace, right, &mut edits);
+            }
+            (_, _) if is_value_token_right(cooked_right) => {
+                if is_prefix(cooked_left) {
+                    effect_no_space(left, whitespace, right, &mut edits);
+                } else {
+                    effect_single_space(left, whitespace, right, &mut edits);
+                }
+            }
+            (_, _) if is_suffix(cooked_right) => {
+                effect_no_space(left, whitespace, right, &mut edits);
+            }
+            (_, _) if is_prefix_with_space(cooked_right) => {
+                if is_prefix(cooked_left) {
+                    effect_no_space(left, whitespace, right, &mut edits);
+                } else {
+                    effect_single_space(left, whitespace, right, &mut edits);
+                }
+            }
+            (_, _) if is_prefix_without_space(cooked_right) => {
+                effect_no_space(left, whitespace, right, &mut edits);
+            }
+            (_, _) if is_bin_op(cooked_right) => {
+                effect_single_space(left, whitespace, right, &mut edits);
+            }
+            _ => {}
+        },
+        // Gaps next to an error token are left untouched.
+        _ => {}
+    }
+    edits
+}
+
+/// Returns `true` for tokens that are spaced like a binary operator, i.e. that
+/// `apply_rules` separates from the token before them with a single space.
+fn is_bin_op(cooked: &TokenKind) -> bool {
+    matches!(
+        cooked,
+        TokenKind::Bar
+            | TokenKind::BinOpEq(_)
+            | TokenKind::ClosedBinOp(_)
+            | TokenKind::Colon
+            | TokenKind::Eq
+            | TokenKind::EqEq
+            | TokenKind::FatArrow
+            | TokenKind::Gt
+            | TokenKind::Gte
+            | TokenKind::LArrow
+            | TokenKind::Lt
+            | TokenKind::Lte
+            | TokenKind::Ne
+            | TokenKind::Question
+            | TokenKind::RArrow
+            | TokenKind::WSlash
+            | TokenKind::WSlashEq
+            | TokenKind::Keyword(Keyword::And)
+            | TokenKind::Keyword(Keyword::Or)
+            // Technically the rest are not binary ops, but have the same spacing as one
+            | TokenKind::Keyword(Keyword::Not)
+            | TokenKind::Keyword(Keyword::AdjointUpper)
+            | TokenKind::Keyword(Keyword::ControlledUpper)
+    )
+}
+
+/// Returns `true` for prefix operators that are separated from the token before
+/// them by a single space (currently only `~~~`).
+fn is_prefix_with_space(cooked: &TokenKind) -> bool {
+    matches!(cooked, TokenKind::TildeTildeTilde)
+}
+
+/// Returns `true` for the operators `::`, `.` and `..`, which take no space
+/// before them.
+fn is_prefix_without_space(cooked: &TokenKind) -> bool {
+    matches!(
+        cooked,
+        TokenKind::ColonColon | TokenKind::Dot | TokenKind::DotDot
+    )
+}
+
+/// Returns `true` for any token that attaches directly to the value that
+/// follows it: the two prefix groups above plus `...`.
+fn is_prefix(cooked: &TokenKind) -> bool {
+    is_prefix_with_space(cooked)
+        || is_prefix_without_space(cooked)
+        || matches!(cooked, TokenKind::DotDotDot)
+}
+
+/// Returns `true` for tokens that attach directly to whatever precedes them
+/// (`!` and `,`).
+fn is_suffix(cooked: &TokenKind) -> bool {
+    matches!(cooked, TokenKind::Bang | TokenKind::Comma)
+}
+
+/// Returns `true` for keywords that are spaced like a value.
+fn is_keyword_value(keyword: &Keyword) -> bool {
+    use Keyword::*;
+    matches!(
+        keyword,
+        True | False | Zero | One | PauliI | PauliX | PauliY | PauliZ | Underscore
+        // Adj and Ctl are not really values, but have the same spacing as values
+        | Adj | Ctl
+    )
+}
+
+/// Returns `true` for literal and identifier tokens.
+/// Note that this does not include interpolated string literals
+fn is_value_lit(cooked: &TokenKind) -> bool {
+    matches!(
+        cooked,
+        TokenKind::BigInt(_)
+            | TokenKind::Float
+            | TokenKind::Ident
+            | TokenKind::AposIdent
+            | TokenKind::Int(_)
+            | TokenKind::String(StringToken::Normal)
+    )
+}
+
+/// Returns `true` if `cooked`, seen as the left-hand token of a pair, ends a value.
+fn is_value_token_left(cooked: &TokenKind) -> bool {
+    match cooked {
+        _ if is_value_lit(cooked) => true,
+        // the piece of an interpolated string that ends with the closing quote
+        TokenKind::String(StringToken::Interpolated(_, InterpolatedEnding::Quote)) => true,
+        TokenKind::Keyword(keyword) if is_keyword_value(keyword) => true,
+        TokenKind::Close(_) => true, // a closed delim represents a value on the left
+        _ => false,
+    }
+}
+
+/// Returns `true` if `cooked`, seen as the right-hand token of a pair, starts a value.
+fn is_value_token_right(cooked: &TokenKind) -> bool {
+    match cooked {
+        _ if is_value_lit(cooked) => true,
+        // the piece of an interpolated string that starts with `$"`
+        TokenKind::String(StringToken::Interpolated(InterpolatedStart::DollarQuote, _)) => true,
+        TokenKind::Keyword(keyword) if is_keyword_value(keyword) => true,
+        TokenKind::Open(_) => true, // an open delim represents a value on the right
+        _ => false,
+    }
+}
+
+/// Requires `left` and `right` to touch: queues an edit deleting the gap
+/// between them unless it is already empty.
+fn effect_no_space(
+    left: &ConcreteToken,
+    whitespace: &str,
+    right: &ConcreteToken,
+    edits: &mut Vec<TextEdit>,
+) {
+    if !whitespace.is_empty() {
+        edits.push(TextEdit::new("", left.span.hi, right.span.lo));
+    }
+}
+
+/// Requires exactly one space between `left` and `right`: queues an edit
+/// replacing the gap with `" "` unless it already is a single space.
+fn effect_single_space(
+    left: &ConcreteToken,
+    whitespace: &str,
+    right: &ConcreteToken,
+    edits: &mut Vec<TextEdit>,
+) {
+    if whitespace != " " {
+        edits.push(TextEdit::new(" ", left.span.hi, right.span.lo));
+    }
+}
+
+/// Queues an edit that strips trailing whitespace from the comment token `left`,
+/// if it has any.
+fn effect_trim_comment(left: &ConcreteToken, edits: &mut Vec<TextEdit>, code: &str) {
+    let comment_contents = get_token_contents(code, left);
+    let new_comment_contents = comment_contents.trim_end();
+    if comment_contents != new_comment_contents {
+        edits.push(TextEdit::new(
+            new_comment_contents,
+            left.span.lo,
+            left.span.hi,
+        ));
+    }
+}
+
+/// Cleans up a gap while respecting the user's layout: the number of newlines
+/// and the text after the last newline (the indentation the user chose) are
+/// kept, and everything else in the gap is dropped.
+fn effect_trim_whitespace(
+    left: &ConcreteToken,
+    whitespace: &str,
+    right: &ConcreteToken,
+    edits: &mut Vec<TextEdit>,
+) {
+    let count_newlines = whitespace.chars().filter(|c| *c == '\n').count();
+    // whatever follows the last newline; empty when the gap has no newline
+    let suffix = whitespace
+        .rsplit_once('\n')
+        .map_or("", |(_, suffix)| suffix);
+
+    // keep the line-ending style already used in the gap
+    let mut new_whitespace = if whitespace.contains("\r\n") {
+        "\r\n".repeat(count_newlines)
+    } else {
+        "\n".repeat(count_newlines)
+    };
+    new_whitespace.push_str(suffix);
+    if whitespace != new_whitespace {
+        edits.push(TextEdit::new(
+            new_whitespace.as_str(),
+            left.span.hi,
+            right.span.lo,
+        ));
+    }
+}
+
+/// Puts `right` on its own line at `indent_level`: the gap becomes the newlines
+/// the user already had (at least one) followed by the indentation string for
+/// `indent_level`.
+fn effect_correct_indentation(
+    left: &ConcreteToken,
+    whitespace: &str,
+    right: &ConcreteToken,
+    edits: &mut Vec<TextEdit>,
+    indent_level: usize,
+) {
+    // There should always be at least one newline
+    let count_newlines = whitespace.chars().filter(|c| *c == '\n').count().max(1);
+
+    // keep the line-ending style already used in the gap
+    let mut new_whitespace = if whitespace.contains("\r\n") {
+        "\r\n".repeat(count_newlines)
+    } else {
+        "\n".repeat(count_newlines)
+    };
+    new_whitespace.push_str(&make_indent_string(indent_level));
+    if whitespace != new_whitespace {
+        edits.push(TextEdit::new(
+            new_whitespace.as_str(),
+            left.span.hi,
+            right.span.lo,
+        ));
+    }
+}
+
+fn get_token_contents<'a>(code: &'a str, token: &ConcreteToken) -> &'a str {
+    &code[token.span.lo as usize..token.span.hi
as usize] +} diff --git a/compiler/qsc_formatter/src/formatter/tests.rs b/compiler/qsc_formatter/src/formatter/tests.rs new file mode 100644 index 0000000000..6847c80576 --- /dev/null +++ b/compiler/qsc_formatter/src/formatter/tests.rs @@ -0,0 +1,784 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use expect_test::{expect, Expect}; +use indoc::indoc; + +fn check(input: &str, expect: &Expect) { + let actual = super::format_str(input); + expect.assert_eq(&actual); +} + +// Removing trailing whitespace from lines + +#[test] +fn remove_trailing_spaces() { + let extra_spaces = " "; + let input = format!( + "/// Doc Comment with trailing spaces{extra_spaces} + operation Foo() : Unit {{ + // Comment with trailing spaces{extra_spaces} + let x = 3; // In-line comment with trailing spaces{extra_spaces} + let y = 4;{extra_spaces} + }} +" + ); + + check( + input.as_str(), + &expect![[r#" + /// Doc Comment with trailing spaces + operation Foo() : Unit { + // Comment with trailing spaces + let x = 3; // In-line comment with trailing spaces + let y = 4; + } + "#]], + ); +} + +#[test] +fn preserve_string_trailing_spaces() { + let extra_spaces = " "; + let input = format!( + "\"Hello{extra_spaces} +World\"" + ); + + assert!(super::calculate_format_edits(input.as_str()).is_empty()); +} + +// Namespace items begin on their own lines + +#[test] +fn namespace_items_begin_on_their_own_lines() { + check( + "operation Foo() : Unit {} function Bar() : Unit {}", + &expect![[r#" + operation Foo() : Unit {} + function Bar() : Unit {}"#]], + ); +} + +// Functor specializations begin on their own lines + +#[test] +fn functor_specs_begin_on_their_own_lines() { + check( + "operation Foo() : Unit { body ... {} adjoint ... {} controlled (c, ...) {} controlled adjoint (c, ...) {} }", + &expect![[r#" + operation Foo() : Unit { + body ... {} + adjoint ... {} + controlled (c, ...) {} + controlled adjoint (c, ...) 
{} + }"#]], + ); +} + +#[test] +fn single_space_between_adjoint_controlled_func_spec_keywords() { + check( + indoc! {" + operation Foo() : Unit { + body ... {} + adjoint ... {} + controlled (c, ...) {} + controlled adjoint (c, ...) {} + } + operation Bar() : Unit { + body ... {} + adjoint ... {} + controlled (c, ...) {} + adjoint controlled (c, ...) {} + }"}, + &expect![[r#" + operation Foo() : Unit { + body ... {} + adjoint ... {} + controlled (c, ...) {} + controlled adjoint (c, ...) {} + } + operation Bar() : Unit { + body ... {} + adjoint ... {} + controlled (c, ...) {} + adjoint controlled (c, ...) {} + }"#]], + ); +} + +// Single spaces before generator keywords + +#[test] +fn single_spaces_before_generator_keywords() { + check( + indoc! {" + operation Foo() : Unit { + body ... intrinsic + adjoint ... invert + controlled (c, ...) distribute + controlled adjoint (c, ...) auto + adjoint ... self + }"}, + &expect![[r#" + operation Foo() : Unit { + body ... intrinsic + adjoint ... invert + controlled (c, ...) distribute + controlled adjoint (c, ...) auto + adjoint ... self + }"#]], + ); +} + +// Single spaces around most binary operators + +#[test] +fn singe_space_around_arithmetic_bin_ops() { + // Note that `-` is missing at this time due to it being unsupported for formatting. + check( + indoc! {" + 1+2; + 1 * 2; + 4 /2; + 3% 2; + 2 ^ 3; + "}, + &expect![[r#" + 1 + 2; + 1 * 2; + 4 / 2; + 3 % 2; + 2 ^ 3; + "#]], + ); +} + +#[test] +fn singe_space_around_bit_wise_bin_ops() { + check( + indoc! {" + 1&&&2; + 1 ||| 2; + 4 ^^^2; + 3<<< 2; + 2 >>> 3; + "}, + &expect![[r#" + 1 &&& 2; + 1 ||| 2; + 4 ^^^ 2; + 3 <<< 2; + 2 >>> 3; + "#]], + ); +} + +#[test] +fn singe_space_around_boolean_bin_ops() { + check( + indoc! {" + true and false; + true or false; + "}, + &expect![[r#" + true and false; + true or false; + "#]], + ); +} + +#[test] +fn singe_space_around_bin_op_equals() { + check( + indoc! 
{" + let x += y; + let x -=y; + let x*= y; + let x /= y; + let x %= y; + "}, + &expect![[r#" + let x += y; + let x -= y; + let x *= y; + let x /= y; + let x %= y; + "#]], + ); +} + +#[test] +fn singe_space_around_equals() { + check("let x = 3;", &expect!["let x = 3;"]); +} + +#[test] +fn singe_space_around_colon() { + check("let x : Int = 3;", &expect!["let x : Int = 3;"]); +} + +#[test] +fn singe_space_around_comp_ops() { + // Note that `<` and `>` are missing at this time due to them being unsupported for formatting. + check( + indoc! {" + x <=y; + x >= y; + x == y; + x != y; + "}, + &expect![[r#" + x <= y; + x >= y; + x == y; + x != y; + "#]], + ); +} + +#[test] +fn singe_space_around_ternary() { + check("x? 3| 4", &expect!["x ? 3 | 4"]); +} + +#[test] +fn singe_space_around_copy() { + check("x w/3 <- 4", &expect!["x w/ 3 <- 4"]); +} + +#[test] +fn singe_space_around_copy_and_update() { + check("x w/=3 <- 4", &expect!["x w/= 3 <- 4"]); +} + +#[test] +fn singe_space_around_lambda_ops() { + check( + indoc! 
{" + let x = () -> (); + let y = ()=>(); + "}, + &expect![[r#" + let x = () -> (); + let y = () => (); + "#]], + ); +} + +#[test] +fn singe_space_around_characteristic_expr() { + check( + "operation Foo() : Unit is Adj+Ctl {}", + &expect!["operation Foo() : Unit is Adj + Ctl {}"], + ); +} + +#[test] +fn singe_space_around_functors() { + check( + "Controlled Adjoint Foo()", + &expect!["Controlled Adjoint Foo()"], + ); +} + +#[test] +fn singe_space_around_as() { + check( + "open thing as other;", + &expect!["open thing as other;"], + ); +} + +// No space between unary operators and their operand + +#[test] +fn no_space_before_unwrap() { + check("let x = foo !;", &expect!["let x = foo!;"]); +} + +#[test] +fn no_space_after_bit_negation() { + check("let x = ~~~ 3;", &expect!["let x = ~~~3;"]); +} + +#[test] +fn single_space_around_boolean_negation() { + check("let x = not 3;", &expect!["let x = not 3;"]); +} + +// No space after open parentheses and brackets and before close parentheses and brackets + +#[test] +fn no_space_for_parentheses() { + check("( 12, 13, 14 )", &expect!["(12, 13, 14)"]); +} + +#[test] +fn no_space_for_brackets() { + check("[ 12 + 13 + 14 ]", &expect!["[12 + 13 + 14]"]); +} + +// No space after open string-interpolation argument braces and before close string-interpolation argument braces + +#[test] +fn no_space_for_string_interpolation_argument_braces() { + check( + r#"let x = $"First { 1 + 1 } Third";"#, + &expect![[r#"let x = $"First {1 + 1} Third";"#]], + ); +} + +// No space before commas or semicolons + +#[test] +fn no_space_before_comma() { + check("(12 , 13 , 14)", &expect!["(12, 13, 14)"]); +} + +#[test] +fn no_space_before_semicolons() { + check("let x = 3 ;", &expect!["let x = 3;"]); +} + +// Newline after semicolons + +#[test] +fn newline_after_semicolon() { + check( + "let x = 3; let y = 2;", + &expect![[r#" + let x = 3; + let y = 2;"#]], + ); +} + +#[test] +fn preserve_eol_comment() { + let input = indoc! 
{"let x = 3; // End-of-line Comment + let y = 2; + "}; + assert!(super::calculate_format_edits(input).is_empty()); +} + +// Newline before declaration keywords + +#[test] +fn newline_before_let() { + check( + "let x = 3; {} let y = 2;", + &expect![[r#" + let x = 3; + {} + let y = 2;"#]], + ); +} + +#[test] +fn newline_before_mutable() { + check( + "mutable x = 3; {} mutable y = 2;", + &expect![[r#" + mutable x = 3; + {} + mutable y = 2;"#]], + ); +} + +#[test] +fn newline_before_set() { + check( + "set x = 3; {} set y = 2;", + &expect![[r#" + set x = 3; + {} + set y = 2;"#]], + ); +} + +#[test] +fn newline_before_use() { + check( + "use q = Qubit(); {} use w = Qubit();", + &expect![[r#" + use q = Qubit(); + {} + use w = Qubit();"#]], + ); +} + +#[test] +fn newline_before_borrow() { + check( + "borrow q = Qubit(); {} borrow w = Qubit();", + &expect![[r#" + borrow q = Qubit(); + {} + borrow w = Qubit();"#]], + ); +} + +// Single space before control-flow-helper keywords + +#[test] +fn single_space_before_in() { + check("for x in 0..2 {}", &expect![[r#"for x in 0..2 {}"#]]); +} + +#[test] +fn single_space_before_until() { + check( + "repeat {} until x fixup {}", + &expect![[r#" + repeat {} until x + fixup {}"#]], + ); +} + +#[test] +fn single_space_before_elif_and_else() { + check( + "if x {} elif y {} else {}", + &expect!["if x {} elif y {} else {}"], + ); +} + +#[test] +fn single_space_before_apply() { + check("within {} apply {}", &expect!["within {} apply {}"]); +} + +// No space between caller expressions and argument tuple + +#[test] +fn no_space_in_front_of_argument_tuple() { + check("Foo (1, 2, 3)", &expect!["Foo(1, 2, 3)"]); +} + +#[test] +fn no_space_in_front_of_parameter_tuple() { + check( + "operation Foo (x : Int, y : Int) : Unit {}", + &expect!["operation Foo(x : Int, y : Int) : Unit {}"], + ); +} + +// No space between array expressions and indexing brackets + +#[test] +fn no_space_in_front_of_array_indexing() { + check("arr [4]", &expect!["arr[4]"]); 
+} + +// No space around `.`, `..`, and `::` operators + +#[test] +fn no_space_around_dot_operator() { + check("let x = thing . other;", &expect!["let x = thing.other;"]); +} + +#[test] +fn no_space_around_range_operator() { + check("let x = 1 .. 4;", &expect!["let x = 1..4;"]); +} + +#[test] +fn no_space_around_field_operator() { + check("let x = thing :: other;", &expect!["let x = thing::other;"]); +} + +// No space between the `…` operator and any possible operands on either side + +#[test] +fn no_space_around_full_range_in_slice() { + check("let x = y[ ... ];", &expect!["let x = y[...];"]); +} + +#[test] +fn no_space_between_open_end_range_and_operand() { + check("let x = 15 ...;", &expect!["let x = 15...;"]); +} + +#[test] +fn no_space_between_open_start_range_and_operand() { + check("let x = ... 15;", &expect!["let x = ...15;"]); +} + +// Single space before open brace and newline after, except empty blocks have no space + +#[test] +fn single_space_before_open_brace_and_newline_after() { + check( + indoc! {r#" + operation Foo() : Unit{ let x = 3; } + operation Bar() : Unit + { { let x = 3; }{ let x = 4; } } + "#}, + &expect![[r#" + operation Foo() : Unit { + let x = 3; + } + operation Bar() : Unit + { + { + let x = 3; + } { + let x = 4; + } + } + "#]], + ); +} + +#[test] +fn remove_spaces_between_empty_delimiters() { + check( + indoc! {r#" + operation Foo() : Unit { + } + operation Bar() : Unit { + operation Baz() : Unit { } + let x = { + + }; + let y : Int[] = [ ]; + let z = ( + + ); + } + "#}, + &expect![[r#" + operation Foo() : Unit {} + operation Bar() : Unit { + operation Baz() : Unit {} + let x = {}; + let y : Int[] = []; + let z = (); + } + "#]], + ); +} + +// Single space before literals + +#[test] +fn single_space_before_literals() { + check( + indoc! 
{" + let x = 15; + let x = 0xF; + let x = 15.0; + let x = 15L; + let x = \"Fifteen\"; + let x = $\"Fifteen\"; + let x = PauliI; + let x = PauliX; + let x = PauliY; + let x = PauliZ; + let x = true; + let x = false; + let x = One; + let x = Zero; + "}, + &expect![[r#" + let x = 15; + let x = 0xF; + let x = 15.0; + let x = 15L; + let x = "Fifteen"; + let x = $"Fifteen"; + let x = PauliI; + let x = PauliX; + let x = PauliY; + let x = PauliZ; + let x = true; + let x = false; + let x = One; + let x = Zero; + "#]], + ); +} + +// Single space before types + +#[test] +fn single_space_before_types() { + check( + "let x : (Int, Double, String[], (BigInt, Unit), ('T,)) => 'T = foo;", + &expect![[r#"let x : (Int, Double, String[], (BigInt, Unit), ('T,)) => 'T = foo;"#]], + ); +} + +// Single space before variables + +#[test] +fn single_space_before_idents() { + check("let x = foo;", &expect!["let x = foo;"]); +} + +// Formatter continues after error token + +#[test] +fn formatter_continues_after_error_token() { + check( + indoc! 
{" + let x : ' T = foo; + let x : ` T = foo; + let x : & T = foo; + let x : || T = foo; + let x : ^^ T = foo; + "}, + &expect![[r#" + let x : ' T = foo; + let x : ` T = foo; + let x : & T = foo; + let x : || T = foo; + let x : ^^ T = foo; + "#]], + ); +} + +#[test] +fn formatter_does_not_crash_on_non_terminating_string() { + super::calculate_format_edits("let x = \"Hello World"); +} + +// Correct indentation, which increases by four spaces when a brace-delimited block is opened and decreases when block is closed + +#[test] +fn formatting_corrects_indentation() { + check( + r#" + /// First +/// Second + /// Third + namespace MyQuantumProgram { + open Microsoft.Quantum.Diagnostics; + + @EntryPoint() + operation Main() : Int { + let x = 3; + let y = 4; + + // Comment + return 5; + } + } +"#, + &expect![[r#" + /// First + /// Second + /// Third + namespace MyQuantumProgram { + open Microsoft.Quantum.Diagnostics; + + @EntryPoint() + operation Main() : Int { + let x = 3; + let y = 4; + + // Comment + return 5; + } + } + "#]], + ); +} + +#[test] +fn preserve_string_indentation() { + let input = r#""Hello + World""#; + + assert!(super::calculate_format_edits(input).is_empty()); +} + +// Will respect user new-lines and indentation added into expressions + +#[test] +fn preserve_user_newlines_in_expressions() { + let input = indoc! {r#" + let x = [ + thing1, + thing2, + thing3, + ]; + let y = 1 + 2 + 3 + 4 + 5 + + 6 + 7 + 8 + 9 + 10; + "#}; + assert!(super::calculate_format_edits(input).is_empty()); +} + +// Remove extra whitespace from start of code + +#[test] +fn remove_extra_whitespace_from_start_of_code() { + let input = indoc! {r#" + + + + + namespace Foo {}"#}; + + check(input, &expect!["namespace Foo {}"]); +} + +// Extra test cases for sanity + +#[test] +fn preserve_comments_at_start_of_file() { + let input = indoc! 
{r#" + // Initial Comment + namespace Foo {}"#}; + + assert!(super::calculate_format_edits(input).is_empty()); +} + +#[test] +fn sample_has_no_formatting_changes() { + let input = indoc! {r#" + /// # Sample + /// Joint Measurement + /// + /// # Description + /// Joint measurements, also known as Pauli measurements, are a generalization + /// of 2-outcome measurements to multiple qubits and other bases. + namespace Sample { + open Microsoft.Quantum.Diagnostics; + + @EntryPoint() + operation Main() : (Result, Result[]) { + // Prepare an entangled state. + use qs = Qubit[2]; // |00〉 + H(qs[0]); // 1/sqrt(2)(|00〉 + |10〉) + CNOT(qs[0], qs[1]); // 1/sqrt(2)(|00〉 + |11〉) + + // Show the quantum state before performing the joint measurement. + DumpMachine(); + + // The below code uses a joint measurement as a way to check the parity + // of the first two qubits. In this case, the parity measurement result + // will always be `Zero`. + // Notice how the state was not collapsed by the joint measurement. + let parityResult = Measure([PauliZ, PauliZ], qs[...1]); + DumpMachine(); + + // However, if we perform a measurement just on the first qubit, we can + // see how the state collapses. + let firstQubitResult = M(qs[0]); + DumpMachine(); + + // Measuring the last qubit does not change the quantum state + // since the state of the second qubit collapsed when the first qubit + // was measured because they were entangled. + let secondQubitResult = M(qs[1]); + DumpMachine(); + + ResetAll(qs); + return (parityResult, [firstQubitResult, secondQubitResult]); + } + } + "#}; + assert!(super::calculate_format_edits(input).is_empty()); +} diff --git a/compiler/qsc_formatter/src/lib.rs b/compiler/qsc_formatter/src/lib.rs new file mode 100644 index 0000000000..dcfeb86981 --- /dev/null +++ b/compiler/qsc_formatter/src/lib.rs @@ -0,0 +1,13 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +//! 
This module contains the Q# formatter, which can be used by calling +//! the format function available from this module. The formatting algorithm +//! uses the cooked and concrete tokens from the parser crate to create a +//! token stream of the given source code string. It then uses a sliding window +//! over this token stream to apply formatting rules when the selected tokens +//! match certain patterns. Formatting rules will generate text edit objects +//! when the format of the input string does not match the expected format, and +//! these edits are returned on using the formatter. + +pub mod formatter; diff --git a/compiler/qsc_frontend/src/lib.rs b/compiler/qsc_frontend/src/lib.rs index 221ee9ccc7..c292e1e616 100644 --- a/compiler/qsc_frontend/src/lib.rs +++ b/compiler/qsc_frontend/src/lib.rs @@ -8,3 +8,6 @@ pub mod incremental; mod lower; pub mod resolve; pub mod typeck; + +pub use qsc_parse::keyword; +pub use qsc_parse::lex; diff --git a/compiler/qsc_parse/Cargo.toml b/compiler/qsc_parse/Cargo.toml index bce6d7ea9d..f77b6215df 100644 --- a/compiler/qsc_parse/Cargo.toml +++ b/compiler/qsc_parse/Cargo.toml @@ -19,6 +19,7 @@ thiserror = { workspace = true } [dev-dependencies] expect-test = { workspace = true } +indoc = { workspace = true } [lints] workspace = true diff --git a/compiler/qsc_parse/src/keyword.rs b/compiler/qsc_parse/src/keyword.rs index 22ab75ef15..2a808e7806 100644 --- a/compiler/qsc_parse/src/keyword.rs +++ b/compiler/qsc_parse/src/keyword.rs @@ -8,7 +8,7 @@ use std::{ }; #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq, Sequence)] -pub(super) enum Keyword { +pub enum Keyword { Adj, Adjoint, AdjointUpper, diff --git a/compiler/qsc_parse/src/lex.rs b/compiler/qsc_parse/src/lex.rs index 9c6d6721ba..96bfd98781 100644 --- a/compiler/qsc_parse/src/lex.rs +++ b/compiler/qsc_parse/src/lex.rs @@ -1,8 +1,9 @@ // Copyright (c) Microsoft Corporation. // Licensed under the MIT License. 
-mod cooked; -mod raw; +pub mod concrete; +pub mod cooked; +pub mod raw; use enum_iterator::Sequence; @@ -10,7 +11,7 @@ pub(super) use cooked::{ClosedBinOp, Error, Lexer, StringToken, Token, TokenKind /// A delimiter token. #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub(super) enum Delim { +pub enum Delim { /// `{` or `}` Brace, /// `[` or `]` @@ -20,7 +21,7 @@ pub(super) enum Delim { } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub(super) enum Radix { +pub enum Radix { Binary, Octal, Decimal, @@ -39,13 +40,13 @@ impl From for u32 { } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub(super) enum InterpolatedStart { +pub enum InterpolatedStart { DollarQuote, RBrace, } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub(super) enum InterpolatedEnding { +pub enum InterpolatedEnding { Quote, LBrace, } diff --git a/compiler/qsc_parse/src/lex/concrete.rs b/compiler/qsc_parse/src/lex/concrete.rs new file mode 100644 index 0000000000..dee4f6c63c --- /dev/null +++ b/compiler/qsc_parse/src/lex/concrete.rs @@ -0,0 +1,150 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +use std::iter::Peekable; + +use qsc_data_structures::span::Span; + +use super::{cooked, raw}; + +/// This struct extends cooked tokens to include whitespace and comment tokens. +/// Whitespace and comment tokens were removed during the creation of cooked tokens +/// because they are generally not useful for compilation, but they are reintroduced +/// here because they are needed for formatting. +pub struct ConcreteToken { + pub kind: ConcreteTokenKind, + pub span: Span, +} + +/// This enum extends the cooked token kind to include whitespace and comment token kinds. +#[derive(Debug, PartialEq)] +pub enum ConcreteTokenKind { + Syntax(cooked::TokenKind), + Error(cooked::Error), + WhiteSpace, + Comment, +} + +/// This is an iterator over `ConcreteTokens`, creating the tokens from a source str. 
+/// It works by running the cooked lexer on the source str, and iterating over +/// those cooked tokens. Whenever adjacent cooked tokens are found to have a gap +/// between their spans, the raw lexer is run on that slice of the source str to +/// generate the raw tokens (which should only produce the non-compilation whitespace +/// and comment tokens) for that slice, which are iterated over before continuing +/// with the cooked tokens. +pub struct ConcreteTokenIterator<'a> { + code: &'a str, + cooked_tokens: Peekable>, + non_compilation_tokens: Peekable>, +} + +impl<'a> ConcreteTokenIterator<'a> { + #[must_use] + pub fn new(code: &'a str) -> Self { + let mut cooked_tokens = cooked::Lexer::new(code).peekable(); + let non_compilation_tokens = match cooked_tokens.peek() { + Some(first) => { + let lo = match first { + Ok(okay) => okay.span.lo, + Err(err) => err.span().lo, + }; + if lo != 0 { + match get_tokens_from_span(code, 0, lo) { + Some(iter) => iter, + None => raw::Lexer::new("").peekable(), + } + } else { + raw::Lexer::new("").peekable() + } + } + None => raw::Lexer::new(code).peekable(), + }; + Self { + code, + cooked_tokens, + non_compilation_tokens, + } + } + + fn get_tokens_from_span(&mut self, lo: u32, hi: u32) { + if let Some(iter) = get_tokens_from_span(self.code, lo, hi) { + self.non_compilation_tokens = iter; + } + } + + fn get_next_lo(&mut self) -> u32 { + match self.non_compilation_tokens.peek() { + Some(next) => next.offset, + None => match self.cooked_tokens.peek() { + Some(next) => match next { + Ok(next) => next.span.lo, + Err(err) => err.span().lo, + }, + None => self + .code + .len() + .try_into() + .expect("expected length of code to fit into u32"), + }, + } + } +} + +fn get_tokens_from_span(code: &str, lo: u32, hi: u32) -> Option>> { + let starting_offset = lo; + let lo = lo as usize; + let hi = hi as usize; + code.get(lo..hi) + .map(|slice| raw::Lexer::new_with_starting_offset(slice, starting_offset).peekable()) +} + +impl Iterator for 
ConcreteTokenIterator<'_> { + type Item = ConcreteToken; + + fn next(&mut self) -> Option { + match self.non_compilation_tokens.next() { + Some(raw_token) => { + let next_lo = self.get_next_lo(); + let span = Span { + lo: raw_token.offset, + hi: next_lo, + }; + let concrete = match raw_token.kind { + raw::TokenKind::Comment(_) => ConcreteToken { + kind: ConcreteTokenKind::Comment, + span, + }, + raw::TokenKind::Whitespace => ConcreteToken { + kind: ConcreteTokenKind::WhiteSpace, + span, + }, + _ => { + return self.next(); + } + }; + Some(concrete) + } + None => match self.cooked_tokens.next()? { + Ok(token) => { + let next_lo = self.get_next_lo(); + self.get_tokens_from_span(token.span.hi, next_lo); + let syntax = ConcreteToken { + kind: ConcreteTokenKind::Syntax(token.kind), + span: token.span, + }; + Some(syntax) + } + Err(err) => { + let next_lo = self.get_next_lo(); + let span = err.span(); + self.get_tokens_from_span(span.hi, next_lo); + let error = ConcreteToken { + kind: ConcreteTokenKind::Error(err), + span, + }; + Some(error) + } + }, + } + } +} diff --git a/compiler/qsc_parse/src/lex/cooked.rs b/compiler/qsc_parse/src/lex/cooked.rs index 4cc0c9e41a..e273d483c3 100644 --- a/compiler/qsc_parse/src/lex/cooked.rs +++ b/compiler/qsc_parse/src/lex/cooked.rs @@ -35,7 +35,7 @@ pub(crate) struct Token { } #[derive(Clone, Copy, Debug, Diagnostic, Eq, Error, PartialEq)] -pub(crate) enum Error { +pub enum Error { #[error("expected {0} to complete {1}, found {2}")] #[diagnostic(code("Qsc.Lex.Incomplete"))] Incomplete(raw::TokenKind, TokenKind, raw::TokenKind, #[label] Span), @@ -66,11 +66,20 @@ impl Error { Self::Unknown(c, span) => Self::Unknown(c, span + offset), } } + + pub(crate) fn span(self) -> Span { + match self { + Error::Incomplete(_, _, _, s) + | Error::IncompleteEof(_, _, s) + | Error::UnterminatedString(s) + | Error::Unknown(_, s) => s, + } + } } /// A token kind. 
#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub(crate) enum TokenKind { +pub enum TokenKind { /// `'T` /// used for generic parameters -- an apostrophe followed by an ident. AposIdent, @@ -210,7 +219,7 @@ impl From for TokenKind { /// the domain of the first operand is closed under this operation. These are candidates for /// compound assignment operators, like `+=`. #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub(crate) enum ClosedBinOp { +pub enum ClosedBinOp { /// `&&&` AmpAmpAmp, /// `and` @@ -260,7 +269,7 @@ impl Display for ClosedBinOp { } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub(crate) enum StringToken { +pub enum StringToken { Normal, Interpolated(InterpolatedStart, InterpolatedEnding), } diff --git a/compiler/qsc_parse/src/lex/raw.rs b/compiler/qsc_parse/src/lex/raw.rs index 5eda442568..ad8be008cb 100644 --- a/compiler/qsc_parse/src/lex/raw.rs +++ b/compiler/qsc_parse/src/lex/raw.rs @@ -25,15 +25,15 @@ use std::{ /// A raw token. #[derive(Clone, Debug, Eq, PartialEq)] -pub(super) struct Token { +pub struct Token { /// The token kind. - pub(super) kind: TokenKind, + pub kind: TokenKind, /// The byte offset of the token starting character. - pub(super) offset: u32, + pub offset: u32, } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub(crate) enum TokenKind { +pub enum TokenKind { Comment(CommentKind), Ident, Number(Number), @@ -62,7 +62,7 @@ impl Display for TokenKind { /// A single-character operator token. 
#[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub(crate) enum Single { +pub enum Single { /// `&` Amp, /// `'` @@ -143,14 +143,14 @@ impl Display for Single { } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub(crate) enum Number { +pub enum Number { BigInt(Radix), Float, Int(Radix), } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub(crate) enum StringToken { +pub enum StringToken { Normal { terminated: bool }, Interpolated(InterpolatedStart, Option), } @@ -162,22 +162,34 @@ enum StringKind { } #[derive(Clone, Copy, Debug, Eq, PartialEq, Sequence)] -pub(crate) enum CommentKind { +pub enum CommentKind { Normal, Doc, } #[derive(Clone)] -pub(super) struct Lexer<'a> { +pub struct Lexer<'a> { chars: Peekable>, interpolation: u8, + starting_offset: u32, } impl<'a> Lexer<'a> { - pub(super) fn new(input: &'a str) -> Self { + #[must_use] + pub fn new(input: &'a str) -> Self { Self { chars: input.char_indices().peekable(), interpolation: 0, + starting_offset: 0, + } + } + + #[must_use] + pub fn new_with_starting_offset(input: &'a str, starting_offset: u32) -> Self { + Self { + chars: input.char_indices().peekable(), + interpolation: 0, + starting_offset, } } @@ -388,9 +400,10 @@ impl Iterator for Lexer<'_> { .or_else(|| single(c).map(TokenKind::Single)) .unwrap_or(TokenKind::Unknown) }; + let offset: u32 = offset.try_into().expect("offset should fit into u32"); Some(Token { kind, - offset: offset.try_into().expect("offset should fit into u32"), + offset: offset + self.starting_offset, }) } } diff --git a/compiler/qsc_parse/src/lib.rs b/compiler/qsc_parse/src/lib.rs index 5179bc52f7..637efcb8ad 100644 --- a/compiler/qsc_parse/src/lib.rs +++ b/compiler/qsc_parse/src/lib.rs @@ -7,8 +7,8 @@ mod expr; mod item; -mod keyword; -mod lex; +pub mod keyword; +pub mod lex; mod prim; mod scan; mod stmt; diff --git a/language_service/src/completion.rs b/language_service/src/completion.rs index 13107e528d..01390d3068 100644 --- 
a/language_service/src/completion.rs +++ b/language_service/src/completion.rs @@ -5,7 +5,7 @@ mod tests; use crate::compilation::{Compilation, CompilationKind}; -use crate::protocol::{CompletionItem, CompletionItemKind, CompletionList}; +use crate::protocol::{CompletionItem, CompletionItemKind, CompletionList, TextEdit}; use crate::qsc_utils::{into_range, span_contains}; use qsc::ast::visit::{self, Visitor}; use qsc::display::{CodeDisplay, Lookup}; @@ -449,14 +449,14 @@ impl CompletionListBuilder { Some(alias) => alias.as_ref().cloned(), None => match insert_open_at { Some(start) => { - additional_edits.push(( - start, - format!( + additional_edits.push(TextEdit { + new_text: format!( "open {};{}", namespace.name.clone(), indent, ), - )); + range: start, + }); None } None => Some(namespace.name.clone()), diff --git a/language_service/src/completion/tests.rs b/language_service/src/completion/tests.rs index 51ffe7d6c5..34a6c24517 100644 --- a/language_service/src/completion/tests.rs +++ b/language_service/src/completion/tests.rs @@ -151,8 +151,9 @@ fn ignore_unstable_callable() { ), additional_text_edits: Some( [ - ( - Range { + TextEdit { + new_text: "open FakeStdLib;\n ", + range: Range { start: Position { line: 2, column: 12, @@ -162,8 +163,7 @@ fn ignore_unstable_callable() { column: 12, }, }, - "open FakeStdLib;\n ", - ), + }, ], ), }, @@ -203,8 +203,9 @@ fn ignore_internal_callable() { ), additional_text_edits: Some( [ - ( - Range { + TextEdit { + new_text: "open FakeStdLib;\n ", + range: Range { start: Position { line: 2, column: 12, @@ -214,8 +215,7 @@ fn ignore_internal_callable() { column: 12, }, }, - "open FakeStdLib;\n ", - ), + }, ], ), }, @@ -334,8 +334,9 @@ fn in_block_contains_std_functions() { ), additional_text_edits: Some( [ - ( - Range { + TextEdit { + new_text: "open FakeStdLib;\n ", + range: Range { start: Position { line: 1, column: 4, @@ -345,8 +346,7 @@ fn in_block_contains_std_functions() { column: 4, }, }, - "open FakeStdLib;\n ", - ), + }, 
], ), }, @@ -363,8 +363,9 @@ fn in_block_contains_std_functions() { ), additional_text_edits: Some( [ - ( - Range { + TextEdit { + new_text: "open FakeStdLib;\n ", + range: Range { start: Position { line: 1, column: 4, @@ -374,8 +375,7 @@ fn in_block_contains_std_functions() { column: 4, }, }, - "open FakeStdLib;\n ", - ), + }, ], ), }, @@ -392,8 +392,9 @@ fn in_block_contains_std_functions() { ), additional_text_edits: Some( [ - ( - Range { + TextEdit { + new_text: "open FakeStdLib;\n ", + range: Range { start: Position { line: 1, column: 4, @@ -403,8 +404,7 @@ fn in_block_contains_std_functions() { column: 4, }, }, - "open FakeStdLib;\n ", - ), + }, ], ), }, @@ -566,8 +566,9 @@ fn in_block_from_other_namespace() { ), additional_text_edits: Some( [ - ( - Range { + TextEdit { + new_text: "open Other;\n ", + range: Range { start: Position { line: 1, column: 4, @@ -577,8 +578,7 @@ fn in_block_from_other_namespace() { column: 4, }, }, - "open Other;\n ", - ), + }, ], ), }, @@ -618,8 +618,9 @@ fn auto_open_multiple_files() { ), additional_text_edits: Some( [ - ( - Range { + TextEdit { + new_text: "open Foo;\n ", + range: Range { start: Position { line: 0, column: 16, @@ -629,8 +630,7 @@ fn auto_open_multiple_files() { column: 16, }, }, - "open Foo;\n ", - ), + }, ], ), }, @@ -797,8 +797,9 @@ fn stdlib_udt() { ), additional_text_edits: Some( [ - ( - Range { + TextEdit { + new_text: "open FakeStdLib;\n ", + range: Range { start: Position { line: 1, column: 4, @@ -808,8 +809,7 @@ fn stdlib_udt() { column: 4, }, }, - "open FakeStdLib;\n ", - ), + }, ], ), }, @@ -876,8 +876,9 @@ fn notebook_top_level() { ), additional_text_edits: Some( [ - ( - Range { + TextEdit { + new_text: "open FakeStdLib;\n", + range: Range { start: Position { line: 0, column: 0, @@ -887,8 +888,7 @@ fn notebook_top_level() { column: 0, }, }, - "open FakeStdLib;\n", - ), + }, ], ), }, @@ -922,8 +922,9 @@ fn notebook_top_level_global() { ), additional_text_edits: Some( [ - ( - Range { + TextEdit { + 
new_text: "open FakeStdLib;\n", + range: Range { start: Position { line: 0, column: 0, @@ -933,8 +934,7 @@ fn notebook_top_level_global() { column: 0, }, }, - "open FakeStdLib;\n", - ), + }, ], ), }, @@ -1001,8 +1001,9 @@ fn notebook_block() { ), additional_text_edits: Some( [ - ( - Range { + TextEdit { + new_text: "open FakeStdLib;\n", + range: Range { start: Position { line: 0, column: 0, @@ -1012,8 +1013,7 @@ fn notebook_block() { column: 0, }, }, - "open FakeStdLib;\n", - ), + }, ], ), }, @@ -1056,8 +1056,9 @@ fn notebook_auto_open_start_of_cell_empty() { ), additional_text_edits: Some( [ - ( - Range { + TextEdit { + new_text: "open FakeStdLib;\n", + range: Range { start: Position { line: 0, column: 0, @@ -1067,8 +1068,7 @@ fn notebook_auto_open_start_of_cell_empty() { column: 0, }, }, - "open FakeStdLib;\n", - ), + }, ], ), }, @@ -1100,8 +1100,9 @@ fn notebook_auto_open_start_of_cell() { ), additional_text_edits: Some( [ - ( - Range { + TextEdit { + new_text: "open FakeStdLib;\n ", + range: Range { start: Position { line: 0, column: 3, @@ -1111,8 +1112,7 @@ fn notebook_auto_open_start_of_cell() { column: 3, }, }, - "open FakeStdLib;\n ", - ), + }, ], ), }, diff --git a/language_service/src/format.rs b/language_service/src/format.rs new file mode 100644 index 0000000000..0adf0443a2 --- /dev/null +++ b/language_service/src/format.rs @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +use crate::{compilation::Compilation, protocol::TextEdit}; + +use qsc::formatter::calculate_format_edits; +use qsc::line_column::{Encoding, Range}; + +pub(crate) fn get_format_changes( + compilation: &Compilation, + source_name: &str, + encoding: Encoding, +) -> Vec { + let contents = &compilation + .user_unit() + .sources + .find_by_name(source_name) + .expect("can't find source by name") + .contents; + + calculate_format_edits(contents) + .into_iter() + .map(|edit| TextEdit { + new_text: edit.new_text, + range: Range::from_span(encoding, contents, &edit.span), + }) + .collect() +} diff --git a/language_service/src/lib.rs b/language_service/src/lib.rs index 5103aa1f6b..c8ae936589 100644 --- a/language_service/src/lib.rs +++ b/language_service/src/lib.rs @@ -5,6 +5,7 @@ pub mod code_lens; mod compilation; pub mod completion; pub mod definition; +pub mod format; pub mod hover; mod name_locator; mod project_system; @@ -24,7 +25,7 @@ use futures::channel::mpsc::{unbounded, UnboundedReceiver, UnboundedSender}; use futures_util::StreamExt; use log::{trace, warn}; use protocol::{ - CodeLens, CompletionList, DiagnosticUpdate, Hover, NotebookMetadata, SignatureHelp, + CodeLens, CompletionList, DiagnosticUpdate, Hover, NotebookMetadata, SignatureHelp, TextEdit, WorkspaceConfigurationUpdate, }; use qsc::{ @@ -218,6 +219,19 @@ impl LanguageService { ) } + /// LSP: textDocument/format + #[must_use] + pub fn get_format_changes(&self, uri: &str) -> Vec { + self.document_op( + |compilation, uri, (), position_encoding| { + format::get_format_changes(compilation, uri, position_encoding) + }, + "get_format_changes", + uri, + (), + ) + } + /// LSP: textDocument/hover #[must_use] pub fn get_hover(&self, uri: &str, position: Position) -> Option { diff --git a/language_service/src/protocol.rs b/language_service/src/protocol.rs index 0d17894171..d19ab3cbab 100644 --- a/language_service/src/protocol.rs +++ b/language_service/src/protocol.rs @@ -43,7 +43,7 @@ pub struct CompletionItem { 
pub kind: CompletionItemKind, pub sort_text: Option, pub detail: Option, - pub additional_text_edits: Option>, + pub additional_text_edits: Option>, } impl CompletionItem { @@ -89,6 +89,12 @@ pub struct Hover { pub span: Range, } +#[derive(Debug, PartialEq)] +pub struct TextEdit { + pub new_text: String, + pub range: Range, +} + #[derive(Debug, PartialEq)] pub struct SignatureHelp { pub signatures: Vec, diff --git a/npm/src/language-service/language-service.ts b/npm/src/language-service/language-service.ts index aeb638b151..d3fa166214 100644 --- a/npm/src/language-service/language-service.ts +++ b/npm/src/language-service/language-service.ts @@ -54,6 +54,7 @@ export interface ILanguageService { documentUri: string, position: IPosition, ): Promise; + getFormatChanges(documentUri: string): Promise; getHover( documentUri: string, position: IPosition, @@ -167,6 +168,10 @@ export class QSharpLanguageService implements ILanguageService { return this.languageService.get_completions(documentUri, position); } + async getFormatChanges(documentUri: string): Promise { + return this.languageService.get_format_changes(documentUri); + } + async getHover( documentUri: string, position: IPosition, @@ -275,6 +280,7 @@ export const languageServiceProtocol: ServiceProtocol< closeDocument: "request", closeNotebookDocument: "request", getCompletions: "request", + getFormatChanges: "request", getHover: "request", getDefinition: "request", getReferences: "request", diff --git a/vscode/package.json b/vscode/package.json index ab8caa74b8..1793877251 100644 --- a/vscode/package.json +++ b/vscode/package.json @@ -108,6 +108,11 @@ "The minimal set of capabilities required to run a quantum program. This option maps to the Base Profile as defined by the QIR specification." ], "description": "Setting the target profile allows the Q# extension to generate programs that are compatible with a specific target. The target is the hardware or simulator which will be used to run the Q# program. 
The target profile is a description of a target's capabilities." + }, + "Q#.enableFormatting": { + "type": "boolean", + "default": "true", + "description": "Enables the Q# formatter." } } }, diff --git a/vscode/src/config.ts b/vscode/src/config.ts index 75e031ac8f..5bfcd9f398 100644 --- a/vscode/src/config.ts +++ b/vscode/src/config.ts @@ -26,3 +26,9 @@ export async function setTarget(target: TargetProfile) { vscode.ConfigurationTarget.Global, ); } + +export function getEnableFormating(): boolean { + return vscode.workspace + .getConfiguration("Q#") + .get("enableFormatting", true); +} diff --git a/vscode/src/extension.ts b/vscode/src/extension.ts index 5b27331aef..5b02432a40 100644 --- a/vscode/src/extension.ts +++ b/vscode/src/extension.ts @@ -18,7 +18,7 @@ import { qsharpLanguageId, } from "./common.js"; import { createCompletionItemProvider } from "./completion"; -import { getTarget } from "./config"; +import { getEnableFormating, getTarget } from "./config"; import { activateDebugger } from "./debugger/activate"; import { createDefinitionProvider } from "./definition"; import { startCheckingQSharp } from "./diagnostics"; @@ -39,6 +39,7 @@ import { initCodegen } from "./qirGeneration.js"; import { createReferenceProvider } from "./references.js"; import { createRenameProvider } from "./rename.js"; import { createSignatureHelpProvider } from "./signature.js"; +import { createFormatProvider } from "./format.js"; import { activateTargetProfileStatusBarItem } from "./statusbar.js"; import { EventType, @@ -181,8 +182,24 @@ async function activateLanguageService(extensionUri: vscode.Uri) { ...registerQSharpNotebookCellUpdateHandlers(languageService), ); + // format document + const isFormattingEnabled = getEnableFormating(); + const formatterHandle = { + handle: undefined as vscode.Disposable | undefined, + }; + log.debug("Enable formatting set to: " + isFormattingEnabled); + if (isFormattingEnabled) { + formatterHandle.handle = + 
vscode.languages.registerDocumentFormattingEditProvider( + qsharpLanguageId, + createFormatProvider(languageService), + ); + } + // synchronize configuration - subscriptions.push(registerConfigurationChangeHandlers(languageService)); + subscriptions.push( + registerConfigurationChangeHandlers(languageService, formatterHandle), + ); // completions subscriptions.push( @@ -266,6 +283,23 @@ async function updateLanguageServiceProfile(languageService: ILanguageService) { }); } +async function updateLanguageServiceEnableFormatting( + languageService: ILanguageService, + formatterHandle: any, +) { + const isFormattingEnabled = getEnableFormating(); + log.debug("Enable formatting set to: " + isFormattingEnabled); + if (isFormattingEnabled) { + formatterHandle.handle = + vscode.languages.registerDocumentFormattingEditProvider( + qsharpLanguageId, + createFormatProvider(languageService), + ); + } else { + formatterHandle.handle?.dispose(); + } +} + async function loadLanguageService(baseUri: vscode.Uri) { const start = performance.now(); const wasmUri = vscode.Uri.joinPath(baseUri, "./wasm/qsc_wasm_bg.wasm"); @@ -288,10 +322,13 @@ async function loadLanguageService(baseUri: vscode.Uri) { function registerConfigurationChangeHandlers( languageService: ILanguageService, + formatterHandle: any, ) { return vscode.workspace.onDidChangeConfiguration((event) => { if (event.affectsConfiguration("Q#.targetProfile")) { updateLanguageServiceProfile(languageService); + } else if (event.affectsConfiguration("Q#.enableFormatting")) { + updateLanguageServiceEnableFormatting(languageService, formatterHandle); } }); } diff --git a/vscode/src/format.ts b/vscode/src/format.ts new file mode 100644 index 0000000000..c6b7935382 --- /dev/null +++ b/vscode/src/format.ts @@ -0,0 +1,56 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +import { ILanguageService } from "qsharp-lang"; +import * as vscode from "vscode"; +import { toVscodeRange } from "./common"; +import { EventType, sendTelemetryEvent } from "./telemetry"; +import { getRandomGuid } from "./utils"; + +export function createFormatProvider(languageService: ILanguageService) { + return new QSharpFormatProvider(languageService); +} + +class QSharpFormatProvider implements vscode.DocumentFormattingEditProvider { + constructor(public languageService: ILanguageService) {} + + async provideDocumentFormattingEdits(document: vscode.TextDocument) { + // telemetry start format + const associationId = getRandomGuid(); + sendTelemetryEvent(EventType.FormatStart, { associationId }, {}); + const start = performance.now(); + + const lsEdits = await this.languageService.getFormatChanges( + document.uri.toString(), + ); + + if (!lsEdits) { + // telemetry end format + sendTelemetryEvent( + EventType.FormatEnd, + { associationId }, + { + timeToCompleteMs: performance.now() - start, + numberOfEdits: 0, + }, + ); + return []; + } + + const edits = lsEdits.map( + (edit) => new vscode.TextEdit(toVscodeRange(edit.range), edit.newText), + ); + + // telemetry end format + sendTelemetryEvent( + EventType.FormatEnd, + { associationId }, + { + timeToCompleteMs: performance.now() - start, + numberOfEdits: edits.length, + }, + ); + + return edits; + } +} diff --git a/vscode/src/telemetry.ts b/vscode/src/telemetry.ts index 71720a8f47..10fc9ed87a 100644 --- a/vscode/src/telemetry.ts +++ b/vscode/src/telemetry.ts @@ -40,6 +40,8 @@ export enum EventType { TriggerHistogram = "Qsharp.TriggerHistogram", HistogramStart = "Qsharp.HistogramStart", HistogramEnd = "Qsharp.HistogramEnd", + FormatStart = "Qsharp.FormatStart", + FormatEnd = "Qsharp.FormatEnd", } type Empty = { [K in any]: never }; @@ -206,6 +208,14 @@ type EventTypes = { properties: { associationId: string }; measurements: { timeToCompleteMs: number }; }; + [EventType.FormatStart]: { + properties: { 
associationId: string }; + measurements: Empty; + }; + [EventType.FormatEnd]: { + properties: { associationId: string }; + measurements: { timeToCompleteMs: number; numberOfEdits: number }; + }; }; export enum QsharpDocumentType { diff --git a/wasm/src/language_service.rs b/wasm/src/language_service.rs index 232472b08a..c8f27a9b1c 100644 --- a/wasm/src/language_service.rs +++ b/wasm/src/language_service.rs @@ -162,9 +162,9 @@ impl LanguageService { additionalTextEdits: i.additional_text_edits.map(|edits| { edits .into_iter() - .map(|(span, text)| TextEdit { - range: span.into(), - newText: text, + .map(|edit| TextEdit { + range: edit.range.into(), + newText: edit.new_text, }) .collect() }), @@ -196,6 +196,20 @@ impl LanguageService { .collect() } + pub fn get_format_changes(&self, uri: &str) -> Vec { + let edits = self.0.get_format_changes(uri); + edits + .into_iter() + .map(|edit| { + TextEdit { + range: edit.range.into(), + newText: edit.new_text, + } + .into() + }) + .collect() + } + pub fn get_hover(&self, uri: &str, position: IPosition) -> Option { let position: Position = position.into(); let hover = self.0.get_hover(uri, position.into());