From 12c0da6aa98e0d0a0762c47103b64290c88620a1 Mon Sep 17 00:00:00 2001
From: Myron Marston
Date: Tue, 19 Aug 2025 18:22:28 -0700
Subject: [PATCH] Improve YAML lexer: handle quoted keys.

Previously, quoted keys would be highlighted as having a syntax error.
They are particularly common when dumping a JSON schema as YAML since keys
like `$schema`, `$defs` and `$ref` are common, all of which require quoting.

Escape sequences inside a quoted key are honored when locating its closing
quote: backslash escapes (e.g. `\"`) in double-quoted keys and doubled
quotes (`''`) in single-quoted keys.
---
 lib/rouge/lexers/yaml.rb |  18 +++-
 spec/lexers/yaml_spec.rb | 168 +++++++++++++++++++++++++++++++++++++++
 spec/visual/samples/yaml |  11 +++
 3 files changed, 196 insertions(+), 1 deletion(-)

diff --git a/lib/rouge/lexers/yaml.rb b/lib/rouge/lexers/yaml.rb
index 1e949a6217..f8dc96fef4 100644
--- a/lib/rouge/lexers/yaml.rb
+++ b/lib/rouge/lexers/yaml.rb
@@ -172,12 +172,28 @@ def set_indent(match, opts={})
       end
 
       state :block_nodes do
-        # implicit key
+        # implicit unquoted key
         rule %r/([^#,?\[\]{}"'\n]+)(:)(?=\s|$)/ do |m|
           groups Name::Attribute, Punctuation::Indicator
           set_indent m[0], :implicit => true
         end
 
+        # implicit double-quoted key
+        # (backslash escapes are consumed as pairs so that an escaped quote,
+        # e.g. `\"`, is never mistaken for the closing quote)
+        rule %r/("(?:\\.|[^"\\\n])*")(\s*)(:)(?=\s|$)/ do |m|
+          groups Name::Attribute, Text, Punctuation::Indicator
+          set_indent m[0], :implicit => true
+        end
+
+        # implicit single-quoted key
+        # (a quote is escaped by doubling it, `''`, per the YAML spec; the
+        # non-standard `\'` is accepted as well, for leniency)
+        rule %r/('(?:''|[^\n']|\\')*')(\s*)(:)(?=\s|$)/ do |m|
+          groups Name::Attribute, Text, Punctuation::Indicator
+          set_indent m[0], :implicit => true
+        end
+
         # literal and folded scalars
         rule %r/[\|>][+-]?/ do
           token Punctuation::Indicator
diff --git a/spec/lexers/yaml_spec.rb b/spec/lexers/yaml_spec.rb
index 10419149bd..23d351d10a 100644
--- a/spec/lexers/yaml_spec.rb
+++ b/spec/lexers/yaml_spec.rb
@@ -4,6 +4,174 @@
 describe Rouge::Lexers::YAML do
   let(:subject) { Rouge::Lexers::YAML.new }
 
+  describe 'lexing' do
+    include Support::Lexing
+
+    describe 'quoted keys' do
+      describe 'double quoted keys in block context' do
+        it 'highlights quoted key and colon correctly' do
+          assert_tokens_equal '"$schema": http://json-schema.org/draft-07/schema#',
+            ['Name.Attribute', '"$schema"'],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'http://json-schema.org/draft-07/schema#']
+        end
+
+        it 'handles quoted keys with unescaped single quote characters' do
+          assert_tokens_equal %("key'with'single'quotes": value),
+            ['Name.Attribute', %("key'with'single'quotes")],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'value']
+        end
+
+        it 'handles quoted keys with unescaped YAML syntax characters' do
+          assert_tokens_equal '"normally disallowed: []{}:,#": value',
+            ['Name.Attribute', '"normally disallowed: []{}:,#"'],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'value']
+        end
+
+        it 'handles quoted keys with escaped double quotes' do
+          assert_tokens_equal '"key\"with\"escaped\"quotes: 3": value',
+            ['Name.Attribute', '"key\"with\"escaped\"quotes: 3"'],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'value']
+        end
+
+        it 'handles quoted keys with an escaped double quote before a colon' do
+          assert_tokens_equal '"a\": b": value',
+            ['Name.Attribute', '"a\": b"'],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'value']
+        end
+
+        it 'handles quoted keys with whitespace before colon' do
+          assert_tokens_equal '"$defs" : value',
+            ['Name.Attribute', '"$defs"'],
+            ['Text', ' '],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'value']
+        end
+
+        it 'handles an empty key' do
+          assert_tokens_equal '"": value',
+            ['Name.Attribute', '""'],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'value']
+        end
+      end
+
+      describe 'single quoted keys in block context' do
+        it 'highlights single key and colon correctly' do
+          assert_tokens_equal "'$schema': http://json-schema.org/draft-07/schema#",
+            ['Name.Attribute', "'$schema'"],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'http://json-schema.org/draft-07/schema#']
+        end
+
+        it 'handles quoted keys with unescaped double quote characters' do
+          assert_tokens_equal %('key"with"double"quotes': value),
+            ['Name.Attribute', %('key"with"double"quotes')],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'value']
+        end
+
+        it 'handles quoted keys with unescaped YAML syntax characters' do
+          assert_tokens_equal "'normally disallowed: []{}:,#': value",
+            ['Name.Attribute', "'normally disallowed: []{}:,#'"],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'value']
+        end
+
+        it 'handles quoted keys with escaped single quotes' do
+          assert_tokens_equal "'key\\'with\\'escaped\\'quotes: 3': value",
+            ['Name.Attribute', "'key\\'with\\'escaped\\'quotes: 3'"],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'value']
+        end
+
+        it 'handles quoted keys with doubled single quotes' do
+          assert_tokens_equal "'it''s: 3': value",
+            ['Name.Attribute', "'it''s: 3'"],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'value']
+        end
+
+        it 'handles quoted keys with whitespace before colon' do
+          assert_tokens_equal "'$defs' : value",
+            ['Name.Attribute', "'$defs'"],
+            ['Text', ' '],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'value']
+        end
+
+        it 'handles an empty key' do
+          assert_tokens_equal "'': value",
+            ['Name.Attribute', "''"],
+            ['Punctuation.Indicator', ':'],
+            ['Text', ' '],
+            ['Literal.String', 'value']
+        end
+      end
+
+      it 'handles combination of quoted and unquoted keys' do
+        yaml = <<~YAML.strip
+          "$schema": http://json-schema.org/draft-07/schema#
+          json_schema_version: 1
+          "$defs":
+            Album:
+              type: object
+        YAML
+
+        tokens = subject.lex(yaml).to_a
+
+        # Check that quoted keys have correct tokens
+        schema_key_index = tokens.find_index { |token| token[1] == '"$schema"' }
+        assert schema_key_index, "Could not find '$schema' key"
+        assert_equal 'Name.Attribute', tokens[schema_key_index][0].qualname
+        assert_equal 'Punctuation.Indicator', tokens[schema_key_index + 1][0].qualname
+        assert_equal ':', tokens[schema_key_index + 1][1]
+
+        defs_key_index = tokens.find_index { |token| token[1] == '"$defs"' }
+        assert defs_key_index, "Could not find '$defs' key"
+        assert_equal 'Name.Attribute', tokens[defs_key_index][0].qualname
+        assert_equal 'Punctuation.Indicator', tokens[defs_key_index + 1][0].qualname
+        assert_equal ':', tokens[defs_key_index + 1][1]
+
+        # Check that unquoted keys still work
+        version_key_index = tokens.find_index { |token| token[1] == 'json_schema_version' }
+        assert version_key_index, "Could not find 'json_schema_version' key"
+        assert_equal 'Name.Attribute', tokens[version_key_index][0].qualname
+      end
+
+      it 'handles nested quoted keys' do
+        yaml = <<~YAML.strip
+          "$defs":
+            'Album':
+              "$ref": "#/definitions/Album"
+        YAML
+
+        tokens = subject.lex(yaml).to_a
+
+        # Should not contain any error tokens (token[0] is a token type, so compare its qualname)
+        error_tokens = tokens.select { |token| token[0].qualname == 'Error' }
+        assert error_tokens.empty?, "Found error tokens: #{error_tokens}"
+      end
+    end
+  end
+
   describe 'guessing' do
     include Support::Guessing
 
diff --git a/spec/visual/samples/yaml b/spec/visual/samples/yaml
index 1bbda11d18..3ecc415f4e 100644
--- a/spec/visual/samples/yaml
+++ b/spec/visual/samples/yaml
@@ -359,3 +359,14 @@ foo/bar: We are great
 foo.bar: Are we really?
 foo+bar: Maybe not
 foo:bar: ...or maybe we are?
+
+# YAML key edge cases (quotes, whitespace before colon, etc)
+---
+"$schema1": "some-schema"
+"normally disallowed characters: []{}:": "value"
+"with some trailing spaces" : true
+foo : 3
+object1:
+  "$schema1": "some-schema"
+  "with some trailing spaces" : true
+  foo : 3