diff --git a/encoding.c b/encoding.c index 8bb393b471ed54..e9510fe3c146c5 100644 --- a/encoding.c +++ b/encoding.c @@ -1367,7 +1367,7 @@ enc_names_i(st_data_t name, st_data_t idx, st_data_t args) VALUE *arg = (VALUE *)args; if ((int)idx == (int)arg[0]) { - VALUE str = rb_interned_str_cstr((char *)name); + VALUE str = rb_enc_interned_str_cstr((char *)name, rb_usascii_encoding()); rb_ary_push(arg[1], str); } return ST_CONTINUE; @@ -1873,7 +1873,7 @@ static int rb_enc_name_list_i(st_data_t name, st_data_t idx, st_data_t arg) { VALUE ary = (VALUE)arg; - VALUE str = rb_interned_str_cstr((char *)name); + VALUE str = rb_enc_interned_str_cstr((char *)name, rb_usascii_encoding()); rb_ary_push(ary, str); return ST_CONTINUE; } @@ -1921,7 +1921,7 @@ rb_enc_aliases_enc_i(st_data_t name, st_data_t orig, st_data_t arg) str = rb_fstring_cstr(rb_enc_name(enc)); rb_ary_store(ary, idx, str); } - key = rb_interned_str_cstr((char *)name); + key = rb_enc_interned_str_cstr((char *)name, rb_usascii_encoding()); rb_hash_aset(aliases, key, str); return ST_CONTINUE; } diff --git a/ext/io/console/console.c b/ext/io/console/console.c index 7ddaf071a8833a..2b0193bb90b631 100644 --- a/ext/io/console/console.c +++ b/ext/io/console/console.c @@ -84,9 +84,9 @@ getattr(int fd, conmode *t) static ID id_getc, id_close; static ID id_gets, id_flush, id_chomp_bang; -#ifndef HAVE_RB_INTERNED_STR_CSTR +#ifndef HAVE_RB_ENC_INTERNED_STR_CSTR # define rb_str_to_interned_str(str) rb_str_freeze(str) -# define rb_interned_str_cstr(str) rb_str_freeze(rb_usascii_str_new_cstr(str)) +# define rb_enc_interned_str_cstr(str, enc) rb_str_freeze(rb_usascii_str_new_cstr(str)) #endif #if defined HAVE_RUBY_FIBER_SCHEDULER_H @@ -1897,7 +1897,7 @@ console_ttyname(VALUE io) size_t size = sizeof(termname); int e; if (ttyname_r(fd, tn, size) == 0) - return rb_interned_str_cstr(tn); + return rb_enc_interned_str_cstr(tn, rb_usascii_encoding()); if ((e = errno) == ERANGE) { VALUE s = rb_str_new(0, size); while (1) { @@ -1921,7 +1921,7 @@ console_ttyname(VALUE io) int e = errno; rb_syserr_fail_str(e, rb_sprintf("ttyname(%d)", fd)); } - return rb_interned_str_cstr(tn); + return rb_enc_interned_str_cstr(tn, rb_usascii_encoding()); } # else # error No ttyname function diff --git a/ext/io/console/extconf.rb b/ext/io/console/extconf.rb index dd3d221ae51df3..e6254c9e90fe98 100644 --- a/ext/io/console/extconf.rb +++ b/ext/io/console/extconf.rb @@ -9,7 +9,7 @@ have_func("rb_syserr_new_str(0, Qnil)") or abort -have_func("rb_interned_str_cstr") +have_func("rb_enc_interned_str_cstr") have_func("rb_io_path", "ruby/io.h") have_func("rb_io_descriptor", "ruby/io.h") have_func("rb_io_get_write_io", "ruby/io.h") diff --git a/gc/default/default.c b/gc/default/default.c index be5385f166fde1..45c0d3e2552fef 100644 --- a/gc/default/default.c +++ b/gc/default/default.c @@ -505,7 +505,6 @@ typedef struct rb_objspace { unsigned int during_compacting : 1; unsigned int during_reference_updating : 1; unsigned int gc_stressful: 1; - unsigned int has_newobj_hook: 1; unsigned int during_minor_gc : 1; unsigned int during_incremental_marking : 1; unsigned int measure_gc : 1; @@ -1520,7 +1519,6 @@ rb_gc_impl_set_event_hook(void *objspace_ptr, const rb_event_flag_t event) { rb_objspace_t *objspace = objspace_ptr; objspace->hook_events = event & RUBY_INTERNAL_EVENT_OBJSPACE_MASK; - objspace->flags.has_newobj_hook = !!(objspace->hook_events & RUBY_INTERNAL_EVENT_NEWOBJ); } unsigned long long @@ -3812,7 +3810,7 @@ gc_sweep_finish_heap(rb_objspace_t *objspace, rb_heap_t *heap) heap_allocatable_slots_expand(objspace, heap, swept_slots, heap->total_slots); } } - else { + else if (objspace->heap_pages.allocatable_slots < (min_free_slots - swept_slots)) { gc_needs_major_flags |= GPR_FLAG_MAJOR_BY_NOFREE; heap->force_major_gc_count++; } @@ -9584,7 +9582,7 @@ rb_gc_impl_init(void) VALUE opts; /* \GC build options */ rb_define_const(rb_mGC, "OPTS", opts = rb_ary_new()); -#define OPT(o) if (o) rb_ary_push(opts, rb_interned_str(#o, sizeof(#o) - 1)) +#define OPT(o) if (o) rb_ary_push(opts, rb_enc_interned_str(#o, sizeof(#o) - 1, rb_usascii_encoding())) OPT(GC_DEBUG); OPT(USE_RGENGC); OPT(RGENGC_DEBUG); diff --git a/imemo.c b/imemo.c index 8b3018523f0155..d949466a776f3d 100644 --- a/imemo.c +++ b/imemo.c @@ -94,8 +94,9 @@ rb_free_tmp_buffer(volatile VALUE *store) rb_imemo_tmpbuf_t *s = (rb_imemo_tmpbuf_t*)ATOMIC_VALUE_EXCHANGE(*store, 0); if (s) { void *ptr = ATOMIC_PTR_EXCHANGE(s->ptr, 0); + long cnt = s->cnt; s->cnt = 0; - ruby_xfree(ptr); + ruby_sized_xfree(ptr, sizeof(VALUE) * cnt); } } diff --git a/lib/prism/prism.gemspec b/lib/prism/prism.gemspec index 463387e55c814b..283c7b04aa95e6 100644 --- a/lib/prism/prism.gemspec +++ b/lib/prism/prism.gemspec @@ -104,6 +104,7 @@ Gem::Specification.new do |spec| "lib/prism/translation/parser/compiler.rb", "lib/prism/translation/parser/lexer.rb", "lib/prism/translation/ripper.rb", + "lib/prism/translation/ripper/filter.rb", "lib/prism/translation/ripper/lexer.rb", "lib/prism/translation/ripper/sexp.rb", "lib/prism/translation/ripper/shim.rb", diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index 6552d2dbb80794..c8f9fa7731a539 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -437,6 +437,7 @@ def self.sexp_raw(src, filename = "-", lineno = 1, raise_errors: false) end end + autoload :Filter, "prism/translation/ripper/filter" autoload :Lexer, "prism/translation/ripper/lexer" autoload :SexpBuilder, "prism/translation/ripper/sexp" autoload :SexpBuilderPP, "prism/translation/ripper/sexp" diff --git a/lib/prism/translation/ripper/filter.rb b/lib/prism/translation/ripper/filter.rb new file mode 100644 index 00000000000000..19deef2d37dfaa --- /dev/null +++ b/lib/prism/translation/ripper/filter.rb @@ -0,0 +1,53 @@ +# frozen_string_literal: true + +module Prism + module Translation + class Ripper + class Filter # :nodoc: + # :stopdoc: + def initialize(src, filename = '-', lineno = 1) + @__lexer = Lexer.new(src, filename, lineno) + @__line = nil + @__col = nil + @__state = nil + end + + def filename + @__lexer.filename + end + + def lineno + @__line + end + + def column + @__col + end + + def state + @__state + end + + def parse(init = nil) + data = init + @__lexer.lex.each do |pos, event, tok, state| + @__line, @__col = *pos + @__state = state + data = if respond_to?(event, true) + then __send__(event, tok, data) + else on_default(event, tok, data) + end + end + data + end + + private + + def on_default(event, token, data) + data + end + # :startdoc: + end + end + end +end diff --git a/lib/prism/translation/ripper/lexer.rb b/lib/prism/translation/ripper/lexer.rb index 787181b5a7fc42..bd40fb4c5a2de0 100644 --- a/lib/prism/translation/ripper/lexer.rb +++ b/lib/prism/translation/ripper/lexer.rb @@ -100,21 +100,17 @@ def to_a end end - def initialize(...) - super - @lex_compat = Prism.lex_compat(@source, filepath: filename, line: lineno) + # Pretty much just the same as Prism.lex_compat. + def lex(raise_errors: false) + Ripper.lex(@source, filename, lineno, raise_errors: raise_errors) end # Returns the lex_compat result wrapped in `Elem`. Errors are omitted. # Since ripper is a streaming parser, tokens are expected to be emitted in the order # that the parser encounters them. This is not implemented. - def parse(raise_errors: false) - if @lex_compat.failure? && raise_errors - raise SyntaxError, @lex_compat.errors.first.message - else - @lex_compat.value.map do |position, event, token, state| - Elem.new(position, event, token, state.to_int) - end + def parse(...) + lex(...).map do |position, event, token, state| + Elem.new(position, event, token, state.to_int) end end diff --git a/string.c b/string.c index 6f4ea03fb37a41..d564c2e2e1bf94 100644 --- a/string.c +++ b/string.c @@ -12709,7 +12709,7 @@ VALUE rb_interned_str(const char *ptr, long len) { struct RString fake_str = {RBASIC_INIT}; - return register_fstring(setup_fake_str(&fake_str, ptr, len, ENCINDEX_US_ASCII), true, false); + return register_fstring(setup_fake_str(&fake_str, ptr, len, ENCINDEX_ASCII_8BIT), true, false); } VALUE diff --git a/test/prism/ruby/ripper_test.rb b/test/prism/ruby/ripper_test.rb index cac20a073db61b..2a0504c19f35f0 100644 --- a/test/prism/ruby/ripper_test.rb +++ b/test/prism/ruby/ripper_test.rb @@ -59,7 +59,7 @@ class RipperTest < TestCase "whitequark/slash_newline_in_heredocs.txt" ] - omitted_lexer_parse = [ + omitted_lex = [ "comments.txt", "heredoc_percent_q_newline_delimiter.txt", "heredoc_with_escaped_newline_at_start.txt", @@ -80,8 +80,20 @@ class RipperTest < TestCase define_method("#{fixture.test_name}_sexp_raw") { assert_ripper_sexp_raw(fixture.read) } end - Fixture.each_for_current_ruby(except: incorrect | omitted_lexer_parse) do |fixture| - define_method("#{fixture.test_name}_lexer_parse") { assert_ripper_lexer_parse(fixture.read) } + Fixture.each_for_current_ruby(except: incorrect | omitted_lex) do |fixture| + define_method("#{fixture.test_name}_lex") { assert_ripper_lex(fixture.read) } + end + + def test_lexer + lexer = Translation::Ripper::Lexer.new("foo") + expected = [[1, 0], :on_ident, "foo", Translation::Ripper::EXPR_CMDARG] + + assert_equal([expected], lexer.lex) + assert_equal(expected, lexer.parse[0].to_a) + assert_equal(lexer.parse[0].to_a, lexer.scan[0].to_a) + + assert_equal(%i[on_int on_op], Translation::Ripper::Lexer.new("1 +").lex.map(&:event)) + assert_raise(SyntaxError) { Translation::Ripper::Lexer.new("1 +").lex(raise_errors: true) } end def test_tokenize @@ -106,15 +118,15 @@ def assert_ripper_sexp_raw(source) assert_equal Ripper.sexp_raw(source), Prism::Translation::Ripper.sexp_raw(source) end - def assert_ripper_lexer_parse(source) - prism = Translation::Ripper::Lexer.new(source).parse - ripper = Ripper::Lexer.new(source).parse - ripper.reject! { |elem| elem.event == :on_sp } # Prism doesn't emit on_sp - ripper.sort_by!(&:pos) # Prism emits tokens by their order in the code, not in parse order + def assert_ripper_lex(source) + prism = Translation::Ripper.lex(source) + ripper = Ripper.lex(source) + ripper.reject! { |elem| elem[1] == :on_sp } # Prism doesn't emit on_sp + ripper.sort_by! { |elem| elem[0] } # Prism emits tokens by their order in the code, not in parse order [prism.size, ripper.size].max.times do |i| - expected = ripper[i].to_a - actual = prism[i].to_a + expected = ripper[i] + actual = prism[i] # Since tokens related to heredocs are not emitted in the same order, # the state also doesn't line up. if expected[1] == :on_heredoc_end && actual[1] == :on_heredoc_end