From b7843525c06f320a67f8a8d7c6445ec8f2a11a22 Mon Sep 17 00:00:00 2001 From: Allison Reid Date: Mon, 26 Jul 2021 00:05:45 -0700 Subject: [PATCH] Mark `Ox::sax_parse` as `Ractor`-safe and add a `Ractor`-based `Ox::Sax` example. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For https://github.com/ohler55/ox/issues/277 I've been experimenting with `Ractor`s since upgrading to Ruby 3.0 for https://github.com/ohler55/ox/issues/275 but quickly ran into `Ractor::UnsafeError` when trying to call `Ox::sax_parse` in anything except the main `Ractor`. Per Ruby's `Ractor` C extension documentation (link below): "By default, all C extensions are recognized as `Ractor`-unsafe. If C extension becomes `Ractor`-safe, the extension should call `rb_ext_ractor_safe(true)` at the `Init_` function and all defined method marked as `Ractor`-safe. `Ractor`-unsafe C-methods only been called from main-ractor. If non-main ractor calls it, then `Ractor::UnsafeError` is raised." I don't like to open seemingly-large feature requests like this without making some attempt at it myself first, and luckily it seems like `Ox::sax_parse` Just Works™ since I marked it `Ractor`-safe, even with the `class_cache`. Confirming this safety, making any remaining changes to `Ox::Sax`, and expanding this to the non-`Sax` parts of `Ox` are all unfortunately out of my depth as a n00b C coder, so I would appreciate if you could take this over if it interests you. I am happy with just `Sax` support since I have no current need for marshalling, but I imagine other `Ox` users wouldn't be satisfied if stratified. In this commit: - Define the `HAVE_RB_EXT_RACTOR_SAFE` preprocessor macro via `have_func('rb_ext_ractor_safe')` in `extconf.rb`. - Mark `Init_ox` and `ox_sax_parse` as `Ractor`-safe. - Add a new `Ractor`-based `Ox::Sax` example exercising both parallel and serial `Ox::Sax` handler `Ractor`s to parse data from `shared-mime-info` XML files many users likely already have on their systems. 
Official `Ractor` info: - "Ractor: a proposal for a new concurrent abstraction without thread-safety issues": https://bugs.ruby-lang.org/issues/17100 (https://github.com/ruby/ruby/pull/3365) - Ruby's official `Ractor` documentation: https://docs.ruby-lang.org/en/master/doc/ractor_md.html - "A way to mark C extensions as thread-safe, Ractor-safe, or unsafe": https://bugs.ruby-lang.org/issues/17307 (https://github.com/ruby/ruby/pull/3824) - Ruby's C Extension `Ractor` documentation covering `rb_ext_ractor_safe`: https://docs.ruby-lang.org/en/master/doc/extension_rdoc.html#label-Appendix+F.+Ractor+support - A `Ractor` C Extension from the creator of `Ractor` that might serve as a useful example: https://github.com/ko1/ractor-tvar Blogs: - "Ractors: Multi-Core Parallel Processing Comes to Ruby 3": https://www.ruby3.dev/ruby-3-fundamentals/2021/01/27/ractors-multi-core-parallel-processing-in-ruby-3/ - "Ruby Ractor Experiments: Safe async communication": https://ivoanjo.me/blog/2021/02/14/ractor-experiments-safe-async/ - "Playing with Ruby Ractors": https://billy-ruffian.co.uk/playing-with-ruby-ractors/ - "How Fast are Ractors?": https://www.fastruby.io/blog/ruby/performance/how-fast-are-ractors.html (https://github.com/noahgibbs/ractor_basic_benchmarks/tree/main/benchmarks) Before this change: ``` [okeeblow@emi#CHECKING-YOU-OUT] time ./bin/checking-you-out ~/224031-dot-jpg Received /home/okeeblow/.local/share/mime/packages/user-extension-rsrc.xml /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/ghost_revival/mr_mime.rb:375:in `sax_parse': ractor unsafe method called from not main ractor (Ractor::UnsafeError) from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/ghost_revival/mr_mime.rb:375:in `block in open' from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/ghost_revival/mr_mime.rb:331:in `open' from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/ghost_revival/mr_mime.rb:331:in `open' 
from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/ghost_revival.rb:24:in `block (2 levels) in remember_me' from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/ghost_revival.rb:19:in `loop' from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/ghost_revival.rb:19:in `block in remember_me' :583:in `send': The incoming-port is already closed (Ractor::ClosedError) from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/ghost_revival.rb:86:in `block in extended' from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/ghost_revival.rb:63:in `each' from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/ghost_revival.rb:63:in `each_with_object' from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/ghost_revival.rb:63:in `extended' from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/inner_spirit.rb:216:in `extend' from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out/inner_spirit.rb:216:in `' from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out.rb:10:in `require_relative' from /home/okeeblow/Works/DistorteD/CHECKING-YOU-OUT/lib/checking-you-out.rb:10:in `' from ./bin/checking-you-out:3:in `require_relative' from ./bin/checking-you-out:3:in `
' ./bin/checking-you-out ~/224031-dot-jpg 0.12s user 0.05s system 100% cpu 0.168 total ``` My new `Ox::Sax` Ractor example script's usage: ``` [okeeblow@emi#ox] ./examples/sax_ractor.rb Please provide the path to a `shared-mime-info` XML package and some media-type query arguments (e.g. 'image/jpeg') ``` ``` [okeeblow@emi#ox] ./examples/sax_ractor.rb /usr/share/mime/packages/freedesktop.org.xml Please provide some media-type query arguments (e.g. 'image/jpeg') ``` Finding all-extant types: ``` [okeeblow@emi#ox] ./examples/sax_ractor.rb /usr/share/mime/packages/freedesktop.org.xml image/jpeg font/ttf application/xhtml+xml image/x-pict Parallel Ractors ["Worker 0 gave us JPEG 影像 (image/jpeg) [.jpeg,.jpg,.jpe]", "Worker 1 gave us TrueType 字型 (font/ttf) [.ttf]", "Worker 2 gave us XHTML 網頁 (application/xhtml+xml) [.xhtml,.xht]", "Worker 3 gave us Macintosh Quickdraw/PICT 繪圖 (image/x-pict) [.pct,.pict,.pict1,.pict2]"] Serial Ractor "ONLY ONE OX gave us [#, #, #, #]" ``` …and not finding invalid ones: ``` [okeeblow@emi#ox] ./examples/sax_ractor.rb /usr/share/mime/packages/freedesktop.org.xml lol/rofl fart/butt image/jpeg Parallel Ractors ["Worker 0 gave us nothing", "Worker 1 gave us nothing", "Worker 2 gave us JPEG 影像 (image/jpeg) [.jpeg,.jpg,.jpe]"] Serial Ractor "ONLY ONE OX gave us [nil, nil, #]" ``` Unit tests pass. 
---
 examples/sax_ractor.rb | 194 +++++++++++++++++++++++++++++++++++++++++
 ext/ox/extconf.rb      |   1 +
 ext/ox/ox.c            |   3 +
 ext/ox/sax.c           |   3 +
 4 files changed, 201 insertions(+)
 create mode 100755 examples/sax_ractor.rb

diff --git a/examples/sax_ractor.rb b/examples/sax_ractor.rb
new file mode 100755
index 00000000..8f5a1ceb
--- /dev/null
+++ b/examples/sax_ractor.rb
@@ -0,0 +1,194 @@
+#!/usr/bin/env ruby
+
+require 'ox'
+require 'pathname'
+
+# Silence Ractor warning in Ruby 3.0.x
+Warning[:experimental] = false
+abort("This Ractor example requires at least Ruby 3.0") if RUBY_VERSION.start_with?("2")
+
+# Example/test script for `Ractor`-based `Ox::Sax` parsing.
+# In the Real World™ we probably wouldn't create a single-use `Ractor` for
+# every argument, but this is primarily a test of `rb_ext_ractor_safe` for Ox.
+
+
+# Miniature example Ractor-based `shared-mime-info` Ox handler à la `CHECKING::YOU::OUT`:
+# https://github.com/okeeblow/DistorteD/tree/NEW-SENSATION/CHECKING-YOU-OUT
+class Saxtor < Ox::Sax
+
+  # We will fill this `Struct` as we parse,
+  # yield it if its `ietf` matches our `needle`,
+  # and throw it away otherwise.
+  CYO = Struct.new(:ietf, :globs, :description) do
+    def initialize(ietf = nil, globs = Array.new, description = nil)
+      super(ietf, globs, description)
+    end
+    def to_s  # Pretty print
+      "#{self[:description]} (#{self[:ietf]}) [#{
+        self[:globs]&.map(&File.method(:extname)).join(?,)
+      }]"
+    end
+    def inspect; "#"; end
+  end
+
+  # Set up our parsing environment and open a file handle for our XML.
+  def initialize(parent, haystack)
+    @parse_stack = Array::new  # Track our current Element as we parse.
+    @parent = parent  # `Ractor` that instantiated us.
+    @haystack = File.open(haystack, File::Constants::RDONLY)
+    @haystack.advise(:sequential)
+  end
+
+  # Scratch `Struct`, lazily created and reused until the next `mime-type` element starts.
+  def cyo
+    @cyo ||= CYO.new
+  end
+
+  # Wax on…
+  def start_element(name)
+    @parse_stack.push(name)
+    case @parse_stack.last
+    when :"mime-type" then @cyo = nil  # Clear out leftovers between types.
+    end
+  end
+
+  # …wax off.
+  def end_element(name)
+    case @parse_stack.last
+    when :"mime-type" then
+      # Save the scratch `Struct` if we matched our needle while building it.
+      @out = cyo.dup if @i_can_haz
+      @i_can_haz = false
+    end
+    @parse_stack.pop
+  end
+
+  # Element attribute callback — Ox::Sax::Value version
+  def attr_value(name, value)
+    case [@parse_stack.last, name]
+    in :"mime-type", :type then
+      cyo[:ietf] = value.as_s
+      # If we found our needle then we will yield the scratch `CYO` instead of `nil`.
+      @i_can_haz = true if value.as_s == @needle
+    in :glob, :pattern then
+      cyo[:globs].append(value.as_s)
+    else nil
+    end
+  end
+
+  # Element text content callback, e.g. for <comment>TEXT</comment>
+  #                                This part. --------^
+  def text(element_text)
+    case @parse_stack.last
+    when :comment then
+      # This will end up being the `last` locale (probably `zh_TW`)
+      # because I don't want to implement locale checking for a test script lol
+      cyo[:description] = element_text
+    end
+  end
+
+  # Search our open `haystack` for `needle`, then `Ractor.yield` the matching `CYO` (or `nil`).
+  def awen(needle, **kwargs)
+    @needle = needle  # What IETF Media-Type should we find? (e.g. `'image/jpeg'`)
+    @i_can_haz = false  # Did we find our `needle`? (obviously not yet)
+    @out = nil  # Forget any earlier hit so a miss yields `nil`, not the previous search's `CYO`.
+    @haystack.rewind  # Pon de Replay
+    # Do the thing.
+    Ox.sax_parse(
+      self,  # Instance of a class that responds to `Ox::Sax`'s callback messages.
+      @haystack,  # IO stream or String of XML to parse. Won't close File handles automatically.
+      **{
+        convert_special: true,  # [boolean] Convert encoded entities back to their unencoded form, e.g. `"&lt;"` to `"<"`.
+        skip: :skip_off,  # [:skip_none|:skip_return|:skip_white|:skip_off] (from Element text/value) Strip CRs, whitespace, or nothing.
+        smart: false,  # [boolean] Toggle Ox's built-in hints for HTML parsing: https://github.com/ohler55/ox/blob/master/ext/ox/sax_hint.c
+        strip_namespace: true,  # [nil|String|true|false] (from Element names) Strip no namespaces, all namespaces, or a specific namespace.
+        symbolize: true,  # [boolean] Fill callback method `name` arguments with Symbols instead of with Strings.
+        intern_string_values: true,  # [boolean] Intern (freeze and deduplicate) String return values.
+      }.update(kwargs),
+    )
+
+    # Let our parent `#take` our needle-equivalent `CYO`, or `nil`.
+    Ractor.yield(@out)
+  end  # def awen
+
+end  # class Saxtor
+
+# Fancy "usage" help `String` fragment to concat with specific error messages.
+usage = <<-PLZ
+
+Usage: `sax_ractor.rb [SHARED-MIME-INFO_XML_PATH] [IETF_MEDIA_TYPES]…`
+
+Common file paths:
+
+- FreeBSD:
+  `${LOCALBASE}/share/mime/packages/freedesktop.org.xml` (probably `/usr/local`)
+  https://www.freshports.org/misc/shared-mime-info/
+
+- Linux:
+  `/usr/share/mime/packages/freedesktop.org.xml`
+
+- macOS:
+  `/opt/homebrew/share/mime/packages/freedesktop.org.xml` (Homebrew)
+  `/opt/local/share/mime/packages/freedesktop.org.xml` (MacPorts)
+  https://formulae.brew.sh/formula/shared-mime-info
+PLZ
+
+# Bail out if we were given no arguments or a nonexistent file.
+abort("Please provide the path to a `shared-mime-info` XML package \
+and some media-type query arguments (e.g. 'image/jpeg')".concat(usage)) unless ARGV.size > 0
+haystack = Pathname.new(ARGV.first)
+abort("#{haystack} does not exist") unless haystack.exist? and haystack.file?
+
+# *Judicator Aldaris voice* "YOU HAVE NOT ENOUGH ARGUMENTS."
+abort("Please provide some media-type query arguments (e.g. 'image/jpeg')".concat(usage)) unless ARGV.size > 1
+
+# We can use `Ractor::make_shareable()` for larger traversable data structures,
+# but freezing should be enough to share a `Pathname`.
+# Resolve symlinks etc with `#realpath` before we freeze.
+haystack = haystack.realpath.freeze
+needles = ARGV[1...]
+
+
+# Hamburger Style.
+puts "Parallel Ractors"
+# Create one `Ractor` for every given media-type argument
+moo = ['Heifer', 'Cow', 'Bull', 'Steer'].tally
+head_count = needles.size - 1
+herd = (0..head_count).map {
+  # Give our worker `Ractor` a name, otherwise its `#name` will return `nil`.
+  individual = moo.keys.sample
+  moo[individual] += 1
+  Ractor.new(haystack, name: "#{individual} #{moo[individual] - 1}") { |haystack|
+    # Initialize an `Ox::Sax` handler for our given source file.
+    handler = Saxtor::new(Ractor.current, haystack)
+
+    # Now we can `#send` a needle to this `Ractor` and make it search the haystack!
+    while ietf_string = Ractor.receive
+      handler.awen(ietf_string)  # `#awen` already `Ractor.yield`s its result; don't yield a second, spurious `nil`.
+    end
+  }
+}
+
+# Send our arguments to our herd in a 1:1 mapping
+(0..head_count).each { herd[_1].send(needles[_1]) }
+
+# Wait for every `Ractor` to have a result, and then pretty print all of them :)
+pp (0..head_count).map {
+  [herd[_1], herd[_1].take]
+}.map {
+  "#{_1.name} gave us #{_2 || 'nothing'}"
+}
+
+
+# Hotdog Style.
+puts
+puts "Serial Ractor"
+# Create a single `Ractor` and send every media-type to it in series.
+only_one_ox = Ractor.new(haystack, name: "ONLY ONE OX") { |haystack|
+  handler = Saxtor::new(Ractor.current, haystack)
+  while ietf_string = Ractor.receive
+    handler.awen(ietf_string)
+  end
+}
+(0..head_count).each { only_one_ox.send(needles[_1]) }
+pp "#{only_one_ox.name} gave us #{(0..head_count).map { only_one_ox.take }}"
diff --git a/ext/ox/extconf.rb b/ext/ox/extconf.rb
index 6272c10f..42284808 100644
--- a/ext/ox/extconf.rb
+++ b/ext/ox/extconf.rb
@@ -38,6 +38,7 @@
 have_func('rb_struct_alloc_noinit')
 have_func('rb_obj_encoding')
 have_func('rb_ivar_foreach')
+have_func('rb_ext_ractor_safe', 'ruby.h')
 
 have_header('ruby/st.h')
 have_header('sys/uio.h')
diff --git a/ext/ox/ox.c b/ext/ox/ox.c
index 2d4961fb..fa5372ae 100644
--- a/ext/ox/ox.c
+++ b/ext/ox/ox.c
@@ -1395,6 +1395,9 @@ cache8_test(VALUE self) {
 #endif
 
 void Init_ox() {
+#if HAVE_RB_EXT_RACTOR_SAFE
+    rb_ext_ractor_safe(true);
+#endif
     Ox = rb_define_module("Ox");
 
     rb_define_module_function(Ox, "default_options", get_def_opts, 0);
diff --git a/ext/ox/sax.c b/ext/ox/sax.c
index 5e94cfcc..8f73ed94 100644
--- a/ext/ox/sax.c
+++ b/ext/ox/sax.c
@@ -123,6 +123,9 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
 
 void
 ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) {
+#if HAVE_RB_EXT_RACTOR_SAFE
+    rb_ext_ractor_safe(true);
+#endif
     struct _saxDrive dr;
     int line = 0;
 