From 466495ced9a77b1c89bb7d72d361c6ec2a9e5002 Mon Sep 17 00:00:00 2001 From: Bernard Lambeau Date: Thu, 27 Jun 2024 10:00:42 +0200 Subject: [PATCH] Optimize `summarize.restrict`. Push whatever can be pushed down the tree. --- CHANGELOG.md | 5 ++ lib/bmg/operator/summarize.rb | 20 ++++++ spec/unit/optimizer/test_summarize.rb | 100 ++++++++++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 spec/unit/optimizer/test_summarize.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 31f0392..568d21d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## 0.23.1 + +* Optimize `summarize.restrict`. Push whatever can be pushed down the + tree. + ## 0.23.0 - 2024-06-27 * Add `Bmg.json` and `Bmg.yaml` factory methods, to get relations on top of diff --git a/lib/bmg/operator/summarize.rb b/lib/bmg/operator/summarize.rb index d439ecf..50314e5 100644 --- a/lib/bmg/operator/summarize.rb +++ b/lib/bmg/operator/summarize.rb @@ -20,6 +20,26 @@ def initialize(type, operand, by, summarization) attr_reader :by, :summarization + protected # optimization + + def _restrict(type, predicate) + return super unless type.knows_attrlist? 
+ + # bottom only uses attributes of the `by` list + # and can be pushed down the tree + summaries = type.attrlist - by + top, bottom = predicate.and_split(summaries) + if top == predicate + super + else + op = operand + op = op.restrict(bottom) + op = op.summarize(by, summarization) + op = op.restrict(top) + op + end + end + public def each diff --git a/spec/unit/optimizer/test_summarize.rb b/spec/unit/optimizer/test_summarize.rb new file mode 100644 index 0000000..207e0d5 --- /dev/null +++ b/spec/unit/optimizer/test_summarize.rb @@ -0,0 +1,100 @@ +require 'spec_helper' +module Bmg + describe "summarize optimization" do + + context "summarize.restrict" do + subject { + Relation.new([ + { a: 1, b: 2 }, + { a: 11, b: 2 } + ]).summarize(by, sums).restrict(predicate) + } + + context 'when no optimization is possible' do + let(:by) { + [:a] + } + let(:sums) { + {:b => :sum} + } + let(:predicate) { + Predicate.gt(:b, 100) + } + + it 'does not optimize' do + expect(subject).to be_a(Operator::Restrict) + expect(operand).to be_a(Operator::Summarize) + expect(subject.send(:predicate)).to eql(predicate) + end + end + + context 'when predicate is fully on by' do + let(:by) { + [:a] + } + let(:sums) { + {:b => :sum} + } + let(:predicate) { + Predicate.eq(:a, 1) + } + + it 'pushes restrict down the tree' do + expect(subject).to be_a(Operator::Summarize) + expect(operand).to be_a(Operator::Restrict) + expect(operand.send(:predicate)).to eql(predicate) + end + end + + context 'when predicate is on both' do + let(:by) { + [:a] + } + let(:sums) { + {:b => :sum} + } + let(:p1) { + Predicate.eq(:a, 1) + } + let(:p2) { + Predicate.lt(:b, 2) + } + let(:predicate) { + p1 & p2 + } + + it 'splits the predicate' do + expect(subject).to be_a(Operator::Restrict) + expect(subject.send(:predicate)).to eql(p2) + expect(operand).to be_a(Operator::Summarize) + expect(operand.send(:operand)).to be_a(Operator::Restrict) + expect(operand.send(:operand).send(:predicate)).to eql(p1) + end + end + + 
context "when predicate is on both but can't be split" do + let(:by) { + [:a] + } + let(:sums) { + {:b => :sum} + } + let(:p1) { + Predicate.eq(:a, 1) + } + let(:p2) { + Predicate.lt(:b, 2) + } + let(:predicate) { + p1 | p2 + } + + it 'does not optimize' do + expect(subject).to be_a(Operator::Restrict) + expect(operand).to be_a(Operator::Summarize) + expect(subject.send(:predicate)).to eql(predicate) + end + end + end + end +end