17 changes: 11 additions & 6 deletions app/models/proxy_exercise.rb
@@ -78,13 +78,18 @@ def get_matching_exercise(user)
   end
 
   def find_matching_exercise(user)
-    exercises_user_has_accessed = user.submissions.where(cause: %w[submit assess remoteSubmit remoteAssess]).map(&:exercise).uniq.compact
-    tags_user_has_seen = exercises_user_has_accessed.map(&:tags).uniq.flatten
+    exercises_user_has_accessed =
+      Exercise
+        .joins(:submissions)
+        .merge(user.submissions.where(cause: %w[submit assess remoteSubmit remoteAssess]))
+        .includes(:tags, :files)
+        .distinct
+    tags_user_has_seen = exercises_user_has_accessed.flat_map(&:tags).uniq
     Rails.logger.debug { "exercises_user_has_accessed #{exercises_user_has_accessed.map(&:id).join(',')}" }
 
     # find exercises
     potential_recommended_exercises = []
-    exercises.where('expected_difficulty >= 1').find_each do |ex|
+    exercises.where('expected_difficulty >= 1').includes(:tags, exercise_tags: :tag).find_each do |ex|
       ## find exercises which have only tags the user has already seen
       if (ex.tags - tags_user_has_seen).empty?
         potential_recommended_exercises << ex
@@ -119,9 +124,9 @@ def select_best_matching_exercise(user, exercises_user_has_accessed, potential_r
       relative_knowledge_improvement[potex] = 0.0
       Rails.logger.debug { "review potential exercise #{potex.id}" }
       tags.each do |tag|
-        tag_ratio = potex.exercise_tags.find_by(tag:).factor.to_f / potex.exercise_tags.inject(0) do |sum, et|
-          sum + et.factor
-        end
+        tag_ratio = potex.exercise_tags.find {|et| et.tag == tag }.factor.to_f / potex.exercise_tags.inject(0) do |sum, et|
+          sum + et.factor
+        end
         max_topic_knowledge_ratio = potex.expected_difficulty * tag_ratio
         old_relative_loss_tag = topic_knowledge_user[tag] / topic_knowledge_max[tag]
         new_relative_loss_tag = topic_knowledge_user[tag] / (topic_knowledge_max[tag] + max_topic_knowledge_ratio)
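The gist of the model change above is avoiding N+1 queries: the old code materialized exercises via map(&:exercise) and then hit the database once per tag through find_by(tag:), while the new code eager-loads the tag associations once and resolves everything else in memory. A minimal sketch of that pattern, using the Exercise/ExerciseTag/Tag names from this diff (the surrounding loop and logging are illustrative only, not the actual method body):

# Preload the associations once, up front, instead of querying per exercise/tag.
exercises = Exercise.where('expected_difficulty >= 1')
                    .includes(:tags, exercise_tags: :tag)

exercises.find_each do |exercise|
  # Runs in memory: exercise_tags and their tags were preloaded above.
  total_factor = exercise.exercise_tags.inject(0) {|sum, et| sum + et.factor }

  exercise.tags.each do |tag|
    # In-memory lookup replaces exercise_tags.find_by(tag:), which would
    # otherwise issue one SELECT per tag of every candidate exercise.
    entry = exercise.exercise_tags.find {|et| et.tag == tag }
    Rails.logger.debug { "#{exercise.id}/#{tag.name}: #{entry.factor.to_f / total_factor}" }
  end
end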
7 changes: 7 additions & 0 deletions spec/factories/tag.rb
@@ -0,0 +1,7 @@
# frozen_string_literal: true

FactoryBot.define do
factory :tag do
name { 'tagName' }
end
end
177 changes: 177 additions & 0 deletions spec/models/proxy_exercise_performance_spec.rb
@@ -0,0 +1,177 @@
# frozen_string_literal: true

require 'rails_helper'

# AI-generated spec to capture performance characteristics of ProxyExercise
# rubocop:disable FactoryBot/ExcessiveCreateList
RSpec.describe ProxyExercise do
# Simple SQL capture helper using ActiveSupport notifications
def capture_sql(&)
events = []
callback = ->(_name, _start, _finish, _id, payload) { events << payload[:sql] }
ActiveSupport::Notifications.subscribed(callback, 'sql.active_record', &)
events
end

def query_count(&)
capture_sql(&).size
end

describe 'performance characteristics' do
describe '#count_files' do
it 'uses an efficient COUNT query instead of loading all exercises' do
pe = create(:proxy_exercise)
create_list(:dummy, 20).each {|ex| pe.exercises << ex }

# Warm-up (avoid counting queries from lazy loading of associations)
pe.reload

sqls = capture_sql { expect(pe.count_files).to eq(20) }

# We expect a single COUNT(*) style query (plus, in some adapters, an implicit PRAGMA/SHOW if any)
# Be tolerant: assert that at least one of the queries contains COUNT and total number is small
has_count = sqls.any? {|sql| sql =~ /COUNT\s*\(/i }
expect(has_count).to be(true), "Expected a COUNT query, got: #{sqls.inspect}"
expect(sqls.size).to be <= 3 # 1 main COUNT + possible noise
end
end

describe "#get_matching_exercise when algorithm is 'random'" do
let(:user) { create(:learner) }

it 'does not increase the number of SQL queries with a larger exercise pool' do
pe_small = create(:proxy_exercise, algorithm: 'random')
chosen_small = create(:dummy)
others_small = create_list(:dummy, 9)
pe_small.exercises << ([chosen_small] + others_small)
allow(pe_small.exercises.target).to receive(:sample).and_return(chosen_small)

pe_large = create(:proxy_exercise, algorithm: 'random')
chosen_large = create(:dummy)
others_large = create_list(:dummy, 99)
pe_large.exercises << ([chosen_large] + others_large)
allow(pe_large.exercises.target).to receive(:sample).and_return(chosen_large)

# Measure queries for small pool
small_queries = query_count { pe_small.get_matching_exercise(user) }

# Measure queries for large pool
large_queries = query_count { pe_large.get_matching_exercise(user) }

# The random algorithm should not need additional queries proportional to the pool size.
# Allow a tiny slack for adapter differences and inserts of assignment rows.
expect(large_queries - small_queries).to be <= 2,
"Expected roughly constant query count, got small=#{small_queries}, large=#{large_queries}"
end
end

describe "#get_matching_exercise when algorithm is 'best_match'" do
it 'reports current SQL query counts across pool sizes (opt-in via PERF=1)' do
skip('Set PERF=1 to run performance specs') unless ENV['PERF']
ActiveRecord.verbose_query_logs = true

# Helper to build an exercise with given tags and difficulty
def build_tagged_exercise(difficulty:, tags:)
ex = create(:dummy, expected_difficulty: difficulty)
tags.each do |tag|
ExerciseTag.create!(exercise: ex, tag:, factor: 1)
end
ex
end

# Prepare a fixed tag universe to be stable across runs
tags = (1..5).map {|i| Tag.create!(name: "perf-tag-#{i}-#{SecureRandom.hex(2)}") }

# Build a small history of exercises the user has accessed, covering all tags
user = create(:learner)
seen_exercises = Array.new(5) {|i| build_tagged_exercise(difficulty: 2, tags: [tags[i]]) }
# Create real submissions for the user to reflect accessed exercises (no stubbing)
seen_exercises.each do |ex|
Submission.create!(exercise: ex, contributor: user, cause: 'submit')
end

def build_pool(size:, tags:)
pe = create(:proxy_exercise, algorithm: 'best_match')
# Distribute tags and difficulties deterministically
exercises = (1..size).map do |i|
difficulty = 1 + (i % 3) # 1..3
assigned_tags = [tags[i % tags.size], tags[(i + 1) % tags.size]].uniq
build_tagged_exercise(difficulty:, tags: assigned_tags)
end
pe.exercises << exercises
pe
end

# Measure across multiple pool sizes
sizes = [10, 30, 60]
# sizes = [1]
results = {}
sizes.each do |n|
pe = build_pool(size: n, tags:)
# Warm up: ensure associations are loaded enough to avoid counting lazy first-time loads unrelated to algorithm
pe.reload
queries = query_count { pe.get_matching_exercise(user) }
results[n] = queries
end

puts "BEST_MATCH query counts by pool size: #{results.inspect}"

# Very loose sanity checks — this spec primarily reports the current optimization level.
expect(results.values.all?(&:positive?)).to be(true)
end
end

describe "#get_matching_exercise 'best_match' with varying tag counts" do
it 'reports current SQL query counts across different tags per exercise (opt-in via PERF=1)' do
skip('Set PERF=1 to run performance specs') unless ENV['PERF']

# Helper to build an exercise with given tags and difficulty
def build_tagged_exercise(difficulty:, tags:)
ex = create(:dummy, expected_difficulty: difficulty)
tags.each do |tag|
ExerciseTag.create!(exercise: ex, tag: tag, factor: 1)
end
ex
end

user = create(:learner)

# Build a small history of exercises the user has accessed, covering some tags
base_tags = (1..8).map {|i| Tag.create!(name: "var-tags-base-#{i}-#{SecureRandom.hex(2)}") }
seen_exercises = base_tags.first(4).map {|t| build_tagged_exercise(difficulty: 2, tags: [t]) }
# Create real submissions for the user to reflect accessed exercises (no stubbing)
seen_exercises.each do |ex|
Submission.create!(exercise: ex, contributor: user, cause: 'submit')
end

def build_pool_with_tag_count(size:, tag_universe:, tags_per_ex:)
pe = create(:proxy_exercise, algorithm: 'best_match')
exercises = (1..size).map do |i|
difficulty = 1 + (i % 3) # 1..3
assigned = (0...tags_per_ex).map {|k| tag_universe[(i + k) % tag_universe.size] }.uniq
build_tagged_exercise(difficulty: difficulty, tags: assigned)
end
pe.exercises << exercises
pe
end

# Vary the number of tags per exercise while keeping pool size constant
tags_per_exercise = [1, 2, 3, 5]
tag_universe = (1..10).map {|i| Tag.create!(name: "var-tags-univ-#{i}-#{SecureRandom.hex(2)}") }
results = {}
tags_per_exercise.each do |k|
pe = build_pool_with_tag_count(size: 30, tag_universe: tag_universe, tags_per_ex: k)
# Warm up: ensure associations are loaded enough to avoid counting lazy first-time loads unrelated to algorithm
pe.reload
queries = query_count { pe.get_matching_exercise(user) }
results[k] = queries
end

puts "BEST_MATCH query counts by tags per exercise: #{results.inspect}"

# Loose sanity check
expect(results.values.all?(&:positive?)).to be(true)
end
end
end
end
# rubocop:enable FactoryBot/ExcessiveCreateList
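The capture_sql helper above is a thin wrapper around Active Record's 'sql.active_record' instrumentation. The same subscription pattern works outside RSpec, for example as a quick console check against any block of code (a sketch, assuming a loaded Rails environment with the models from this PR):

queries = []
callback = ->(_name, _start, _finish, _id, payload) { queries << payload[:sql] }

# Subscribe only for the duration of the block, then report what ran.
ActiveSupport::Notifications.subscribed(callback, 'sql.active_record') do
  ProxyExercise.first&.count_files
end

puts "#{queries.size} queries:"
queries.each {|sql| puts "  #{sql}" }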
150 changes: 150 additions & 0 deletions spec/models/proxy_exercise_spec.rb
@@ -0,0 +1,150 @@
# frozen_string_literal: true

require 'rails_helper'

RSpec.describe ProxyExercise do
describe 'defaults and validations' do
let(:pe) { described_class.new }

it 'sets public to false by default (after_initialize)' do
expect(pe.public).to be(false)
end

it 'generates a token if none is present (after_initialize)' do
expect(pe.token).to be_present
expect(pe.token).to be_a(String)
expect(pe.token.size).to eq(8) # SecureRandom.hex(4)
end

context 'with a token' do
let(:pe) { build(:proxy_exercise, token: 'fixedtoken') }

it 'does not overwrite an existing token' do
expect(pe.token).to eq('fixedtoken')
end
end
end

describe '#count_files' do
let(:pe) { create(:proxy_exercise, exercises: create_list(:dummy, 7)) }

it 'returns the number of associated exercises' do
expect(pe.count_files).to eq(7)
end
end

describe '#duplicate' do
subject(:copy) { original.duplicate(title: 'Copy') }

let(:original) { create(:proxy_exercise, title: 'Original', token: 'tok12345') }

it 'returns a duplicated instance with overridden attributes' do
expect(copy).to be_a(described_class)
expect(copy).not_to be_persisted
expect(copy.title).to eq('Copy')
expect(copy.token).to eq('tok12345')
end
end

describe '#get_matching_exercise' do
let(:user) { create(:learner) }

context 'when an assignment already exists for the user' do
subject(:get_matching_exercise) { pe.get_matching_exercise(user) }

let(:pe) { create(:proxy_exercise) }
let(:ex) { create(:dummy) }

before { UserProxyExerciseExercise.create!(user:, exercise: ex, proxy_exercise: pe) }

it { is_expected.to eql ex }

it 'returns the previously assigned exercise without creating another one' do
expect { get_matching_exercise }.not_to change(UserProxyExerciseExercise, :count)
end
end

context "when algorithm is 'random'" do
subject(:get_matching_exercise) { pe.get_matching_exercise(user) }

let(:pe) { create(:proxy_exercise, algorithm: 'random', exercises: [chosen, other]) }
let(:chosen) { create(:dummy) }
let(:other) { create(:dummy) }

before { allow(pe.exercises.target).to receive(:sample).and_return(chosen) }

it { is_expected.to eql chosen }

it 'uses exercises.sample and persists the assignment with a reason' do
expect { get_matching_exercise }.to change(UserProxyExerciseExercise, :count).by(1)

assignment = UserProxyExerciseExercise.last
reason = JSON.parse(assignment.reason)
expect(reason['reason']).to eq('random exercise requested')
expect(assignment).to have_attributes(user: user, exercise: chosen, proxy_exercise: pe)
end
end

context "when algorithm is 'best_match'" do
subject(:get_matching_exercise) { pe.get_matching_exercise(user) }

let(:pe) { create(:proxy_exercise, algorithm: 'best_match', exercises: [easy, hard]) }
let(:easy) { create(:dummy, expected_difficulty: 1) }
let(:hard) { create(:dummy, expected_difficulty: 2) }

context 'when an error occurs' do
before do
allow(pe).to receive(:find_matching_exercise).and_raise(StandardError, 'boom')
allow(pe.exercises.target).to receive(:sample).and_return(hard)
end

it { is_expected.to eql hard }

it 'falls back to a random exercise with expected_difficulty > 1 and records the error in the reason' do
expect { get_matching_exercise }.to change(UserProxyExerciseExercise, :count).by(1)

reason = JSON.parse(UserProxyExerciseExercise.last.reason)
expect(reason['reason']).to eq('fallback because of error')
expect(reason['error']).to include('boom')
end
end

context 'when the user has not seen any tags' do
let(:easy) { create(:dummy, expected_difficulty: 1, tags: [build(:tag, name: 'arrays')]) }
let(:hard) { create(:dummy, expected_difficulty: 2, tags: [build(:tag, name: 'hashes')]) }

it { is_expected.to eql easy }

it 'selects the easiest exercise and records the reason' do
expect { get_matching_exercise }.to change(UserProxyExerciseExercise, :count).by(1)

reason = JSON.parse(UserProxyExerciseExercise.last.reason)
expect(reason['reason']).to eq('easiest exercise in pool. empty potential exercises')
end
end

context 'when user has submission for exercise' do
let(:tag) { build(:tag, name: 'loops') }
let(:hard) { create(:dummy, expected_difficulty: 2, tags: [tag]) }

before do
create(:dummy, expected_difficulty: 1, tags: [tag])
create(:dummy, expected_difficulty: 3, tags: [tag])
create(:submission, cause: 'submit', exercise: build(:dummy, expected_difficulty: 2, tags: [tag]), contributor: user)
end

it { is_expected.to eql hard }

it 'chooses the best matching exercise within the difficulty constraint and stores a detailed reason' do
expect { get_matching_exercise }.to change(UserProxyExerciseExercise, :count).by(1)

reason = JSON.parse(UserProxyExerciseExercise.last.reason)
expect(reason['reason']).to eq('best matching exercise')
expect(reason['highest_difficulty_user_has_accessed']).to eq(2)
expect(reason).to have_key('current_users_knowledge_lack')
expect(reason).to have_key('relative_knowledge_improvement')
end
end
end
end
end