Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

test extra space methods #132

Merged
merged 4 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 0 additions & 47 deletions lib/marc_cleanup/record_level.rb
Original file line number Diff line number Diff line change
Expand Up @@ -187,53 +187,6 @@ def field_count(record, opts = {})
results
end

### Normalize blank characters in a string
# Collapses every run of two or more blank characters to a single blank,
# then removes blanks at the end and at the start of each line.
# The argument is left untouched (frozen strings are accepted); the cleaned
# text is returned as a new String.
def extra_space_gsub(string)
  # Non-bang gsub throughout: the bang variants modified the caller's string
  # in place and raised FrozenError when it was frozen.
  collapsed = string.gsub(/([[:blank:]]){2,}/, '\1')
  right_trimmed = collapsed.gsub(/^(.*)[[:blank:]]+$/, '\1')
  right_trimmed.gsub(/^[[:blank:]]+(.*)$/, '\1')
end

### Remove extra spaces from all fields that are not positionally defined
# Visits every MARC::DataField in the record except tag 010 and replaces each
# eligible subfield value with the result of extra_space_gsub.
# Subfields left alone: nil values everywhere; code 7 in field 533; any code
# outside a-v and 3-8 in fields 76x-78x; codes w and 7 in 8xx fields.
# Fields whose tag matches none of the branches below are not touched.
# Returns the record that was passed in.
def extra_space_fix(record)
  record.fields.each do |field|
    next unless field.instance_of?(MARC::DataField) && field.tag != '010'

    # Each subfield is updated through the object yielded by the iterator;
    # there is no need to look the field up again by equality or to track a
    # subfield position by hand.
    case field.tag
    when /^[1-469]..|0[2-9].|01[1-9]|7[0-5].|5[0-24-9].|53[0-24-9]/
      field.subfields.each do |subfield|
        next if subfield.value.nil?

        # A copy is handed over so a frozen value, or a helper that edits its
        # argument in place, cannot raise or alter the stored string early.
        subfield.value = extra_space_gsub(subfield.value.dup)
      end
    when '533'
      field.subfields.each do |subfield|
        next if subfield.code == '7' || subfield.value.nil?

        subfield.value = extra_space_gsub(subfield.value.dup)
      end
    when /^7[6-8]./
      field.subfields.each do |subfield|
        next if subfield.code =~ /[^a-v3-8]/ || subfield.value.nil?

        subfield.value = extra_space_gsub(subfield.value.dup)
      end
    when /^8../
      field.subfields.each do |subfield|
        next if %w[w 7].include?(subfield.code) || subfield.value.nil?

        subfield.value = extra_space_gsub(subfield.value.dup)
      end
    end
  end
  record
end

### Scrub invalid UTF-8 byte sequences within field values,
# replacing with nothing; indicators, subfield codes, and tags must be
# handled separately
Expand Down
52 changes: 47 additions & 5 deletions lib/marc_cleanup/variable_fields.rb
Original file line number Diff line number Diff line change
Expand Up @@ -192,25 +192,26 @@ def empty_subfields?(record)
def extra_spaces?(record)
blank_regex = /^.*[[:blank:]]{2,}.*$|^.*[[:blank:]]+$|^[[:blank:]]+(.*)$/
record.fields.each do |field|
next unless field.class == MARC::DataField && field.tag != '010'
next unless field.instance_of?(MARC::DataField) && field.tag != '010'

if field.tag =~ /[1-469]..|0[2-9].|01[1-9]|7[0-5].|5[0-24-9].|53[0-24-9]/
case field.tag
when /[1-469]..|0[2-9].|01[1-9]|7[0-5].|5[0-24-9].|53[0-24-9]/
field.subfields.each do |subfield|
return true if subfield.value =~ blank_regex
end
elsif field.tag == '533'
when '533'
field.subfields.each do |subfield|
next if subfield.code == '7'

return true if subfield.value =~ blank_regex
end
elsif field.tag =~ /7[6-8]./
when /7[6-8]./
field.subfields.each do |subfield|
next unless subfield.code =~ /[a-v3-8]/

return true if subfield.value =~ blank_regex
end
elsif field.tag =~ /8../
when /8../
field.subfields.each do |subfield|
next unless subfield.code =~ /[^w7]/

Expand All @@ -221,6 +222,47 @@ def extra_spaces?(record)
false
end

### Normalize blank characters in a string
# Runs of two or more blank characters become a single blank; blanks at the
# end and at the beginning of each line are then removed.
# Returns a new String and never modifies the argument, so callers may pass
# frozen strings without duplicating them first.
def extra_space_gsub(string)
  # The bang forms of gsub edited the argument in place (and raised
  # FrozenError on frozen input); the non-bang chain yields the same text.
  squeezed = string.gsub(/([[:blank:]]){2,}/, '\1')
  without_trailing = squeezed.gsub(/^(.*)[[:blank:]]+$/, '\1')
  without_trailing.gsub(/^[[:blank:]]+(.*)$/, '\1')
end

### Remove extra spaces from all fields that are not positionally defined
# For every MARC::DataField other than 010, each eligible subfield value is
# replaced by extra_space_gsub applied to a copy of that value.
# Which subfield codes are exempt depends on the tag (see the case below);
# nil values are always skipped, and tags matching no branch are ignored.
# Returns the record that was passed in.
def extra_space_fix(record)
  record.fields.each do |field|
    next if !field.instance_of?(MARC::DataField) || field.tag == '010'

    # Pick the rule that says which subfield codes must be left alone
    exempt =
      case field.tag
      when /^[1-469]..|0[2-9].|01[1-9]|7[0-5].|5[0-24-9].|53[0-24-9]/
        ->(_code) { false }
      when '533'
        ->(code) { code == '7' }
      when /^7[6-8]./
        ->(code) { code =~ /[^a-v3-8]/ }
      when /^8../
        ->(code) { %w[w 7].include?(code) }
      end
    next if exempt.nil?

    field.subfields.each do |subfield|
      next if exempt.call(subfield.code) || subfield.value.nil?

      # The helper receives a copy, so the stored string is only replaced by
      # the assignment and never edited in place.
      subfield.value = extra_space_gsub(subfield.value.dup)
    end
  end
  record
end

### True when the record does not have exactly one 040 field
# (that is, when it has none or more than one)
def multiple_no_040?(record)
  fields_040 = record.fields('040')
  !(fields_040.size == 1)
end
Expand Down
117 changes: 117 additions & 0 deletions spec/variable_fields/extra_spaces_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
# frozen_string_literal: true

require 'marc_cleanup'

# Specs for MarcCleanup.extra_spaces?. Each context builds a one-field record
# from a hash plus a shared leader and checks the boolean the method returns.
# NOTE(review): the fixtures labelled "has extra spaces" show only single
# blanks here; runs of spaces may have been collapsed when this diff was
# rendered -- confirm against the repository copy of the spec.
RSpec.describe 'extra_spaces?' do
let(:record) { MARC::Record.new_from_hash('fields' => fields, 'leader' => leader) }
let(:leader) { '01104naa a2200289 i 4500' }

# A 100 field is expected to be reported
context '1xx field has extra spaces' do
let(:fields) do
[
{ '100' => { 'ind1' => '0',
'ind2' => ' ',
'subfields' => [{ 'a' => 'Candy wrapper.' }] } }
]
end
it 'triggers an error' do
expect(MarcCleanup.extra_spaces?(record)).to eq true
end
end

# 533 with a subfield 7 alongside subfield a (which holds one U+3000
# character between two words); the expected result is false
context '533 field has extra spaces in a positionally-defined subfield' do
let(:fields) do
[
{ '533' => { 'ind1' => ' ',
'ind2' => ' ',
'subfields' => [
{ 'a' => "Positive\u3000photo" },
{ '7' => 's1989 nyu a'}] } }
]
end
it 'does not triggers an error' do
expect(MarcCleanup.extra_spaces?(record)).to eq false
end
end

# A 775 field with subfield a is expected to be reported
context '775 field has extra spaces in main entry' do
let(:fields) do
[
{ '775' => { 'ind1' => '0',
'ind2' => ' ',
'subfields' => [ { 'a' => 'Modernist thought' }] } }]
end
it 'triggers an error' do
expect(MarcCleanup.extra_spaces?(record)).to eq true
end
end

# Negative case: an 830 field whose subfield a is already clean
context '830 field has no extra spaces in main entry' do
let(:fields) do
[
{ '830' => { 'ind1' => ' ',
'ind2' => '4',
'subfields' => [ { 'a' => 'The modern world.' }] } }]
end
it 'does not trigger an error' do
expect(MarcCleanup.extra_spaces?(record)).to eq false
end
end
end

# Specs for MarcCleanup.extra_space_fix. Each context builds a one-field
# record from a hash plus a shared leader, runs the fix, and compares one
# subfield value of the returned record with the expected text.
# NOTE(review): some "extra spaces" fixtures show only single blanks here;
# runs of spaces may have been collapsed when this diff was rendered --
# confirm against the repository copy of the spec.
RSpec.describe 'extra_space_fix' do
let(:record) { MARC::Record.new_from_hash('fields' => fields, 'leader' => leader) }
let(:leader) { '01104naa a2200289 i 4500' }

# Subfield a of a 100 field should come back normalized
context '1xx field has extra spaces' do
let(:fields) do
[
{ '100' => { 'ind1' => '0',
'ind2' => ' ',
'subfields' => [{ 'a' => 'Candy wrapper.' }] } }
]
end
it 'removes the extra spaces' do
expect(MarcCleanup.extra_space_fix(record)['100']['a']).to eq 'Candy wrapper.'
end
end

# Subfield 7 of a 533 field must come back exactly as supplied
context '533 field has extra spaces in a positionally-defined subfield' do
let(:fields) do
[
{ '533' => { 'ind1' => ' ',
'ind2' => ' ',
'subfields' => [
{ 'a' => "Positive\u3000photo" },
{ '7' => 's1989 nyu a'}] } }
]
end
it 'does not modify the positionally-defined subfield' do
expect(MarcCleanup.extra_space_fix(record)['533']['7']).to eq 's1989 nyu a'
end
end

# Subfield a of a 775 field ends with a blank that should be removed
context '775 field has extra spaces in main entry' do
let(:fields) do
[
{ '775' => { 'ind1' => '0',
'ind2' => ' ',
'subfields' => [ { 'a' => 'Modernist thought ' }] } }]
end
it 'removes the extra spaces' do
expect(MarcCleanup.extra_space_fix(record)['775']['a']).to eq 'Modernist thought'
end
end

# Negative case: a clean 830 subfield a is returned unchanged
context '830 field has no extra spaces in main entry' do
let(:fields) do
[
{ '830' => { 'ind1' => ' ',
'ind2' => '4',
'subfields' => [ { 'a' => 'The modern world.' }] } }]
end
it 'does not modify the record' do
expect(MarcCleanup.extra_space_fix(record)['830']['a']).to eq 'The modern world.'
end
end
end
34 changes: 26 additions & 8 deletions spec/variable_fields/field_structure_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,34 @@
RSpec.describe 'empty_subfields?' do
let(:record) { MARC::Record.new_from_hash('fields' => fields, 'leader' => leader) }
let(:leader) { '01104naa a2200289 i 4500' }
let(:fields) do
[
{ '245' => { 'ind1' => ' ',
'ind2' => ' ',
'subfields' => [{ 'a' => ' ' }, { 'b' => 'a tale' }] } }
]

context 'record has an empty subfield' do
let(:fields) do
[
{ '245' => { 'ind1' => ' ',
'ind2' => ' ',
'subfields' => [{ 'a' => ' ' }, { 'b' => 'a tale' }] } }
]
end

it 'finds an empty subfield' do
expect(MarcCleanup.empty_subfields?(record)).to eq true
end
end

it 'finds an empty subfield' do
expect(MarcCleanup.empty_subfields?(record)).to eq true
context 'record has no empty subfield' do
let(:fields) do
[
{ '245' => { 'ind1' => ' ',
'ind2' => ' ',
'subfields' => [{ 'a' => 'Headphones :' },
{ 'b' => 'a tale' }] } }
]
end

it 'does not trigger an error' do
expect(MarcCleanup.empty_subfields?(record)).to eq false
end
end
end

Expand Down