Skip to content

Commit

Permalink
Make composed_chars_normalized compatible with frozen_string_literal: true (#133)
Browse files Browse the repository at this point in the history

Co-authored-by: Jane Sandberg <sandbergja@users.noreply.github.com>
  • Loading branch information
christinach and sandbergja authored Dec 31, 2024
1 parent 3207790 commit 39cd9ff
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 6 deletions.
11 changes: 5 additions & 6 deletions lib/marc_cleanup/variable_fields.rb
Original file line number Diff line number Diff line change
Expand Up @@ -469,16 +469,15 @@ def composed_chars_normalize(record)
field_index = record.fields.index(field)
curr_subfield = 0
field.subfields.each do |subfield|
fixed_subfield = ''
prevalue = subfield.value
if prevalue =~ /^.*[\u0653\u0654\u0655].*$/
prevalue = prevalue.unicode_normalize(:nfc)
end
prevalue.each_codepoint do |c|
char = c.chr(Encoding::UTF_8)
char.unicode_normalize!(:nfd) if c < 1570 || (7_680..10_792).cover?(c)
fixed_subfield << char
end
fixed_subfield = prevalue.codepoints.map do |codepoint|
char = codepoint.chr(Encoding::UTF_8)
char.unicode_normalize!(:nfd) if codepoint < 1570 || (7_680..10_792).cover?(codepoint)
char
end.join
record.fields[field_index].subfields[curr_subfield].value = fixed_subfield
curr_subfield += 1
end
Expand Down
35 changes: 35 additions & 0 deletions spec/variable_fields/composed_chars_normalize_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# frozen_string_literal: true

require 'marc_cleanup'
# Arabic sample text used as the 245 $a title fixture by the examples in this
# file. The examples compare against its NFC and NFD forms and assert that the
# literal's length matches the length of its NFC form.
ARABIC_STRING = 'يُعَظِّمُونَهُ وَيُؤْمِنُونَ أَنَّهُ'

# Exercises composed_chars_normalize on a record whose 245 $a holds
# ARABIC_STRING, supplied once in NFC and once in NFD.
RSpec.describe 'composed_chars_normalize' do
  # Returns a newly built copy of the fixture in the requested normalization
  # form, so the input handed to the record and the expected value are never
  # the same String object.
  def arabic_title(form)
    ARABIC_STRING.unicode_normalize(form)
  end

  # Field list for MARC::Record.new_from_hash: a single 245 carrying
  # subfields 6 and a, with +title+ as the value of subfield a.
  def title_fields(title)
    [
      {
        '245' => {
          'ind1' => '',
          'ind2' => ' ',
          'subfields' => [{ '6' => '880-03' }, { 'a' => title }]
        }
      }
    ]
  end

  let(:leader) { '01104naa a2200289 i 4500' }
  let(:record) { MARC::Record.new_from_hash('fields' => fields, 'leader' => leader) }
  # Character count of the literal exactly as written in this file.
  let(:literal_length) { ARABIC_STRING.length }

  context 'When the title is in Arabic with NFC' do
    let(:fields) { title_fields(arabic_title(:nfc)) }

    it 'keeps it in NFC' do
      expect(composed_chars_normalize(record)['245']['a']).to eq arabic_title(:nfc)
      # Fixture sanity check: NFC has the same length as the literal.
      expect(arabic_title(:nfc).length).to eq literal_length
    end
  end

  context 'When the title is in Arabic with NFD' do
    let(:fields) { title_fields(arabic_title(:nfd)) }

    it 'normalizes it to NFC' do
      expect(composed_chars_normalize(record)['245']['a']).to eq arabic_title(:nfc)
      # Fixture sanity check: the NFD input really differs in length from the literal.
      expect(arabic_title(:nfd).length).not_to eq literal_length
    end
  end
end

0 comments on commit 39cd9ff

Please sign in to comment.