Skip to content

Commit

Permalink
Fix some link anchors being recognized as hashtags (mastodon#27271)
Browse files Browse the repository at this point in the history
  • Loading branch information
ClearlyClaire authored Oct 23, 2023
1 parent 53fd28b commit bcae744
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 12 deletions.
2 changes: 1 addition & 1 deletion app/models/tag.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ class Tag < ApplicationRecord
HASHTAG_LAST_SEQUENCE = '([[:word:]_]*[[:alpha:]][[:word:]_]*)'
HASHTAG_NAME_PAT = "#{HASHTAG_FIRST_SEQUENCE}|#{HASHTAG_LAST_SEQUENCE}"

HASHTAG_RE = %r{(?:^|[^/)\w])#(#{HASHTAG_NAME_PAT})}i
# Matches a hashtag whose `#` is not directly preceded by `=`, `/`, `)` or an
# ASCII word character, so URL fragments such as `.../Ghostbusters_(song)#Lawsuit`
# or `...?foo=#Lawsuit` are not picked up. Using a lookbehind keeps the
# preceding character out of the match itself.
#
# The word-character exclusion must be spelled `\w` (or `[[:word:]]`). A bare
# `[:word]` inside a bracket expression is NOT a POSIX class: it is parsed as a
# nested set of the literal characters `:`, `w`, `o`, `r` and `d`, which lets
# `#` match after most letters and digits (e.g. `example.com/page#anchor`).
HASHTAG_RE = %r{(?<![=/)\w])#(#{HASHTAG_NAME_PAT})}i
HASHTAG_NAME_RE = /\A(#{HASHTAG_NAME_PAT})\z/i
HASHTAG_INVALID_CHARS_RE = /[^[:alnum:]#{HASHTAG_SEPARATORS}]/

Expand Down
26 changes: 15 additions & 11 deletions spec/models/tag_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -32,52 +32,56 @@
expect(subject.match('https://en.wikipedia.org/wiki/Ghostbusters_(song)#Lawsuit')).to be_nil
end

it 'does not match URLs with hashtag-like anchors after an empty query parameter' do
expect(subject.match('https://en.wikipedia.org/wiki/Ghostbusters_(song)?foo=#Lawsuit')).to be_nil
end

it 'matches #aesthetic' do
expect(subject.match('this is #aesthetic').to_s).to eq ' #aesthetic'
expect(subject.match('this is #aesthetic').to_s).to eq '#aesthetic'
end

it 'matches digits at the start' do
expect(subject.match('hello #3d').to_s).to eq ' #3d'
expect(subject.match('hello #3d').to_s).to eq '#3d'
end

it 'matches digits in the middle' do
expect(subject.match('hello #l33ts35k').to_s).to eq ' #l33ts35k'
expect(subject.match('hello #l33ts35k').to_s).to eq '#l33ts35k'
end

it 'matches digits at the end' do
expect(subject.match('hello #world2016').to_s).to eq ' #world2016'
expect(subject.match('hello #world2016').to_s).to eq '#world2016'
end

it 'matches underscores at the beginning' do
expect(subject.match('hello #_test').to_s).to eq ' #_test'
expect(subject.match('hello #_test').to_s).to eq '#_test'
end

it 'matches underscores at the end' do
expect(subject.match('hello #test_').to_s).to eq ' #test_'
expect(subject.match('hello #test_').to_s).to eq '#test_'
end

it 'matches underscores in the middle' do
expect(subject.match('hello #one_two_three').to_s).to eq ' #one_two_three'
expect(subject.match('hello #one_two_three').to_s).to eq '#one_two_three'
end

it 'matches middle dots' do
expect(subject.match('hello #one·two·three').to_s).to eq ' #one·two·three'
expect(subject.match('hello #one·two·three').to_s).to eq '#one·two·three'
end

it 'matches ・unicode in ぼっち・ざ・ろっく correctly' do
expect(subject.match('testing #ぼっち・ざ・ろっく').to_s).to eq ' #ぼっち・ざ・ろっく'
expect(subject.match('testing #ぼっち・ざ・ろっく').to_s).to eq '#ぼっち・ざ・ろっく'
end

it 'matches ZWNJ' do
expect(subject.match('just add #نرم‌افزار and').to_s).to eq ' #نرم‌افزار'
expect(subject.match('just add #نرم‌افزار and').to_s).to eq '#نرم‌افزار'
end

it 'does not match middle dots at the start' do
expect(subject.match('hello #·one·two·three')).to be_nil
end

it 'does not match middle dots at the end' do
expect(subject.match('hello #one·two·three·').to_s).to eq ' #one·two·three'
expect(subject.match('hello #one·two·three·').to_s).to eq '#one·two·three'
end

it 'does not match purely-numeric hashtags' do
Expand Down

0 comments on commit bcae744

Please sign in to comment.