Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Group ideographic characters in tag cloud #613

Merged
merged 2 commits into from
Mar 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 54 additions & 1 deletion bookmarks/tests/test_tag_cloud_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def assertTagGroups(self, rendered_template: str, groups: List[List[str]]):
group_element = group_elements[group_index]
link_elements = group_element.select("a")

self.assertEqual(len(link_elements), len(tags))
self.assertEqual(len(link_elements), len(tags), tags)

for tag_index, tag in enumerate(tags, start=0):
link_element = link_elements[tag_index]
Expand All @@ -50,6 +50,59 @@ def assertNumSelectedTags(self, rendered_template: str, count: int):
link_elements = soup.select("p.selected-tags a")
self.assertEqual(len(link_elements), count)

def test_cjk_using_single_group(self):
"""
Tags that start with an ideographic (CJK) character share a single group,
while tags that start with other Japanese (kana) or Korean (hangul)
characters are still grouped by their first character.
"""
tags = [
self.setup_tag(name="Aardvark"),
self.setup_tag(name="Armadillo"),
self.setup_tag(name="あひる"),
self.setup_tag(name="あきらか"),
self.setup_tag(name="アヒル"),
self.setup_tag(name="アキラカ"),
self.setup_tag(name="ひる"),
# NOTE(review): "アヒル" is set up a second time here, yet it is expected
# only once below - confirm the duplicate is intentional
self.setup_tag(name="アヒル"),
self.setup_tag(name="오리"),
self.setup_tag(name="물"),
self.setup_tag(name="家鴨"),
self.setup_tag(name="感じ"),
]
self.setup_bookmark(tags=tags)
rendered_template = self.render_template()

# Expected groups in rendered order; the shared ideographic group comes last
self.assertTagGroups(
rendered_template,
[
[
"Aardvark",
"Armadillo",
],
[
"あきらか",
"あひる",
],
[
"ひる",
],
[
"アキラカ",
"アヒル",
],
[
"물",
],
[
"오리",
],
[
"家鴨",
"感じ",
],
],
)

def test_group_alphabetically(self):
tags = [
self.setup_tag(name="Cockatoo"),
Expand Down
29 changes: 20 additions & 9 deletions bookmarks/views/partials/contexts.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import urllib.parse
from typing import Set, List
import re

from django.core.handlers.wsgi import WSGIRequest
from django.core.paginator import Paginator
Expand All @@ -11,13 +12,13 @@
from bookmarks.models import (
Bookmark,
BookmarkSearch,
BookmarkSearchForm,
User,
UserProfile,
Tag,
)

DEFAULT_PAGE_SIZE = 30
# Matches one or more characters in the range U+4E00-U+9FFF (the CJK Unified
# Ideographs block). Used to detect tags whose first character is ideographic
# so that they can all be placed in one shared tag group.
CJK_RE = re.compile(r"[\u4e00-\u9fff]+")


class BookmarkItem:
Expand Down Expand Up @@ -123,13 +124,13 @@ def generate_action_url(
)

def get_base_url(self):
    """
    Hook that subclasses must override.

    This base implementation never returns a value.

    :raises Exception: always, to signal that the subclass did not override it
    """
    raise Exception("Must be implemented by subclass")

def get_base_action_url(self):
    """
    Hook that subclasses must override.

    This base implementation never returns a value.

    :raises Exception: always, to signal that the subclass did not override it
    """
    raise Exception("Must be implemented by subclass")

def get_bookmark_query_set(self):
    """
    Hook that subclasses must override.

    This base implementation never returns a value.

    :raises Exception: always, to signal that the subclass did not override it
    """
    raise Exception("Must be implemented by subclass")


class ActiveBookmarkListContext(BookmarkListContext):
Expand Down Expand Up @@ -178,23 +179,33 @@ def __init__(self, char: str):
self.tags = []
self.char = char

def __repr__(self):
"""Return a short debug label that includes this group's ``char``."""
return f"<{self.char} TagGroup>"

@staticmethod
def create_tag_groups(tags: Set[Tag]):
    """
    Build the ordered list of tag groups for a set of tags.

    Tags are sorted case-insensitively by name and grouped by the lowercased
    first character of their name. Tags whose first character matches
    ``CJK_RE`` are all collected in one group labelled "Ideographic", which
    is placed after every other group and left out when it has no tags.

    :param tags: tags to group; every tag needs a non-empty ``name`` because
        its first character is used as the group key
    :return: list of ``TagGroup`` instances in display order
    """
    # Ensure groups, as well as tags within groups, are ordered alphabetically
    sorted_tags = sorted(tags, key=lambda x: str.lower(x.name))
    group = None
    groups = []
    cjk_group = TagGroup("Ideographic")

    for tag in sorted_tags:
        tag_char = tag.name[0].lower()

        # Grouping by first character would create one group per distinct
        # ideograph, so these tags share a single group instead
        if CJK_RE.match(tag_char):
            cjk_group.tags.append(tag)
            continue

        # Start a new group when the first character differs from the
        # previous non-ideographic tag
        if not group or group.char != tag_char:
            group = TagGroup(tag_char)
            groups.append(group)
        group.tags.append(tag)

    # The shared group always goes last and is only added when it is in use
    if cjk_group.tags:
        groups.append(cjk_group)
    return groups


Expand Down Expand Up @@ -224,7 +235,7 @@ def __init__(self, request: WSGIRequest) -> None:
self.has_selected_tags = has_selected_tags

def get_tag_query_set(self):
    """
    Hook that subclasses must override.

    This base implementation never returns a value.

    :raises Exception: always, to signal that the subclass did not override it
    """
    raise Exception("Must be implemented by subclass")

def get_selected_tags(self, tags: List[Tag]):
parsed_query = queries.parse_query_string(self.search.q)
Expand Down