martinlackner · martinlackner · Jul 4, 2023 · Jul 4, 2023 · Jul 4, 2023 · Jul 4, 2023
diff --git a/abcvoting/fileio.py b/abcvoting/fileio.py
@@ -9,6 +9,7 @@
 import os
 from math import ceil
 import ruamel.yaml
+import preflibtools.instances as preflib
 from abcvoting.preferences import Profile, Voter
 from abcvoting import misc
 
@@ -62,57 +63,7 @@ def get_file_names(dir_name, filename_extensions=None):
     return sorted(files)
 
 
-def _approval_set_from_preflib_datastructures(num_appr, ranking, candidate_map):
-    # if num_appr = 1 and the ranking starts with empty set, interpret as empty ballot and
-    # return set()
-    if (
-        num_appr == 1
-        and ranking[0].strip()[0] == "{"
-        and ranking[0].strip()[-1] == "}"
-        and ranking[0].strip().replace("}", "").replace("{", "").strip() == ""
-    ):
-        return set()
-
-    approval_set = set()
-    tied = False
-    for rank in ranking:
-        rank = rank.strip()
-        if rank.startswith("{"):
-            if not tied:
-                tied = True
-                rank = rank[1:]
-            else:
-                raise MalformattedFileException(
-                    "Invalid format for tied candidates: " + str(ranking)
-                )
-        if rank.endswith("}"):
-            if tied:
-                tied = False
-                rank = rank[:-1]
-            else:
-                raise MalformattedFileException(
-                    "Invalid format for tied candidates: " + str(ranking)
-                )
-        rank = rank.strip()
-        if len(rank) > 0:
-            try:
-                cand = int(rank)
-            except ValueError as error:
-                raise MalformattedFileException(
-                    f"Expected candidate number but encountered {rank}"
-                ) from error
-            approval_set.add(cand)
-        if len(approval_set) >= num_appr and not tied:
-            break
-    if tied:
-        raise MalformattedFileException("Invalid format for tied candidates: " + str(ranking))
-    if len(approval_set) < num_appr:
-        # all candidates approved
-        approval_set = set(candidate_map.keys())
-    return approval_set
-
-
-def read_preflib_file(filename, setsize=1, relative_setsize=None, use_weights=False):
+def read_preflib_file(filename, num_cats=None, setsize=None, use_weights=False):
     """
     Read a Preflib file (soi, toi, soc or toc).
 
@@ -121,20 +72,18 @@ def read_preflib_file(filename, setsize=1, relative_setsize=None, use_weights=Fa
         filename : str
             Name of the Preflib file.
 
+        num_cats : int, default=1
+            The approval set is composed of the union of the first `num_cats` categories of the instance.
+
+            It cannot be used if parameter `setsize` is used too.
+
         setsize : int
             Minimum number of candidates that voters approve.
 
             These candidates are taken from the top of ranking.
             In case of ties, more than setsize candidates are approved.
 
-            Paramer `setsize` is ignored if `relative_setsize` is used.
-
-        relative_setsize : float
-            Proportion (number between 0 and 1) of candidates that voters approve (rounded up).
-
-            In case of ties, more candidates are approved.
-            E.g., if there are 10 candidates and `relative_setsize=0.75`,
-            then the voter approves the top 8 candidates.
+            It cannot be used if parameter `num_cats` is used too.
 
         use_weights : bool, default=False
             Use weights of voters instead of individual voters.
@@ -148,81 +97,68 @@ def read_preflib_file(filename, setsize=1, relative_setsize=None, use_weights=Fa
         abcvoting.preferences.Profile
             Preference profile extracted from Preflib file.
     """
-    if setsize <= 0:
+    if num_cats is None and setsize is None:
+        num_cats = 1
+    if num_cats is not None and setsize is not None:
+        raise ValueError("Parameters num_cats and setsize cannot be used simultaneously.")
+    if num_cats is not None and num_cats <= 0:  # explicit None test so that 0 is rejected too
+        raise ValueError("Parameter num_cats must be > 0")
+    if setsize is not None and setsize <= 0:  # explicit None test so that 0 is rejected too
         raise ValueError("Parameter setsize must be > 0")
-    if relative_setsize and (relative_setsize <= 0.0 or relative_setsize > 1.0):
-        raise ValueError("Parameter relative_setsize not in interval (0, 1]")
-    with open(filename) as f:
-        line = f.readline()
-        num_cand = int(line.strip())
-        candidate_map = {}
-        for _ in range(num_cand):
-            parts = f.readline().strip().split(",")
-            candidate_map[int(parts[0].strip())] = ",".join(parts[1:]).strip()
-
-        parts = f.readline().split(",")
-        try:
-            voter_count, _, unique_orders = (int(p.strip()) for p in parts)
-        except ValueError as error:
-            raise MalformattedFileException(
-                f"Number of voters ill specified ({str(parts)}), should be triple of integers"
-            ) from error
 
-        approval_sets = []
-        lines = [line.strip() for line in f.readlines() if line.strip()]
-        if len(lines) != unique_orders:
-            raise MalformattedFileException(
-                f"Expected {unique_orders} lines that specify voters in the input, "
-                f"encountered {len(lines)}"
-            )
+    try:
+        preflib_inst = preflib.get_parsed_instance(filename)
+    except Exception as e:
+        raise MalformattedFileException(
+            "The preflib parser returned the following error: " + str(e)
+        ) from e
 
-    for line in lines:
-        parts = line.split(",")
-        if len(parts) < 1:
-            continue
-        try:
-            count = int(parts[0])
-        except ValueError as error:
-            raise MalformattedFileException(
-                f"Each ranking must start with count/weight ({line})."
-            ) from error
-        ranking = parts[1:]  # ranking starts after count
-        if len(ranking) == 0:
-            raise MalformattedFileException("Empty ranking: " + str(line))
-        if relative_setsize:
-            num_appr = int(ceil(len(ranking) * relative_setsize))
-        else:
-            num_appr = setsize
-        approval_set = _approval_set_from_preflib_datastructures(num_appr, ranking, candidate_map)
-        approval_sets.append((count, approval_set))
+    if isinstance(preflib_inst, preflib.OrdinalInstance):
+        if setsize:
+            preflib_inst = preflib.CategoricalInstance.from_ordinal(
+                preflib_inst, size_truncators=[setsize]
+            )
+        elif num_cats:
+            preflib_inst = preflib.CategoricalInstance.from_ordinal(
+                preflib_inst, num_indif_classes=[1] * preflib_inst.num_alternatives
+            )
+    elif not isinstance(preflib_inst, preflib.CategoricalInstance):
+        raise ValueError("Only ordinal and categorical preferences can be converted from PrefLib")
 
     # normalize candidates to 0, 1, 2, ...
     cand_names = []
     normalize_map = {}
-    for cand, name in candidate_map.items():
+    for cand, name in preflib_inst.alternatives_name.items():
         cand_names.append(name)
         normalize_map[cand] = len(cand_names) - 1
 
-    profile = Profile(num_cand, cand_names=cand_names)
+    profile = Profile(preflib_inst.num_alternatives, cand_names=cand_names)
+
+    for preferences, count in preflib_inst.multiplicity.items():
+        approval_set = []
+        if setsize:
+            category = 0
+            while len(approval_set) < setsize and category < len(preferences):
+                approval_set.extend([normalize_map[cand] for cand in preferences[category]])
+                category += 1
+            if 0 < len(approval_set) < setsize:  # fewer than setsize ranked: approve all
+                approval_set = list(normalize_map.values())
+        elif num_cats:
+            approval_set = [
+                normalize_map[cand]
+                for category in range(min(len(preferences), num_cats))
+                for cand in preferences[category]
+            ]
 
-    for count, approval_set in approval_sets:
-        normalized_approval_set = []
-        for cand in approval_set:
-            normalized_approval_set.append(normalize_map[cand])
         if use_weights:
-            profile.add_voter(Voter(normalized_approval_set, weight=count))
+            profile.add_voter(Voter(approval_set, weight=count))
         else:
-            profile.add_voters([normalized_approval_set] * count)
-    if use_weights:
-        if len(profile) != unique_orders:
-            raise MalformattedFileException("Number of voters wrongly specified in preflib file.")
-    else:
-        if len(profile) != voter_count:
-            raise MalformattedFileException("Number of voters wrongly specified in preflib file.")
+            profile.add_voters([approval_set] * count)
+
     return profile
 
 
-def read_preflib_files_from_dir(dir_name, setsize=1, relative_setsize=None):
+def read_preflib_files_from_dir(dir_name, num_cats=None, setsize=None):
     """
     Read all Preflib files (soi, toi, soc or toc) in a given directory.
 
@@ -231,39 +167,35 @@ def read_preflib_files_from_dir(dir_name, setsize=1, relative_setsize=None):
         dir_name : str
             Path of the directory to be searched for Preflib files.
 
+        num_cats : int, default=1
+            The approval set is composed of the union of the first `num_cats` categories of the instance.
+
+            It cannot be used if parameter `setsize` is used too.
+
         setsize : int
             Minimum number of candidates that voters approve.
 
             These candidates are taken from the top of ranking.
             In case of ties, more than setsize candidates are approved.
 
-            Paramer `setsize` is ignored if `relative_setsize` is used.
-
-        relative_setsize : float
-            Proportion (number between 0 and 1) of candidates that voters approve (rounded up).
-
-            In case of ties, more candidates are approved.
-            E.g., if there are 10 candidates and `relative_setsize=0.75`,
-            then the voter approves the top 8 candidates.
+            It cannot be used if parameter `num_cats` is used too.
 
     Returns
     -------
         dict
             Dictionary with file names as keys and profiles (class abcvoting.preferences.Profile)
             as values.
     """
-    files = get_file_names(dir_name, filename_extensions=[".soi", ".toi", ".soc", ".toc"])
+    files = get_file_names(dir_name, filename_extensions=[".soi", ".toi", ".soc", ".toc", ".cat"])
 
     profiles = {}
     for f in files:
-        profile = read_preflib_file(
-            os.path.join(dir_name, f), setsize=setsize, relative_setsize=relative_setsize
-        )
+        profile = read_preflib_file(os.path.join(dir_name, f), num_cats=num_cats, setsize=setsize)
         profiles[f] = profile
     return profiles
 
 
-def write_profile_to_preflib_toi_file(filename, profile):
+def write_profile_to_preflib_cat_file(filename, profile):
     """
     Write a profile to a Preflib .toi file.
 
@@ -279,21 +211,30 @@ def write_profile_to_preflib_toi_file(filename, profile):
     -------
         None
     """
-    with open(filename, "w") as f:
-        # write: number of candidates
-        f.write(str(profile.num_cand) + "\n")
-        # write: names of candidates
-        for cand in profile.candidates:
-            f.write(f"{cand + 1}, {profile.cand_names[cand]}\n")
-        # write: info about number of voters and total weight
-        total_weight = sum(voter.weight for voter in profile)
-        f.write(f"{total_weight}, {total_weight}, {len(profile)}\n")
-        # write: approval sets and weights
-        for voter in profile:
-            str_approval_set = misc.str_set_of_candidates(
-                voter.approved, cand_names=list(range(1, profile.num_cand + 1))
-            )
-            f.write(f"{voter.weight}, {str_approval_set}\n")
+    preflib_inst = preflib.CategoricalInstance()
+    preflib_inst.num_categories = 2
+    preflib_inst.categories_name = {"1": "Approved", "2": "Not approved"}
+    preflib_inst.file_name = os.path.basename(filename)  # name only, not the local path
+    preflib_inst.num_alternatives = profile.num_cand
+    for cand in profile.candidates:
+        preflib_inst.alternatives_name[cand + 1] = profile.cand_names[cand]
+
+    for voter in profile:
+        pref = (
+            tuple(sorted(cand + 1 for cand in voter.approved)),  # canonical order
+            tuple(cand + 1 for cand in profile.candidates if cand not in voter.approved),
+        )
+        if int(voter.weight) == voter.weight:
+            multiplicity = int(voter.weight)  # e.g. 2.0 -> 2
+        else:
+            multiplicity = 1  # NOTE(review): a non-integer weight is silently replaced by 1
+        if pref not in preflib_inst.preferences:
+            preflib_inst.preferences.append(pref)
+            preflib_inst.multiplicity[pref] = multiplicity
+        else:
+            preflib_inst.multiplicity[pref] += multiplicity
+    preflib_inst.recompute_cardinality_param()
+    preflib_inst.write(filename)
 
 
 def _yaml_flow_style_list(x):

diff --git a/examples/handling_preflib_files.py b/examples/handling_preflib_files.py
@@ -15,11 +15,11 @@
 # Write a profile to toi file
 profile = Profile(5, "ABCDE")
 profile.add_voters([{0, 1}, {1, 3, 4}, {2}, {3}, {3}])
-fileio.write_profile_to_preflib_toi_file(currdir + "/toi-files/new_example.toi", profile)
+fileio.write_profile_to_preflib_cat_file(currdir + "/toi-files/new_example.cat", profile)
 
 
-# Read a directory of Preflib files (using parameter `relative_setsize`)
-profiles = fileio.read_preflib_files_from_dir(currdir + "/toi-files/", relative_setsize=0.7)
+# Read a directory of Preflib files (using parameter `num_cats`)
+profiles = fileio.read_preflib_files_from_dir(currdir + "/toi-files/", num_cats=3)
 # Compute PAV for each profile
 committeesize = 2
 for profile in profiles.values():

diff --git a/examples/toi-files/example.toi b/examples/toi-files/example.toi
@@ -1,12 +1,22 @@
-6
-1,a
-2,b
-3,c
-4,d
-5,five
-6,e
-5,5,4
-1,1,2,3
-2,2,{1,4}
-1,{1,2},4
-1,4,5,6
+# FILE NAME: example.toi
+# TITLE: example.toi
+# DESCRIPTION:
+# DATA TYPE: toi
+# MODIFICATION TYPE: original
+# RELATES TO:
+# RELATED FILES:
+# PUBLICATION DATE:
+# MODIFICATION DATE:
+# NUMBER ALTERNATIVES: 6
+# NUMBER VOTERS: 5
+# NUMBER UNIQUE ORDERS: 4
+# ALTERNATIVE NAME 1: a
+# ALTERNATIVE NAME 2: b
+# ALTERNATIVE NAME 3: c
+# ALTERNATIVE NAME 4: d
+# ALTERNATIVE NAME 5: five
+# ALTERNATIVE NAME 6: e
+1:1,2,3
+2:2,{1,4}
+1:{1,2},4
+1:4,5,6,1
diff --git a/examples/toi-files/new_example.cat b/examples/toi-files/new_example.cat
@@ -0,0 +1,24 @@
+# FILE NAME: new_example.cat
+# TITLE: 
+# DESCRIPTION: 
+# DATA TYPE: cat
+# MODIFICATION TYPE: 
+# RELATES TO: 
+# RELATED FILES: 
+# PUBLICATION DATE: 
+# MODIFICATION DATE: 
+# NUMBER ALTERNATIVES: 5
+# NUMBER VOTERS: 5
+# NUMBER UNIQUE PREFERENCES: 4
+# NUMBER CATEGORIES: 2
+# CATEGORY NAME 1: Approved
+# CATEGORY NAME 2: Not approved
+# ALTERNATIVE NAME 1: A
+# ALTERNATIVE NAME 2: B
+# ALTERNATIVE NAME 3: C
+# ALTERNATIVE NAME 4: D
+# ALTERNATIVE NAME 5: E
+2: 4, {1, 2, 3, 5}
+1: {1, 2}, {3, 4, 5}
+1: {2, 4, 5}, {1, 3}
+1: 3, {1, 2, 4, 5}