Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updated I/O to use preflibtools #73

Merged
merged 7 commits into from
Jul 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
233 changes: 87 additions & 146 deletions abcvoting/fileio.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os
from math import ceil
import ruamel.yaml
import preflibtools.instances as preflib
from abcvoting.preferences import Profile, Voter
from abcvoting import misc

Expand Down Expand Up @@ -62,57 +63,7 @@ def get_file_names(dir_name, filename_extensions=None):
return sorted(files)


def _approval_set_from_preflib_datastructures(num_appr, ranking, candidate_map):
# if num_appr = 1 and the ranking starts with empty set, interpret as empty ballot and
# return set()
if (
num_appr == 1
and ranking[0].strip()[0] == "{"
and ranking[0].strip()[-1] == "}"
and ranking[0].strip().replace("}", "").replace("{", "").strip() == ""
):
return set()

approval_set = set()
tied = False
for rank in ranking:
rank = rank.strip()
if rank.startswith("{"):
if not tied:
tied = True
rank = rank[1:]
else:
raise MalformattedFileException(
"Invalid format for tied candidates: " + str(ranking)
)
if rank.endswith("}"):
if tied:
tied = False
rank = rank[:-1]
else:
raise MalformattedFileException(
"Invalid format for tied candidates: " + str(ranking)
)
rank = rank.strip()
if len(rank) > 0:
try:
cand = int(rank)
except ValueError as error:
raise MalformattedFileException(
f"Expected candidate number but encountered {rank}"
) from error
approval_set.add(cand)
if len(approval_set) >= num_appr and not tied:
break
if tied:
raise MalformattedFileException("Invalid format for tied candidates: " + str(ranking))
if len(approval_set) < num_appr:
# all candidates approved
approval_set = set(candidate_map.keys())
return approval_set


def read_preflib_file(filename, setsize=1, relative_setsize=None, use_weights=False):
def read_preflib_file(filename, num_cats=None, setsize=None, use_weights=False):
"""
Read a Preflib file (soi, toi, soc or toc).

Expand All @@ -121,20 +72,18 @@ def read_preflib_file(filename, setsize=1, relative_setsize=None, use_weights=Fa
filename : str
Name of the Preflib file.

num_cats : int, default=1
The approval set is composed of the union of the first `num_cats` categories of the instance.

It cannot be used if parameter `setsize` is used too.

setsize : int
Minimum number of candidates that voters approve.

These candidates are taken from the top of ranking.
In case of ties, more than setsize candidates are approved.

Parameter `setsize` is ignored if `relative_setsize` is used.

relative_setsize : float
Proportion (number between 0 and 1) of candidates that voters approve (rounded up).

In case of ties, more candidates are approved.
E.g., if there are 10 candidates and `relative_setsize=0.75`,
then the voter approves the top 8 candidates.
It cannot be used if parameter `num_cats` is used too.

use_weights : bool, default=False
Use weights of voters instead of individual voters.
Expand All @@ -148,81 +97,68 @@ def read_preflib_file(filename, setsize=1, relative_setsize=None, use_weights=Fa
abcvoting.preferences.Profile
Preference profile extracted from Preflib file.
"""
if setsize <= 0:
if num_cats is None and setsize is None:
num_cats = 1
if num_cats and setsize:
raise ValueError("Parameters num_cats and setsize cannot be used simultaneously.")
if num_cats and num_cats <= 0:
raise ValueError("Parameter num_cats must be > 0")
if setsize and setsize <= 0:
raise ValueError("Parameter setsize must be > 0")
if relative_setsize and (relative_setsize <= 0.0 or relative_setsize > 1.0):
raise ValueError("Parameter relative_setsize not in interval (0, 1]")
with open(filename) as f:
line = f.readline()
num_cand = int(line.strip())
candidate_map = {}
for _ in range(num_cand):
parts = f.readline().strip().split(",")
candidate_map[int(parts[0].strip())] = ",".join(parts[1:]).strip()

parts = f.readline().split(",")
try:
voter_count, _, unique_orders = (int(p.strip()) for p in parts)
except ValueError as error:
raise MalformattedFileException(
f"Number of voters ill specified ({str(parts)}), should be triple of integers"
) from error

approval_sets = []
lines = [line.strip() for line in f.readlines() if line.strip()]
if len(lines) != unique_orders:
raise MalformattedFileException(
f"Expected {unique_orders} lines that specify voters in the input, "
f"encountered {len(lines)}"
)
try:
preflib_inst = preflib.get_parsed_instance(filename)
except Exception as e:
raise MalformattedFileException(
"The preflib parser returned the following error: " + str(e)
)

for line in lines:
parts = line.split(",")
if len(parts) < 1:
continue
try:
count = int(parts[0])
except ValueError as error:
raise MalformattedFileException(
f"Each ranking must start with count/weight ({line})."
) from error
ranking = parts[1:] # ranking starts after count
if len(ranking) == 0:
raise MalformattedFileException("Empty ranking: " + str(line))
if relative_setsize:
num_appr = int(ceil(len(ranking) * relative_setsize))
else:
num_appr = setsize
approval_set = _approval_set_from_preflib_datastructures(num_appr, ranking, candidate_map)
approval_sets.append((count, approval_set))
if isinstance(preflib_inst, preflib.OrdinalInstance):
if setsize:
preflib_inst = preflib.CategoricalInstance.from_ordinal(
preflib_inst, size_truncators=[setsize]
)
elif num_cats:
preflib_inst = preflib.CategoricalInstance.from_ordinal(
preflib_inst, num_indif_classes=[1] * preflib_inst.num_alternatives
)
elif not isinstance(preflib_inst, preflib.CategoricalInstance):
raise ValueError("Only ordinal and categorical preferences can be converted from PrefLib")

# normalize candidates to 0, 1, 2, ...
cand_names = []
normalize_map = {}
for cand, name in candidate_map.items():
for cand, name in preflib_inst.alternatives_name.items():
cand_names.append(name)
normalize_map[cand] = len(cand_names) - 1

profile = Profile(num_cand, cand_names=cand_names)
profile = Profile(preflib_inst.num_alternatives, cand_names=cand_names)

for preferences, count in preflib_inst.multiplicity.items():
approval_set = []
if setsize:
category = 0
while len(approval_set) < setsize and category < len(preferences):
approval_set.extend([normalize_map[cand] for cand in preferences[category]])
category += 1
if 0 < len(approval_set) < setsize:
approval_set = normalize_map.values()
elif num_cats:
approval_set = [
normalize_map[cand]
for category in range(min(len(preferences), num_cats))
for cand in preferences[category]
]

for count, approval_set in approval_sets:
normalized_approval_set = []
for cand in approval_set:
normalized_approval_set.append(normalize_map[cand])
if use_weights:
profile.add_voter(Voter(normalized_approval_set, weight=count))
profile.add_voter(Voter(approval_set, weight=count))
else:
profile.add_voters([normalized_approval_set] * count)
if use_weights:
if len(profile) != unique_orders:
raise MalformattedFileException("Number of voters wrongly specified in preflib file.")
else:
if len(profile) != voter_count:
raise MalformattedFileException("Number of voters wrongly specified in preflib file.")
profile.add_voters([approval_set] * count)

return profile


def read_preflib_files_from_dir(dir_name, setsize=1, relative_setsize=None):
def read_preflib_files_from_dir(dir_name, num_cats=None, setsize=None):
"""
Read all Preflib files (soi, toi, soc or toc) in a given directory.

Expand All @@ -231,39 +167,35 @@ def read_preflib_files_from_dir(dir_name, setsize=1, relative_setsize=None):
dir_name : str
Path of the directory to be searched for Preflib files.

num_cats : int, default=1
The approval set is composed of the union of the first `num_cats` categories of the instance.

It cannot be used if parameter `setsize` is used too.

setsize : int
Minimum number of candidates that voters approve.

These candidates are taken from the top of ranking.
In case of ties, more than setsize candidates are approved.

Parameter `setsize` is ignored if `relative_setsize` is used.

relative_setsize : float
Proportion (number between 0 and 1) of candidates that voters approve (rounded up).

In case of ties, more candidates are approved.
E.g., if there are 10 candidates and `relative_setsize=0.75`,
then the voter approves the top 8 candidates.
It cannot be used if parameter `num_cats` is used too.

Returns
-------
dict
Dictionary with file names as keys and profiles (class abcvoting.preferences.Profile)
as values.
"""
files = get_file_names(dir_name, filename_extensions=[".soi", ".toi", ".soc", ".toc"])
files = get_file_names(dir_name, filename_extensions=[".soi", ".toi", ".soc", ".toc", ".cat"])

profiles = {}
for f in files:
profile = read_preflib_file(
os.path.join(dir_name, f), setsize=setsize, relative_setsize=relative_setsize
)
profile = read_preflib_file(os.path.join(dir_name, f), num_cats=num_cats, setsize=setsize)
profiles[f] = profile
return profiles


def write_profile_to_preflib_toi_file(filename, profile):
def write_profile_to_preflib_cat_file(filename, profile):
"""
Write a profile to a Preflib .toi file.

Expand All @@ -279,21 +211,30 @@ def write_profile_to_preflib_toi_file(filename, profile):
-------
None
"""
with open(filename, "w") as f:
# write: number of candidates
f.write(str(profile.num_cand) + "\n")
# write: names of candidates
for cand in profile.candidates:
f.write(f"{cand + 1}, {profile.cand_names[cand]}\n")
# write: info about number of voters and total weight
total_weight = sum(voter.weight for voter in profile)
f.write(f"{total_weight}, {total_weight}, {len(profile)}\n")
# write: approval sets and weights
for voter in profile:
str_approval_set = misc.str_set_of_candidates(
voter.approved, cand_names=list(range(1, profile.num_cand + 1))
)
f.write(f"{voter.weight}, {str_approval_set}\n")
preflib_inst = preflib.CategoricalInstance()
preflib_inst.num_categories = 2
preflib_inst.categories_name = {"1": "Approved", "2": "Not approved"}
preflib_inst.file_name = filename
preflib_inst.num_alternatives = profile.num_cand
for cand in profile.candidates:
preflib_inst.alternatives_name[cand + 1] = profile.cand_names[cand]

for voter in profile:
pref = (
tuple(cand + 1 for cand in voter.approved),
tuple(cand + 1 for cand in profile.candidates if cand not in voter.approved),
)
if int(voter.weight) == voter.weight:
multiplicity = voter.weight
else:
multiplicity = 1
if pref not in preflib_inst.preferences:
preflib_inst.preferences.append(pref)
preflib_inst.multiplicity[pref] = multiplicity
else:
preflib_inst.multiplicity[pref] += multiplicity
preflib_inst.recompute_cardinality_param()
preflib_inst.write(filename)


def _yaml_flow_style_list(x):
Expand Down
6 changes: 3 additions & 3 deletions examples/handling_preflib_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,11 @@
# Write a profile to toi file
profile = Profile(5, "ABCDE")
profile.add_voters([{0, 1}, {1, 3, 4}, {2}, {3}, {3}])
fileio.write_profile_to_preflib_toi_file(currdir + "/toi-files/new_example.toi", profile)
fileio.write_profile_to_preflib_cat_file(currdir + "/toi-files/new_example.cat", profile)


# Read a directory of Preflib files (using parameter `relative_setsize`)
profiles = fileio.read_preflib_files_from_dir(currdir + "/toi-files/", relative_setsize=0.7)
# Read a directory of Preflib files (using parameter `num_cats`)
profiles = fileio.read_preflib_files_from_dir(currdir + "/toi-files/", num_cats=3)
# Compute PAV for each profile
committeesize = 2
for profile in profiles.values():
Expand Down
34 changes: 22 additions & 12 deletions examples/toi-files/example.toi
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
6
1,a
2,b
3,c
4,d
5,five
6,e
5,5,4
1,1,2,3
2,2,{1,4}
1,{1,2},4
1,4,5,6
# FILE NAME: example.toi
# TITLE: example.toi
# DESCRIPTION:
# DATA TYPE: toi
# MODIFICATION TYPE: original
# RELATES TO:
# RELATED FILES:
# PUBLICATION DATE:
# MODIFICATION DATE:
# NUMBER ALTERNATIVES: 6
# NUMBER VOTERS: 5
# NUMBER UNIQUE ORDERS: 4
# ALTERNATIVE NAME 1: a
# ALTERNATIVE NAME 2: b
# ALTERNATIVE NAME 3: c
# ALTERNATIVE NAME 4: d
# ALTERNATIVE NAME 5: five
# ALTERNATIVE NAME 6: e
1:1,2,3
2:2,{1,4}
1:{1,2},4
1:4,5,6,1
24 changes: 24 additions & 0 deletions examples/toi-files/new_example.cat
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# FILE NAME: /home/simon/Git/abcvoting/examples/toi-files/new_example.cat
# TITLE:
# DESCRIPTION:
# DATA TYPE:
# MODIFICATION TYPE:
# RELATES TO:
# RELATED FILES:
# PUBLICATION DATE:
# MODIFICATION DATE:
# NUMBER ALTERNATIVES: 5
# NUMBER VOTERS: 5
# NUMBER UNIQUE PREFERENCES: 4
# NUMBER CATEGORIES: 2
# CATEGORY NAME 1: Approved
# CATEGORY NAME 2: Not approved
# ALTERNATIVE NAME 1: A
# ALTERNATIVE NAME 2: B
# ALTERNATIVE NAME 3: C
# ALTERNATIVE NAME 4: D
# ALTERNATIVE NAME 5: E
2: 4, {1, 2, 3, 5}
1: {1, 2}, {3, 4, 5}
1: {2, 4, 5}, {1, 3}
1: 3, {1, 2, 4, 5}
Loading