-
Notifications
You must be signed in to change notification settings - Fork 9
/
vcardtools.py
344 lines (306 loc) · 15.2 KB
/
vcardtools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Command line tool to fix, convert, split, normalize, group, merge, deduplicate
vCard and VCF files from version 2.1 to 3.0 (even large ones)."""
import argparse
import logging
import re
from sys import stderr, exit as sysexit
from os import makedirs
from os.path import exists, isfile, split as pathsplit
import vcardlib
from vcardlib import (
get_vcards_from_files,
get_vcards_groups,
collect_attributes,
set_name,
build_vcard,
write_vcard_to_file)
# Extension appended to generated vCard file names; default of --vcard-extension.
DEFAULT_VCARD_EXTENSION = '.vcard'
# When True, sanitise_name() also replaces spaces in generated names.
# Overwritten by main() from the --no-space-in-filename flag.
OPTION_NO_SPACE_IN_FILENAME = False
# String substituted for characters considered unsafe in a file name.
# Overwritten by main() from the --rep-invalid-fn-char-by option.
OPTION_REPLACE_INVALID_FILENAME_CHAR_BY = '_'
def init_parser():
    """Build the CLI argument parser.

    Declares the two positional arguments (DESTDIR and FILES) and every
    option understood by the tool. Nothing is parsed here.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    parser = argparse.ArgumentParser(
        description="Automatically fix / convert / split / normalize / group / merge / deduplicate "
                    "vCard and VCF files from version 2.1 to 3.0 (even large ones)."
    )

    # --- positional arguments -------------------------------------------------
    parser.add_argument(
        'dest_dir', metavar='DESTDIR',
        help="The directory that will contain the merged VCF (vCard) files. "
             "It MUST NOT exist already."
    )
    parser.add_argument(
        'files', metavar='FILES', nargs='+',
        help='The vcf/vcard files that contain vCards.'
    )

    # --- output / processing mode ---------------------------------------------
    parser.add_argument(
        '-e', '--vcard-extension', dest='vcard_extension', type=str,
        default=DEFAULT_VCARD_EXTENSION,
        help="The extension to use for vcard files. Default is: {dve}.".format(
            dve=DEFAULT_VCARD_EXTENSION)
    )
    parser.add_argument(
        '-g', '--group', dest='group_vcards', action='store_true',
        help="Group vcards that match into a directory."
    )
    parser.add_argument(
        '-m', '--merge', dest='merge_vcards', action='store_true',
        help="Merge vcards that match into a single file."
    )
    parser.add_argument(
        '-x', '--no-match-approx', dest='no_match_approx', action='store_true',
        help="Disable using approximate matching on names (note: names/words order will count)."
    )
    parser.add_argument(
        '-c', '--no-fix-and-convert', dest='no_fix_and_convert', action='store_true',
        help="Disable fixing invalid lines, and broken multilines value, "
             "and converting from vCard 2.1 to 3.0"
    )
    parser.add_argument(
        '-n', '--no-overwrite-names', dest='no_overwrite_names', action='store_true',
        help="Do not overwrite names in the vCard, i.e.: keep 'fn' and 'n' attributes untouched"
    )
    parser.add_argument(
        '-f', '--french-tweaks', dest='french_tweaks', action='store_true',
        help="Enable french tweaks (phone number '+33' converted to '0', "
             "handling of the name particle ' De ')."
    )

    # --- matching ---------------------------------------------------------------
    # With action='append' the values given on the command line are appended to
    # a copy of the default list; main() strips the default prefix afterwards.
    parser.add_argument(
        '-a', '--match-attributes', dest='match_attributes', action='append',
        default=vcardlib.OPTION_MATCH_ATTRIBUTES,
        help="Use those attributes to match vCards. Two vCards match when at least one of those "
             "attributes match. Special attributes: 'names' is an alias for 'fn'+'n' and "
             "'mobiles' for 'tel'+filter by phone number. Default is: %s. Use the argument "
             # 1-tuple so the formatting also works should the default be a tuple
             "multiple times to specify multiple values." % (vcardlib.OPTION_MATCH_ATTRIBUTES,)
    )
    parser.add_argument(
        '-t', '--match-ratio', dest='match_ratio', type=int, default=100,
        help="The ratio score to match the names (see fuzzywuzzy documentation). "
             "Default is: 100 (safe)."
    )
    parser.add_argument(
        '-i', '--match-min-length', dest='match_min_length', type=int, default=5,
        help="The minimum length of string to allow an approximate match. Default is: 5."
    )
    parser.add_argument(
        '-d', '--match-max-distance', dest='match_max_distance', type=int, default=3,
        help="The number of characters between the lengths of names that match. Default is: 3."
    )
    parser.add_argument(
        '-1', '--no-match-same-first-letter', dest='no_match_same_first_letter',
        action='store_true',
        help="Do not ensure that name's first letter match when doing approximate matching"
    )
    parser.add_argument(
        '-s', '--match-startswith', dest='match_startswith', action='store_true',
        help="Use the startswith comparison (using --match-max-distance) "
             "when doing approximate matching"
    )

    # --- content tweaks ---------------------------------------------------------
    parser.add_argument(
        '--move-name-extra-info-to-note', dest='move_name_parentheses_or_braces_to_note',
        action='store_true',
        help="Move name's characters between parentheses or braces to note attribute"
    )
    parser.add_argument(
        '--no-remove-name-in-email', dest='no_remove_name_in_email', action='store_true',
        help="Do not remove name in email, i.e.: keep email like the following untouched: "
             "\"John Doe\" <john@doe.com>"
    )
    parser.add_argument(
        '--do-not-force-escape-commas', dest='do_not_force_escape_commas', action='store_true',
        help="Disable automatically escaping commas."
    )

    # --- file naming ------------------------------------------------------------
    parser.add_argument(
        '--no-space-in-filename', dest='no_space_in_filename', action='store_true',
        help="Replace space in generated filename by '" +
             OPTION_REPLACE_INVALID_FILENAME_CHAR_BY + "' (or option --rep-invalid-fn-char-by)."
    )
    parser.add_argument(
        '--rep-invalid-fn-char-by', dest='rep_invalid_fn_char_by', type=str,
        default=OPTION_REPLACE_INVALID_FILENAME_CHAR_BY,
        help="Replace invalid characters in filename by the specified character. Default to '" +
             OPTION_REPLACE_INVALID_FILENAME_CHAR_BY + "'"
    )

    # --- logging ----------------------------------------------------------------
    parser.add_argument(
        '-l', '--log-level', dest='log_level', default='INFO',
        choices=['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'],
        help="the logging level in (DEBUG,INFO,WARNING,ERROR,CRITICAL), default is: INFO"
    )
    return parser
def sanitise_name(a_name: str) -> str:
    """Return `a_name` made safe for use as a file or directory name.

    Every character listed in `unsafe_chars` (plus the space when
    OPTION_NO_SPACE_IN_FILENAME is set) is replaced by
    OPTION_REPLACE_INVALID_FILENAME_CHAR_BY, then consecutive occurrences of
    the replacement are collapsed into a single one.

    Args:
        a_name: the raw name (typically a contact name) to sanitise.

    Returns:
        The sanitised name.
    """
    unsafe_chars = '.\\/"\'!@#?$%^&*|(){};:<>[]'
    if OPTION_NO_SPACE_IN_FILENAME:
        unsafe_chars = ' ' + unsafe_chars
    replacement = OPTION_REPLACE_INVALID_FILENAME_CHAR_BY

    # The characters must be escaped before being put in a character class:
    # unescaped, the backslash only escapes the following '/', and the ']'
    # closes the class early, so neither of them would ever be replaced.
    unsafe_pattern = '[' + re.escape(unsafe_chars) + ']+'
    # A callable replacement inserts the text literally (no backslash or
    # group-reference processing of a user-supplied replacement).
    a_name = re.sub(unsafe_pattern, lambda _match: replacement, a_name)

    # Nothing to collapse when the unsafe characters are simply removed.
    if not replacement:
        return a_name

    # Collapse runs of the replacement; it is escaped because it may be a
    # regular expression metacharacter such as '.' or '+'.
    repeated_replacement = '(?:' + re.escape(replacement) + ')+'
    return re.sub(repeated_replacement, lambda _match: replacement, a_name)
def generate_vcard_filename(a_name: str = '', ext: str = '') -> str:
    """Build the file name of a vCard.

    The name is passed through sanitise_name() and the extension `ext`
    is appended to the result as given.
    """
    safe_stem = sanitise_name(a_name=a_name)
    filename = safe_stem + ext
    return filename
def generate_group_dirname(a_name: str = '') -> str:
    """Build the directory name of a group: `a_name` passed through sanitise_name()."""
    safe_dirname = sanitise_name(a_name=a_name)
    return safe_dirname
def main():  # pylint: disable=too-many-statements,too-many-branches
    """Main program : running the command line.

    Steps:
      1. parse the arguments and configure logging;
      2. copy the matching/tweak options into the `vcardlib` module settings
         and the file-name options into this module's globals;
      3. validate DESTDIR (must not exist) and FILES (must be regular files),
         then create DESTDIR;
      4. read the vCards and write them out, either merged (--merge),
         grouped in directories (--group) or one file per vCard.

    Exits with status 2 on an invalid argument and 3 when interrupted
    by the user (CTRL-C).

    Raises:
        RuntimeError: if a group returned by get_vcards_groups() holds
            fewer than two vCards.
    """
    global OPTION_NO_SPACE_IN_FILENAME, OPTION_REPLACE_INVALID_FILENAME_CHAR_BY

    try:  # pylint: disable=too-many-nested-blocks
        parser = init_parser()
        args = parser.parse_args()

        # set the log level and log format accordingly
        # The level is resolved by name so that every choice accepted by the
        # parser (CRITICAL included) is honoured.
        log_level = getattr(logging, args.log_level, None)
        if not isinstance(log_level, int):
            stderr.write("[ERROR] Invalid log level '" + args.log_level + "'\n\n")
            parser.print_help()
            sysexit(2)
        logging.basicConfig(level=log_level, format='%(levelname)-8s %(message)s')

        # Set the extension to use when saving vcard files
        the_vcard_ext = args.vcard_extension

        # no match approx
        vcardlib.OPTION_NO_MATCH_APPROX = args.no_match_approx

        # match attributes
        # argparse appended the user values after the defaults: keep only the
        # part that follows the default prefix.
        if args.match_attributes and isinstance(args.match_attributes, list):
            if args.match_attributes != vcardlib.OPTION_MATCH_ATTRIBUTES:
                vcardlib.OPTION_MATCH_ATTRIBUTES = (
                    args.match_attributes[len(vcardlib.OPTION_MATCH_ATTRIBUTES):])

        # match approx min length (a value of 0 keeps the vcardlib setting)
        if args.match_min_length and isinstance(args.match_min_length, int):
            vcardlib.OPTION_MATCH_APPROX_MIN_LENGTH = args.match_min_length

        # match approx same first letter
        vcardlib.OPTION_MATCH_APPROX_SAME_FIRST_LETTER = not args.no_match_same_first_letter

        # match approx startswith
        vcardlib.OPTION_MATCH_APPROX_STARTSWITH = args.match_startswith

        # match approx max distance (a value of 0 keeps the vcardlib setting)
        # NOTE(review): range() excludes its upper bound, so +max_distance
        # itself is not in the range - confirm this is what vcardlib expects.
        if args.match_max_distance and isinstance(args.match_max_distance, int):
            vcardlib.OPTION_MATCH_APPROX_MAX_DISTANCE = range(
                -args.match_max_distance, args.match_max_distance)

        # match ratio (a value of 0 keeps the vcardlib setting)
        if args.match_ratio and isinstance(args.match_ratio, int):
            vcardlib.OPTION_MATCH_APPROX_RATIO = args.match_ratio

        # french tweaks
        vcardlib.OPTION_FRENCH_TWEAKS = args.french_tweaks

        # comma auto escape
        vcardlib.OPTION_DO_NOT_FORCE_ESCAPE_COMMAS = args.do_not_force_escape_commas

        # no space in filename
        OPTION_NO_SPACE_IN_FILENAME = args.no_space_in_filename

        # replacement of invalid chars in filename
        OPTION_REPLACE_INVALID_FILENAME_CHAR_BY = args.rep_invalid_fn_char_by

        # check DESTDIR argument
        if exists(args.dest_dir):
            stderr.write("[ERROR] Directory '" + args.dest_dir + "' exists. "
                         "Do not want to overwrite something\n\n")
            parser.print_help()
            sysexit(2)

        # check FILES argument
        # Done before creating DESTDIR, so that an invalid file does not leave
        # an empty directory behind that would make the next run fail.
        for arg_file in args.files:
            if not exists(arg_file):
                stderr.write("[ERROR] File '" + arg_file + "' doesn't exist\n\n")
                parser.print_help()
                sysexit(2)
            elif not isfile(arg_file):
                stderr.write("[ERROR] '" + arg_file + "' is not a regular file\n\n")
                parser.print_help()
                sysexit(2)

        # create DIR
        # Make sure args.dest_dir has not ending '/' before adding a new '/' in other steps
        dirname, subdir = pathsplit(args.dest_dir)
        if subdir == '':
            args.dest_dir = dirname
        makedirs(args.dest_dir)
        logging.info("Created directory '%s'", args.dest_dir)

        # summary of options
        logging.info("Options:")
        logging.info("\tMATCH_ATTRIBUTES: %s", vcardlib.OPTION_MATCH_ATTRIBUTES)
        logging.info("\tNO_MATCH_APPROX: %s", vcardlib.OPTION_NO_MATCH_APPROX)
        if not vcardlib.OPTION_NO_MATCH_APPROX:
            logging.info("\tMATCH_APPROX_SAME_FIRST_LETTER: %s",
                         vcardlib.OPTION_MATCH_APPROX_SAME_FIRST_LETTER)
            logging.info("\tMATCH_APPROX_STARTSWITH: %s", vcardlib.OPTION_MATCH_APPROX_STARTSWITH)
            logging.info("\tMATCH_APPROX_MIN_LENGTH: %s", vcardlib.OPTION_MATCH_APPROX_MIN_LENGTH)
            logging.info("\tMATCH_APPROX_MAX_DISTANCE: %s",
                         vcardlib.OPTION_MATCH_APPROX_MAX_DISTANCE)
            logging.info("\tMATCH_APPROX_RATIO: %s", vcardlib.OPTION_MATCH_APPROX_RATIO)
        logging.info("\tFRENCH_TWEAKS: %s", vcardlib.OPTION_FRENCH_TWEAKS)

        # read/parse individual vCard files
        vcards = get_vcards_from_files(
            args.files,
            args.no_fix_and_convert,
            args.no_overwrite_names,
            args.move_name_parentheses_or_braces_to_note,
            args.no_remove_name_in_email
        )

        # group vcards
        if args.group_vcards or args.merge_vcards:
            vcards_grouped, vcards_not_grouped = get_vcards_groups(vcards)

            # create grouped vCard files in group dirs
            logging.info("Processing '%d' grouped vCard ...", len(vcards_grouped))
            for g_name, g_list in sorted(vcards_grouped.items()):
                if len(g_list) <= 1:  # should not happen
                    raise RuntimeError("Only one vcard in group '" + g_name + "' "
                                       "(should not happen)")

                logging.debug("\t%s (%d vcards)", g_name, len(g_list))
                d_path = args.dest_dir + "/" + generate_group_dirname(g_name)

                # merge: one file per group, named after the group
                if args.merge_vcards:
                    # collect attributes for all vCards of the group
                    attributes = collect_attributes([vcards[key] for key in g_list])
                    # select a name
                    set_name(attributes)
                    # save the remaining attributes to the merged vCard
                    vcard_merge = build_vcard(attributes)
                    # write to the file
                    write_vcard_to_file(vcard_merge, d_path + the_vcard_ext)

                # group: one directory per group, one file per vCard
                else:
                    makedirs(d_path)
                    logging.debug("\t%s", d_path)
                    for key in g_list:
                        logging.debug("\t\t%s", key)
                        write_vcard_to_file(
                            vcards[key],
                            d_path + '/' + generate_vcard_filename(key, the_vcard_ext))

            # create vCard files not grouped in dest dir root
            if vcards_not_grouped:
                logging.info("Creating '%d' not grouped vCard files (in root dir) ...",
                             len(vcards_not_grouped))
                for key in vcards_not_grouped:
                    write_vcard_to_file(
                        vcards[key],
                        args.dest_dir + '/' + generate_vcard_filename(key, the_vcard_ext))

        # no grouping
        elif vcards:
            # create vCard files not grouped in dest dir root
            logging.info("Creating '%d' not grouped vCard files (in root dir) ...", len(vcards))
            for key, vcard in vcards.items():
                write_vcard_to_file(
                    vcard,
                    args.dest_dir + '/' + generate_vcard_filename(key, the_vcard_ext))

    # user CTRL-C
    except KeyboardInterrupt:
        logging.info("\nUser interupted. Bye.")
        sysexit(3)


if __name__ == "__main__":
    main()