-
Notifications
You must be signed in to change notification settings - Fork 1
/
gvtxto-parser.py
executable file
·132 lines (115 loc) · 4.76 KB
/
gvtxto-parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/python
# -*- coding: latin-1 -*-
# Author: Javier Fernández Aparicio
# Email: jfernandil@alumnos.unex.es
# Project website:
# https://github.com/WyRe/gvtxto-parser
import click
import re
import csv
# First regex try (kept for reference only, no longer used):
# rex = re.compile('(?!(^[0-9]+:$))(?!^$)')
# The patterns below came from suggestions on IRC freenode #regex;
# regex101.com might be helpful to inspect them.
#
# rexinfo: anchored at the very start of the text (it is applied without
# re.MULTILINE), it consumes every consecutive line that does NOT begin with
# optional spaces followed by digits and a colon. parser() treats the text it
# matches as the GammaVision header -- presumably the first "<digits>:" line
# is the first row of the channel table; confirm against a sample file.
# Because the group may repeat zero times, the pattern can match an empty
# header.
rexinfo = r"^(?:(?! *\d+:).*\n)*"
# rexdata: a run of digits with whitespace (or a string boundary) on both
# sides. Tokens such as "123:" are rejected because of the trailing colon,
# so only the free-standing numbers are picked up as counts.
rexdata = r"(?<!\S)\d+(?!\S)"
def print_version(ctx, param, value):
    """Print the script version and stop, when the version flag is given.

    Callback with the (ctx, param, value) signature that click uses for
    option callbacks; it is attached to the ``--version`` / ``-v`` flag.

    ctx:   the click context; its ``resilient_parsing`` attribute is read and
           ``exit()`` is called on it once the version has been printed.
    param: the option that triggered the callback (unused).
    value: the flag value; falsy when the flag was not supplied.

    Returns None without printing anything when the flag is absent or when
    click is only doing resilient parsing.
    """
    if not value or ctx.resilient_parsing:
        return
    click.echo('Version 0.1.1')
    # End the program right after printing the version.
    ctx.exit()
# Fetching command line arguments, with click module
@click.command()
@click.option(
    '--version',
    '-v',
    is_flag=True,
    callback=print_version,
    expose_value=False,
    is_eager=True,
    help="Prints this script's version")
@click.option(
    '--input',
    '-i',
    prompt=True,
    help='Input file; absolute/relative path',
    # Attention to the input file character set encoding
    type=click.File('r', encoding='latin-1'))
@click.option(
    '--output',
    '-o',
    prompt=True,
    help="Output file (you'll must add the file extenxion)",
    # Attention to the output file character set encoding
    type=click.File('w', encoding='latin-1'))
@click.option(
    '--out-type',
    '-ot',
    type=click.Choice(['raw', 'enhanced', 'csv']),
    default='raw',
    help=('raw: only count per channel data \n'
          'enhanced: including GammaVision log info \n'
          'csv: comma-separated values'
          ),
)
# Core function. It parses the input file and writes a new output file.
#   input:    readable text file object opened by click (latin-1)
#   output:   writable text file object opened by click (latin-1)
#   out_type: one of 'raw', 'enhanced' or 'csv' (default: 'raw')
def parser(input, output, out_type):
    """
    This script allows you to convert GammaVision .txt output files into a
    much more treatable format, printing just one channel (and its respective
    count) per row, comma-separated.

    You can see the available options using the --help argument.

    $ python gvtxto-parser.py [options] -i <inpfil.txt> -o <outfil.dat>

    You may choose whatever format you want (.dat in this case).

    The core has been designed using regex, so it should keep working
    as long as GammaVision does preserve this kind of format for its output
    files.

    More info and practical examples about regex in https://regex101.com

    Author: Javier Fernández <jfernandil@alumnos.unex.es> Project
    / Docs -> https://github.com/WyRe/gvtxto-parser
    """
    # Loading the raw GammaVision data in a single string to parse with regex
    i_str = input.read()

    # First match: the GammaVision header (whatever rexinfo matches at the
    # start of the text). The pattern can match an empty header, so this
    # search is expected to succeed; the guard only preserves the original
    # "write nothing when there is no match" behaviour.
    info_matches = re.search(rexinfo, i_str)
    if not info_matches:
        return

    # Text without the GammaVision header: re.sub() replaces the rexinfo
    # match with a single blank space.
    noinf_str = re.sub(rexinfo, ' ', i_str)

    # Second match, lazily iterated: every whitespace-delimited number left
    # in the text is taken as one count, in order of appearance.
    counts = (match.group() for match in re.finditer(rexdata, noinf_str))

    if out_type == 'enhanced':
        # GammaVision header first, then the column names, then the data.
        # Rows are numbered from 1 with enumerate().
        output.write('{match}\n'.format(match=info_matches.group()))
        output.write('channel,count\n')
        for i, count in enumerate(counts, start=1):
            output.write('{i},{match}\n'.format(i=i, match=count))
    elif out_type == 'raw':
        # Only the counts, one per line (no channel number, no header).
        for count in counts:
            output.write('{match}\n'.format(match=count))
    elif out_type == 'csv':
        # Header and rows go through the same csv writer so that every line
        # ends the same way. lineterminator='\n' is used because the output
        # file is a text-mode file not opened with newline='': the csv
        # default '\r\n' would clash with the '\n' rows and be translated
        # again on platforms that rewrite newlines.
        writer = csv.writer(output, lineterminator='\n')
        writer.writerow(['channel', 'count'])
        writer.writerows(enumerate(counts, start=1))
# Run the click command only when the file is executed as a script.
if __name__ == '__main__':
    parser()