-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathHorsParseResult.cs
103 lines (87 loc) · 3.38 KB
/
HorsParseResult.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using Hors.Utils;
namespace Hors.Models
{
    /// <summary>
    /// Holds the outcome of a parse: the source text, the dates found in it, and
    /// variants of the text with the date spans removed or replaced by <c>{i}</c>
    /// placeholders.
    /// </summary>
    public class HorsParseResult
    {
        /// <summary>The text exactly as it was passed to the constructor.</summary>
        public string SourceText { get; }

        /// <summary>
        /// The tokens passed to the constructor, minus the <c>{i}</c> placeholders of
        /// dates that were dropped as duplicates.
        /// </summary>
        public List<string> Tokens { get; }

        /// <summary>
        /// <see cref="SourceText"/> with every date span cut out and whitespace runs
        /// collapsed, then passed through <c>Helpers.TrimPunctuation</c> and trimmed.
        /// </summary>
        public string Text { get; }

        /// <summary>
        /// The dates with duplicates collapsed: only the first date of each duplicate
        /// group is kept.
        /// </summary>
        public List<DateTimeToken> Dates { get; }

        // Lazily built value of TextWithTokens; null means "not computed yet".
        private string _textWithTokens;

        // Every date handed to the constructor, duplicates included. The number inside a
        // "{i}" placeholder is an index into this list, not into Dates.
        private readonly List<DateTimeToken> _fullDates;

        // Placeholders ("{i}") of the dates that CreateDates dropped as duplicates.
        private readonly HashSet<string> _tokensToRemove = new HashSet<string>();

        /// <summary>Builds the result from the source text, its tokens and the dates found.</summary>
        /// <param name="sourceText">The text that was parsed.</param>
        /// <param name="tokens">The token list; date placeholders have the form <c>{i}</c>.</param>
        /// <param name="dates">All dates found, duplicates included.</param>
        /// <exception cref="System.ArgumentNullException">Any argument is <c>null</c>.</exception>
        public HorsParseResult(string sourceText, List<string> tokens, List<DateTimeToken> dates)
        {
            // Fail fast with a descriptive exception; every one of these would otherwise
            // surface later as an incidental exception from inside a helper.
            if (sourceText == null)
            {
                throw new System.ArgumentNullException(nameof(sourceText));
            }

            if (tokens == null)
            {
                throw new System.ArgumentNullException(nameof(tokens));
            }

            if (dates == null)
            {
                throw new System.ArgumentNullException(nameof(dates));
            }

            SourceText = sourceText;
            _fullDates = dates;

            // Order matters: CreateDates fills _tokensToRemove, which the Tokens filter reads.
            Dates = CreateDates(dates);
            Tokens = tokens.Where(t => !_tokensToRemove.Contains(t)).ToList();
            Text = Helpers.TrimPunctuation(CreateText(false)).Trim();
        }

        /// <summary>
        /// Returns <paramref name="dates"/> without duplicates and records the placeholder
        /// of every dropped date in <c>_tokensToRemove</c>.
        /// </summary>
        /// <param name="dates">All dates, in the order their placeholders are numbered.</param>
        /// <returns>Dates whose group is -1, plus the first date of every other group.</returns>
        private List<DateTimeToken> CreateDates(List<DateTimeToken> dates)
        {
            // Element type kept as in the original; the return type of GetDuplicateGroup
            // is not visible from this file.
            var duplicateSeen = new HashSet<double>();
            var datesOut = new List<DateTimeToken>(dates.Count);

            for (var i = 0; i < dates.Count; i++)
            {
                var date = dates[i];
                var group = date.GetDuplicateGroup();

                // A group of -1 is always kept (presumably "not part of any group" - TODO confirm).
                // HashSet.Add returns false when the group was already seen, so it doubles
                // as the membership test.
                if (group == -1 || duplicateSeen.Add(group))
                {
                    datesOut.Add(date);
                }
                else
                {
                    _tokensToRemove.Add($"{{{i}}}");
                }
            }

            return datesOut;
        }

        /// <summary>
        /// Rebuilds <see cref="SourceText"/> with every date span removed and runs of
        /// whitespace collapsed to a single space.
        /// </summary>
        /// <param name="insertTokens">
        /// When <c>true</c>, a removed span is replaced by the space-separated <c>{i}</c>
        /// placeholders of the dates starting there; when <c>false</c>, it is just removed.
        /// </param>
        /// <returns>The rebuilt, trimmed text.</returns>
        private string CreateText(bool insertTokens)
        {
            var text = SourceText;
            var skippedDates = new HashSet<DateTimeToken>();

            // Walk the dates from last to first. Presumably the list is ordered by position,
            // so cutting a later span leaves the indexes of earlier spans valid - TODO confirm.
            for (var i = _fullDates.Count - 1; i >= 0; i--)
            {
                var date = _fullDates[i];
                if (skippedDates.Contains(date))
                {
                    continue;
                }

                // All not-yet-handled dates that start at the same position share one cut.
                var sameDates = _fullDates
                    .Where(d => d.StartIndex == date.StartIndex && !skippedDates.Contains(d))
                    .ToList();

                var tokensToInsert = new List<string>(sameDates.Count);
                foreach (var oDate in sameDates)
                {
                    skippedDates.Add(oDate);
                    var indexInList = _fullDates.IndexOf(oDate);
                    tokensToInsert.Add($"{{{indexInList}}}");
                }

                // NOTE(review): only `date` itself is checked against Dates, while the inserted
                // placeholders cover every date in sameDates - confirm this is intended when
                // dates sharing a start index differ in whether they were dropped as duplicates.
                var replacement = insertTokens && Dates.Contains(date)
                    ? string.Join(" ", tokensToInsert)
                    : "";
                var tail = date.EndIndex < text.Length ? text.Substring(date.EndIndex) : "";

                text = text.Substring(0, date.StartIndex) + replacement + tail;
            }

            return Regex.Replace(text.Trim(), @"\s{2,}", " ");
        }

        /// <summary>The entries of <see cref="Tokens"/> joined with single spaces.</summary>
        public string CleanTextWithTokens => string.Join(" ", Tokens);

        /// <summary>
        /// <see cref="SourceText"/> with date spans replaced by <c>{i}</c> placeholders.
        /// Computed on first access and cached.
        /// </summary>
        public string TextWithTokens
        {
            get
            {
                // Test for null, not IsNullOrEmpty: an empty result is a valid cached value
                // and must not trigger a rebuild on every access.
                if (_textWithTokens == null)
                {
                    _textWithTokens = CreateText(true);
                }

                return _textWithTokens;
            }
        }

        /// <summary>
        /// Returns <see cref="CleanTextWithTokens"/>, then <c> | </c>, then the dates
        /// joined with <c>; </c>.
        /// </summary>
        public override string ToString()
        {
            return $"{CleanTextWithTokens} | {string.Join("; ", Dates.Select(d => d.ToString()))}";
        }
    }
}