Skip to content

Commit 37a92f7

Browse files
author
David Roberts
committed
Make GrokPatternCreator more OO
1 parent 30a1cba commit 37a92f7

File tree

3 files changed

+117
-130
lines changed

3 files changed

+117
-130
lines changed

x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/GrokPatternCreator.java

Lines changed: 68 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -107,19 +107,39 @@ public final class GrokPatternCreator {
107107
// apply some heuristic based on those.
108108
);
109109

110-
private GrokPatternCreator() {
111-
}
110+
/**
111+
* It is expected that the explanation will be shared with other code.
112+
* Both this class and other classes will update it.
113+
*/
114+
private final List<String> explanation;
115+
private final Collection<String> sampleMessages;
112116

113117
/**
114-
* This method attempts to find a Grok pattern that will match all of the sample messages in their entirety.
118+
* It is expected that the mappings will be shared with other code.
119+
* Both this class and other classes will update it.
120+
*/
121+
private final Map<String, Object> mappings;
122+
private final Map<String, Integer> fieldNameCountStore = new HashMap<>();
123+
private final StringBuilder overallGrokPatternBuilder = new StringBuilder();
124+
125+
/**
126+
* Creates a Grok pattern creator that works on a fixed collection of sample messages.
115127
* @param explanation List of reasons for making decisions. May contain items when passed and new reasons
116-
* can be appended by this method.
117-
* @param sampleMessages Sample messages that any non-<code>null</code> return will match.
128+
* can be appended by the methods of this class.
129+
* @param sampleMessages Sample messages that any Grok pattern found must match.
118130
* @param mappings Will be updated with mappings appropriate for the returned pattern, if non-<code>null</code>.
131+
*/
132+
public GrokPatternCreator(List<String> explanation, Collection<String> sampleMessages, Map<String, Object> mappings) {
133+
this.explanation = explanation;
134+
this.sampleMessages = Collections.unmodifiableCollection(sampleMessages);
135+
this.mappings = mappings;
136+
}
137+
138+
/**
139+
* This method attempts to find a Grok pattern that will match all of the sample messages in their entirety.
119140
* @return A tuple of (time field name, Grok string), or <code>null</code> if no suitable Grok pattern was found.
120141
*/
121-
public static Tuple<String, String> findFullLineGrokPattern(List<String> explanation, Collection<String> sampleMessages,
122-
Map<String, Object> mappings) {
142+
public Tuple<String, String> findFullLineGrokPattern() {
123143

124144
for (FullMatchGrokPatternCandidate candidate : FULL_MATCH_GROK_PATTERNS) {
125145
if (candidate.matchesAll(sampleMessages)) {
@@ -132,60 +152,55 @@ public static Tuple<String, String> findFullLineGrokPattern(List<String> explana
132152

133153
/**
134154
* Build a Grok pattern that will match all of the sample messages in their entirety.
135-
* @param explanation List of reasons for making decisions. May contain items when passed and new reasons
136-
* can be appended by this method.
137-
* @param sampleMessages Sample messages that the returned Grok pattern will match.
138155
* @param seedPatternName A pattern that has already been determined to match some portion of every sample message.
139156
* @param seedFieldName The field name to be used for the portion of every sample message that the seed pattern matches.
140-
* @param mappings Will be updated with mappings appropriate for the returned pattern, excluding the seed field name.
141157
* @return The built Grok pattern.
142158
*/
143-
public static String createGrokPatternFromExamples(List<String> explanation, Collection<String> sampleMessages, String seedPatternName,
144-
String seedFieldName, Map<String, Object> mappings) {
159+
public String createGrokPatternFromExamples(String seedPatternName, String seedFieldName) {
145160

146-
GrokPatternCandidate seedCandidate = new NoMappingGrokPatternCandidate(seedPatternName, seedFieldName);
161+
overallGrokPatternBuilder.setLength(0);
147162

148-
Map<String, Integer> fieldNameCountStore = new HashMap<>();
149-
StringBuilder overallGrokPatternBuilder = new StringBuilder();
163+
GrokPatternCandidate seedCandidate = new NoMappingGrokPatternCandidate(seedPatternName, seedFieldName);
150164

151-
processCandidateAndSplit(explanation, fieldNameCountStore, overallGrokPatternBuilder, seedCandidate, true, sampleMessages, mappings,
152-
false, 0, false, 0);
165+
processCandidateAndSplit(seedCandidate, true, sampleMessages, false, 0, false, 0);
153166

154167
return overallGrokPatternBuilder.toString().replace("\t", "\\t").replace("\n", "\\n");
155168
}
156169

170+
/**
171+
* This is purely to allow unit tests to inspect the partial Grok pattern after testing implementation details.
172+
* It should not be used in production code.
173+
*/
174+
StringBuilder getOverallGrokPatternBuilder() {
175+
return overallGrokPatternBuilder;
176+
}
177+
157178
/**
158179
* Given a chosen Grok pattern and a collection of message snippets, split the snippets into the
159180
* matched section and the pieces before and after it. Recurse to find more matches in the pieces
160181
* before and after and update the overall Grok pattern string builder held by this object.
161182
*/
162-
private static void processCandidateAndSplit(List<String> explanation, Map<String, Integer> fieldNameCountStore,
163-
StringBuilder overallGrokPatternBuilder, GrokPatternCandidate chosenPattern,
164-
boolean isLast, Collection<String> snippets, Map<String, Object> mappings,
165-
boolean ignoreKeyValueCandidateLeft, int ignoreValueOnlyCandidatesLeft,
166-
boolean ignoreKeyValueCandidateRight, int ignoreValueOnlyCandidatesRight) {
183+
private void processCandidateAndSplit(GrokPatternCandidate chosenPattern, boolean isLast, Collection<String> snippets,
184+
boolean ignoreKeyValueCandidateLeft, int ignoreValueOnlyCandidatesLeft,
185+
boolean ignoreKeyValueCandidateRight, int ignoreValueOnlyCandidatesRight) {
167186

168187
Collection<String> prefaces = new ArrayList<>();
169188
Collection<String> epilogues = new ArrayList<>();
170189
String patternBuilderContent = chosenPattern.processCaptures(fieldNameCountStore, snippets, prefaces, epilogues, mappings);
171-
appendBestGrokMatchForStrings(explanation, fieldNameCountStore, overallGrokPatternBuilder, false, prefaces, mappings,
172-
ignoreKeyValueCandidateLeft, ignoreValueOnlyCandidatesLeft);
190+
appendBestGrokMatchForStrings(false, prefaces, ignoreKeyValueCandidateLeft, ignoreValueOnlyCandidatesLeft);
173191
overallGrokPatternBuilder.append(patternBuilderContent);
174-
appendBestGrokMatchForStrings(explanation, fieldNameCountStore, overallGrokPatternBuilder, isLast, epilogues, mappings,
175-
ignoreKeyValueCandidateRight, ignoreValueOnlyCandidatesRight);
192+
appendBestGrokMatchForStrings(isLast, epilogues, ignoreKeyValueCandidateRight, ignoreValueOnlyCandidatesRight);
176193
}
177194

178195
/**
179196
* Given a collection of message snippets, work out which (if any) of the Grok patterns we're allowed
180197
* to use matches it best. Then append the appropriate Grok language to represent that finding onto
181198
* the overall Grok pattern string builder held by this object.
182199
*/
183-
static void appendBestGrokMatchForStrings(List<String> explanation, Map<String, Integer> fieldNameCountStore,
184-
StringBuilder overallGrokPatternBuilder, boolean isLast, Collection<String> snippets,
185-
Map<String, Object> mappings, boolean ignoreKeyValueCandidate,
186-
int ignoreValueOnlyCandidates) {
200+
void appendBestGrokMatchForStrings(boolean isLast, Collection<String> snippets,
201+
boolean ignoreKeyValueCandidate, int ignoreValueOnlyCandidates) {
187202

188-
snippets = adjustForPunctuation(snippets, overallGrokPatternBuilder);
203+
snippets = adjustForPunctuation(snippets);
189204

190205
GrokPatternCandidate bestCandidate = null;
191206
if (snippets.isEmpty() == false) {
@@ -207,13 +222,13 @@ static void appendBestGrokMatchForStrings(List<String> explanation, Map<String,
207222

208223
if (bestCandidate == null) {
209224
if (isLast) {
210-
finalizeGrokPattern(overallGrokPatternBuilder, snippets);
225+
finalizeGrokPattern(snippets);
211226
} else {
212-
addIntermediateRegex(overallGrokPatternBuilder, snippets);
227+
addIntermediateRegex(snippets);
213228
}
214229
} else {
215-
processCandidateAndSplit(explanation, fieldNameCountStore, overallGrokPatternBuilder, bestCandidate, isLast, snippets, mappings,
216-
true, ignoreValueOnlyCandidates + (ignoreKeyValueCandidate ? 1 : 0), ignoreKeyValueCandidate, ignoreValueOnlyCandidates);
230+
processCandidateAndSplit(bestCandidate, isLast, snippets, true, ignoreValueOnlyCandidates + (ignoreKeyValueCandidate ? 1 : 0),
231+
ignoreKeyValueCandidate, ignoreValueOnlyCandidates);
217232
}
218233
}
219234

@@ -222,13 +237,10 @@ static void appendBestGrokMatchForStrings(List<String> explanation, Map<String,
222237
* then add all but the last of these characters to the overall pattern and remove them from the
223238
* snippets.
224239
* @param snippets Input snippets - not modified.
225-
* @param overallPatternBuilder The string builder in which a regex is being built to which common
226-
* punctuation characters will be appended (with appropriate escaping
227-
* if necessary).
228240
* @return Output snippets, which will be a copy of the input snippets but with whatever characters
229241
* were added to <code>overallGrokPatternBuilder</code> removed from the beginning.
230242
*/
231-
static Collection<String> adjustForPunctuation(Collection<String> snippets, StringBuilder overallPatternBuilder) {
243+
Collection<String> adjustForPunctuation(Collection<String> snippets) {
232244

233245
assert snippets.isEmpty() == false;
234246

@@ -268,9 +280,9 @@ static Collection<String> adjustForPunctuation(Collection<String> snippets, Stri
268280
for (int index = 0; index < numLiteralCharacters; ++index) {
269281
char ch = commonInitialPunctuation.charAt(index);
270282
if (PUNCTUATION_OR_SPACE_NEEDS_ESCAPING.getOrDefault(ch, false)) {
271-
overallPatternBuilder.append('\\');
283+
overallGrokPatternBuilder.append('\\');
272284
}
273-
overallPatternBuilder.append(ch);
285+
overallGrokPatternBuilder.append(ch);
274286
}
275287

276288
return snippets.stream().map(snippet -> snippet.substring(numLiteralCharacters)).collect(Collectors.toList());
@@ -287,7 +299,11 @@ static String buildFieldName(Map<String, Integer> fieldNameCountStore, String fi
287299
return (numberSeen > 1) ? fieldName + numberSeen : fieldName;
288300
}
289301

290-
public static void addIntermediateRegex(StringBuilder overallPatternBuilder, Collection<String> snippets) {
302+
private void addIntermediateRegex(Collection<String> snippets) {
303+
addIntermediateRegex(overallGrokPatternBuilder, snippets);
304+
}
305+
306+
public static void addIntermediateRegex(StringBuilder patternBuilder, Collection<String> snippets) {
291307
if (snippets.isEmpty()) {
292308
return;
293309
}
@@ -301,26 +317,26 @@ public static void addIntermediateRegex(StringBuilder overallPatternBuilder, Col
301317
Boolean punctuationOrSpaceNeedsEscaping = PUNCTUATION_OR_SPACE_NEEDS_ESCAPING.get(ch);
302318
if (punctuationOrSpaceNeedsEscaping != null && others.stream().allMatch(other -> other.indexOf(ch) >= 0)) {
303319
if (wildcardRequiredIfNonMatchFound && others.stream().anyMatch(other -> other.indexOf(ch) > 0)) {
304-
overallPatternBuilder.append(".*?");
320+
patternBuilder.append(".*?");
305321
}
306322
if (punctuationOrSpaceNeedsEscaping) {
307-
overallPatternBuilder.append('\\');
323+
patternBuilder.append('\\');
308324
}
309-
overallPatternBuilder.append(ch);
325+
patternBuilder.append(ch);
310326
wildcardRequiredIfNonMatchFound = true;
311327
others = others.stream().map(other -> other.substring(other.indexOf(ch) + 1)).collect(Collectors.toList());
312328
} else if (wildcardRequiredIfNonMatchFound) {
313-
overallPatternBuilder.append(".*?");
329+
patternBuilder.append(".*?");
314330
wildcardRequiredIfNonMatchFound = false;
315331
}
316332
}
317333

318334
if (wildcardRequiredIfNonMatchFound && others.stream().anyMatch(s -> s.isEmpty() == false)) {
319-
overallPatternBuilder.append(".*?");
335+
patternBuilder.append(".*?");
320336
}
321337
}
322338

323-
private static void finalizeGrokPattern(StringBuilder overallPatternBuilder, Collection<String> snippets) {
339+
private void finalizeGrokPattern(Collection<String> snippets) {
324340
if (snippets.stream().allMatch(String::isEmpty)) {
325341
return;
326342
}
@@ -335,9 +351,9 @@ private static void finalizeGrokPattern(StringBuilder overallPatternBuilder, Col
335351
if (punctuationOrSpaceNeedsEscaping != null &&
336352
others.stream().allMatch(other -> other.length() > driverIndex && other.charAt(driverIndex) == ch)) {
337353
if (punctuationOrSpaceNeedsEscaping) {
338-
overallPatternBuilder.append('\\');
354+
overallGrokPatternBuilder.append('\\');
339355
}
340-
overallPatternBuilder.append(ch);
356+
overallGrokPatternBuilder.append(ch);
341357
if (i == driver.length() - 1 && others.stream().allMatch(driver::equals)) {
342358
return;
343359
}
@@ -346,7 +362,7 @@ private static void finalizeGrokPattern(StringBuilder overallPatternBuilder, Col
346362
}
347363
}
348364

349-
overallPatternBuilder.append(".*");
365+
overallGrokPatternBuilder.append(".*");
350366
}
351367

352368
interface GrokPatternCandidate {

x-pack/plugin/ml/log-structure-finder/src/main/java/org/elasticsearch/xpack/ml/logstructurefinder/TextLogStructureFinder.java

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,14 @@ public class TextLogStructureFinder extends AbstractLogStructureFinder implement
8484
// We can't parse directly into @timestamp using Grok, so parse to some other time field, which the date filter will then remove
8585
String interimTimestampField;
8686
String grokPattern;
87-
Tuple<String, String> timestampFieldAndFullMatchGrokPattern =
88-
GrokPatternCreator.findFullLineGrokPattern(explanation, sampleMessages, mappings);
87+
GrokPatternCreator grokPatternCreator = new GrokPatternCreator(explanation, sampleMessages, mappings);
88+
Tuple<String, String> timestampFieldAndFullMatchGrokPattern = grokPatternCreator.findFullLineGrokPattern();
8989
if (timestampFieldAndFullMatchGrokPattern != null) {
9090
interimTimestampField = timestampFieldAndFullMatchGrokPattern.v1();
9191
grokPattern = timestampFieldAndFullMatchGrokPattern.v2();
9292
} else {
9393
interimTimestampField = "timestamp";
94-
grokPattern = GrokPatternCreator.createGrokPatternFromExamples(explanation, sampleMessages, bestTimestamp.v1().grokPatternName,
95-
interimTimestampField, mappings);
94+
grokPattern = grokPatternCreator.createGrokPatternFromExamples(bestTimestamp.v1().grokPatternName, interimTimestampField);
9695
}
9796

9897
structure = structureBuilder

0 commit comments

Comments
 (0)