@@ -107,19 +107,39 @@ public final class GrokPatternCreator {
107107 // apply some heuristic based on those.
108108 );
109109
110- private GrokPatternCreator () {
111- }
110+ /**
111+ * It is expected that the explanation will be shared with other code.
112+ * Both this class and other classes will update it.
113+ */
114+ private final List <String > explanation ;
115+ private final Collection <String > sampleMessages ;
112116
113117 /**
114- * This method attempts to find a Grok pattern that will match all of the sample messages in their entirety.
118+ * It is expected that the mappings will be shared with other code.
119+ * Both this class and other classes will update it.
120+ */
121+ private final Map <String , Object > mappings ;
122+ private final Map <String , Integer > fieldNameCountStore = new HashMap <>();
123+ private final StringBuilder overallGrokPatternBuilder = new StringBuilder ();
124+
125+ /**
126+ * Creates a Grok pattern creator for the supplied sample messages.
115127 * @param explanation List of reasons for making decisions. May contain items when passed and new reasons
116- * can be appended by this method .
117- * @param sampleMessages Sample messages that any non-<code>null</code> return will match.
128+ * can be appended by the methods of this class.
129+ * @param sampleMessages Sample messages that any Grok pattern found must match.
118130 * @param mappings Will be updated with mappings appropriate for any Grok pattern found by the methods of this class.
131+ */
132+ public GrokPatternCreator (List <String > explanation , Collection <String > sampleMessages , Map <String , Object > mappings ) {
133+ this .explanation = explanation ; // stored by reference, not copied
134+ this .sampleMessages = Collections .unmodifiableCollection (sampleMessages ); // read-only view: the caller's collection cannot be modified through this field
135+ this .mappings = mappings ; // stored by reference, not copied
136+ }
137+
138+ /**
139+ * This method attempts to find a Grok pattern that will match all of the sample messages in their entirety.
119140 * @return A tuple of (time field name, Grok string), or <code>null</code> if no suitable Grok pattern was found.
120141 */
121- public static Tuple <String , String > findFullLineGrokPattern (List <String > explanation , Collection <String > sampleMessages ,
122- Map <String , Object > mappings ) {
142+ public Tuple <String , String > findFullLineGrokPattern () {
123143
124144 for (FullMatchGrokPatternCandidate candidate : FULL_MATCH_GROK_PATTERNS ) {
125145 if (candidate .matchesAll (sampleMessages )) {
@@ -132,60 +152,55 @@ public static Tuple<String, String> findFullLineGrokPattern(List<String> explana
132152
133153 /**
134154 * Build a Grok pattern that will match all of the sample messages in their entirety.
135- * @param explanation List of reasons for making decisions. May contain items when passed and new reasons
136- * can be appended by this method.
137- * @param sampleMessages Sample messages that the returned Grok pattern will match.
138155 * @param seedPatternName A pattern that has already been determined to match some portion of every sample message.
139156 * @param seedFieldName The field name to be used for the portion of every sample message that the seed pattern matches.
140- * @param mappings Will be updated with mappings appropriate for the returned pattern, excluding the seed field name.
141157 * @return The built Grok pattern.
142158 */
143- public static String createGrokPatternFromExamples (List <String > explanation , Collection <String > sampleMessages , String seedPatternName ,
144- String seedFieldName , Map <String , Object > mappings ) {
159+ public String createGrokPatternFromExamples (String seedPatternName , String seedFieldName ) {
145160
161+ overallGrokPatternBuilder . setLength ( 0 ); // the builder is an instance field: clear any output left by earlier use of this object
147162 // NOTE(review): unlike the builder, fieldNameCountStore is not cleared here (the removed static version created a fresh map per call) - confirm that carrying field name counts across calls is intended.
148- Map <String , Integer > fieldNameCountStore = new HashMap <>();
149- StringBuilder overallGrokPatternBuilder = new StringBuilder ();
163+ GrokPatternCandidate seedCandidate = new NoMappingGrokPatternCandidate (seedPatternName , seedFieldName );
150164
151- processCandidateAndSplit (explanation , fieldNameCountStore , overallGrokPatternBuilder , seedCandidate , true , sampleMessages , mappings ,
152- false , 0 , false , 0 );
165+ processCandidateAndSplit (seedCandidate , true , sampleMessages , false , 0 , false , 0 ); // isLast is true and no candidates are ignored on either side of the seed
153166
154167 return overallGrokPatternBuilder .toString ().replace ("\t " , "\\ t" ).replace ("\n " , "\\ n" ); // tab and newline characters are returned as backslash escape sequences
155168 }
156169
170+ /**
171+ * This is purely to allow unit tests to inspect the partial Grok pattern after testing implementation details.
172+ * It should not be used in production code.
173+ */
174+ StringBuilder getOverallGrokPatternBuilder () { // no access modifier, so package-private
175+ return overallGrokPatternBuilder ; // the live builder itself, not a copy
176+ }
177+
157178 /**
158179 * Given a chosen Grok pattern and a collection of message snippets, split the snippets into the
159180 * matched section and the pieces before and after it. Recurse to find more matches in the pieces
160181 * before and after and update the overall Grok pattern builder of this object.
161182 */
162- private static void processCandidateAndSplit (List <String > explanation , Map <String , Integer > fieldNameCountStore ,
163- StringBuilder overallGrokPatternBuilder , GrokPatternCandidate chosenPattern ,
164- boolean isLast , Collection <String > snippets , Map <String , Object > mappings ,
165- boolean ignoreKeyValueCandidateLeft , int ignoreValueOnlyCandidatesLeft ,
166- boolean ignoreKeyValueCandidateRight , int ignoreValueOnlyCandidatesRight ) {
183+ private void processCandidateAndSplit (GrokPatternCandidate chosenPattern , boolean isLast , Collection <String > snippets ,
184+ boolean ignoreKeyValueCandidateLeft , int ignoreValueOnlyCandidatesLeft ,
185+ boolean ignoreKeyValueCandidateRight , int ignoreValueOnlyCandidatesRight ) {
167186
168187 Collection <String > prefaces = new ArrayList <>(); // handed to processCaptures, which is expected to fill it with the text before each match
169188 Collection <String > epilogues = new ArrayList <>(); // handed to processCaptures, which is expected to fill it with the text after each match
170189 String patternBuilderContent = chosenPattern .processCaptures (fieldNameCountStore , snippets , prefaces , epilogues , mappings );
171- appendBestGrokMatchForStrings (explanation , fieldNameCountStore , overallGrokPatternBuilder , false , prefaces , mappings ,
172- ignoreKeyValueCandidateLeft , ignoreValueOnlyCandidatesLeft );
190+ appendBestGrokMatchForStrings (false , prefaces , ignoreKeyValueCandidateLeft , ignoreValueOnlyCandidatesLeft ); // never last: the chosen pattern's own content is still to be appended after the prefaces
173191 overallGrokPatternBuilder .append (patternBuilderContent ); // appended between the two calls so the pattern is built left to right
174- appendBestGrokMatchForStrings (explanation , fieldNameCountStore , overallGrokPatternBuilder , isLast , epilogues , mappings ,
175- ignoreKeyValueCandidateRight , ignoreValueOnlyCandidatesRight );
192+ appendBestGrokMatchForStrings (isLast , epilogues , ignoreKeyValueCandidateRight , ignoreValueOnlyCandidatesRight ); // the caller's isLast flag is passed through to the epilogues only
176193 }
177194
178195 /**
179196 * Given a collection of message snippets, work out which (if any) of the Grok patterns we're allowed
180197 * to use matches it best. Then append the appropriate Grok language to represent that finding onto
181198 * the overall Grok pattern builder of this object.
182199 */
183- static void appendBestGrokMatchForStrings (List <String > explanation , Map <String , Integer > fieldNameCountStore ,
184- StringBuilder overallGrokPatternBuilder , boolean isLast , Collection <String > snippets ,
185- Map <String , Object > mappings , boolean ignoreKeyValueCandidate ,
186- int ignoreValueOnlyCandidates ) {
200+ void appendBestGrokMatchForStrings (boolean isLast , Collection <String > snippets ,
201+ boolean ignoreKeyValueCandidate , int ignoreValueOnlyCandidates ) {
187202
188- snippets = adjustForPunctuation (snippets , overallGrokPatternBuilder );
203+ snippets = adjustForPunctuation (snippets );
189204
190205 GrokPatternCandidate bestCandidate = null ;
191206 if (snippets .isEmpty () == false ) {
@@ -207,13 +222,13 @@ static void appendBestGrokMatchForStrings(List<String> explanation, Map<String,
207222
208223 if (bestCandidate == null ) {
209224 if (isLast ) {
210- finalizeGrokPattern (overallGrokPatternBuilder , snippets );
225+ finalizeGrokPattern (snippets );
211226 } else {
212- addIntermediateRegex (overallGrokPatternBuilder , snippets );
227+ addIntermediateRegex (snippets );
213228 }
214229 } else {
215- processCandidateAndSplit (explanation , fieldNameCountStore , overallGrokPatternBuilder , bestCandidate , isLast , snippets , mappings ,
216- true , ignoreValueOnlyCandidates + ( ignoreKeyValueCandidate ? 1 : 0 ), ignoreKeyValueCandidate , ignoreValueOnlyCandidates );
230+ processCandidateAndSplit (bestCandidate , isLast , snippets , true , ignoreValueOnlyCandidates + ( ignoreKeyValueCandidate ? 1 : 0 ) ,
231+ ignoreKeyValueCandidate , ignoreValueOnlyCandidates );
217232 }
218233 }
219234
@@ -222,13 +237,10 @@ static void appendBestGrokMatchForStrings(List<String> explanation, Map<String,
222237 * then add all but the last of these characters to the overall pattern and remove them from the
223238 * snippets.
224239 * @param snippets Input snippets - not modified.
225- * @param overallPatternBuilder The string builder in which a regex is being built to which common
226- * punctuation characters will be appended (with appropriate escaping
227- * if necessary).
228240 * @return Output snippets, which will be a copy of the input snippets but with whatever characters
229241 * were added to <code>overallGrokPatternBuilder</code> removed from the beginning.
230242 */
231- static Collection <String > adjustForPunctuation (Collection <String > snippets , StringBuilder overallPatternBuilder ) {
243+ Collection <String > adjustForPunctuation (Collection <String > snippets ) {
232244
233245 assert snippets .isEmpty () == false ;
234246
@@ -268,9 +280,9 @@ static Collection<String> adjustForPunctuation(Collection<String> snippets, Stri
268280 for (int index = 0 ; index < numLiteralCharacters ; ++index ) {
269281 char ch = commonInitialPunctuation .charAt (index );
270282 if (PUNCTUATION_OR_SPACE_NEEDS_ESCAPING .getOrDefault (ch , false )) {
271- overallPatternBuilder .append ('\\' );
283+ overallGrokPatternBuilder .append ('\\' );
272284 }
273- overallPatternBuilder .append (ch );
285+ overallGrokPatternBuilder .append (ch );
274286 }
275287
276288 return snippets .stream ().map (snippet -> snippet .substring (numLiteralCharacters )).collect (Collectors .toList ());
@@ -287,7 +299,11 @@ static String buildFieldName(Map<String, Integer> fieldNameCountStore, String fi
287299 return (numberSeen > 1 ) ? fieldName + numberSeen : fieldName ;
288300 }
289301
290- public static void addIntermediateRegex (StringBuilder overallPatternBuilder , Collection <String > snippets ) {
302+ private void addIntermediateRegex (Collection <String > snippets ) {
303+ addIntermediateRegex (overallGrokPatternBuilder , snippets ); // delegates to the public static overload, targeting this object's pattern builder
304+ }
305+
306+ public static void addIntermediateRegex (StringBuilder patternBuilder , Collection <String > snippets ) {
291307 if (snippets .isEmpty ()) {
292308 return ;
293309 }
@@ -301,26 +317,26 @@ public static void addIntermediateRegex(StringBuilder overallPatternBuilder, Col
301317 Boolean punctuationOrSpaceNeedsEscaping = PUNCTUATION_OR_SPACE_NEEDS_ESCAPING .get (ch );
302318 if (punctuationOrSpaceNeedsEscaping != null && others .stream ().allMatch (other -> other .indexOf (ch ) >= 0 )) {
303319 if (wildcardRequiredIfNonMatchFound && others .stream ().anyMatch (other -> other .indexOf (ch ) > 0 )) {
304- overallPatternBuilder .append (".*?" );
320+ patternBuilder .append (".*?" );
305321 }
306322 if (punctuationOrSpaceNeedsEscaping ) {
307- overallPatternBuilder .append ('\\' );
323+ patternBuilder .append ('\\' );
308324 }
309- overallPatternBuilder .append (ch );
325+ patternBuilder .append (ch );
310326 wildcardRequiredIfNonMatchFound = true ;
311327 others = others .stream ().map (other -> other .substring (other .indexOf (ch ) + 1 )).collect (Collectors .toList ());
312328 } else if (wildcardRequiredIfNonMatchFound ) {
313- overallPatternBuilder .append (".*?" );
329+ patternBuilder .append (".*?" );
314330 wildcardRequiredIfNonMatchFound = false ;
315331 }
316332 }
317333
318334 if (wildcardRequiredIfNonMatchFound && others .stream ().anyMatch (s -> s .isEmpty () == false )) {
319- overallPatternBuilder .append (".*?" );
335+ patternBuilder .append (".*?" );
320336 }
321337 }
322338
323- private static void finalizeGrokPattern (StringBuilder overallPatternBuilder , Collection <String > snippets ) {
339+ private void finalizeGrokPattern (Collection <String > snippets ) {
324340 if (snippets .stream ().allMatch (String ::isEmpty )) {
325341 return ;
326342 }
@@ -335,9 +351,9 @@ private static void finalizeGrokPattern(StringBuilder overallPatternBuilder, Col
335351 if (punctuationOrSpaceNeedsEscaping != null &&
336352 others .stream ().allMatch (other -> other .length () > driverIndex && other .charAt (driverIndex ) == ch )) {
337353 if (punctuationOrSpaceNeedsEscaping ) {
338- overallPatternBuilder .append ('\\' );
354+ overallGrokPatternBuilder .append ('\\' );
339355 }
340- overallPatternBuilder .append (ch );
356+ overallGrokPatternBuilder .append (ch );
341357 if (i == driver .length () - 1 && others .stream ().allMatch (driver ::equals )) {
342358 return ;
343359 }
@@ -346,7 +362,7 @@ private static void finalizeGrokPattern(StringBuilder overallPatternBuilder, Col
346362 }
347363 }
348364
349- overallPatternBuilder .append (".*" );
365+ overallGrokPatternBuilder .append (".*" );
350366 }
351367
352368 interface GrokPatternCandidate {
0 commit comments