Skip to content

Commit

Permalink
Merge pull request #2513 from petchema/csv-parsing-no-exponential-regex
Browse files Browse the repository at this point in the history
Rewrite of CSV regex parsing to avoid exponential behavior
  • Loading branch information
Interkarma authored Apr 27, 2023
2 parents c1e0869 + 28be08e commit bf8094d
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 25 deletions.
49 changes: 26 additions & 23 deletions Assets/Scripts/Game/StringTableCSVParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,15 @@ public class StringTableCSVParser
const string keyString = "Key";
const string valueString = "Value";

static readonly char[] trimChars = { '\r', '\n' };

/// <summary>
/// Loads a CSV patch file for in-game text.
/// Seeks mods first then StreamingAssets/Text folder.
/// </summary>
/// <param name="filename">Filename of StringTable CSV file.</param>
/// <returns>KeyValuePair for each row if successful, otherwise null.</returns>
public static KeyValuePair<string, string>[] Load(string filename)
public static List<KeyValuePair<string, string>> Load(string filename)
{
string csvText = null;

Expand Down Expand Up @@ -62,7 +64,7 @@ public static KeyValuePair<string, string>[] Load(string filename)
return null;

// Parse into CSV rows
KeyValuePair<string, string>[] rows = null;
List<KeyValuePair<string, string>> rows;
try
{
rows = ParseCSVRows(csvText);
Expand All @@ -82,37 +84,38 @@ public static KeyValuePair<string, string>[] Load(string filename)
/// </summary>
/// <param name="csvText">Source CSV data.</param>
/// <returns>KeyValuePair for each row.</returns>
static KeyValuePair<string, string>[] ParseCSVRows(string csvText)
static List<KeyValuePair<string, string>> ParseCSVRows(string csvText)
{
// Regex pattern from https://gist.github.com/awwsmm/886ac0ce0cef517ad7092915f708175f
const string linePattern = "(?:,|\\n|^)(\"(?:(?:\"\")*[^\"]*)*\"|[^\",\\n]*|(?:\\n|$))";
// Regex pattern inspired by https://gist.github.com/awwsmm/886ac0ce0cef517ad7092915f708175f
// but rewritten to avoid that pattern's exponential backtracking behavior
const string linePattern = "(?:\\n|^)([^\",\\n]*),((?:\"[^\"]*\")+|[^\",\\n]*)";

// Split source CSV based on regex matches
char[] trimChars = { '\r', '\n', '\"', ',' };
List<KeyValuePair<string, string>> rows = new List<KeyValuePair<string, string>>();
string[] matches = (from Match m in Regex.Matches(csvText, linePattern, RegexOptions.ExplicitCapture) select m.Groups[0].Value).ToArray();
int pos = 0;
while (pos < matches.Length)
{
if (pos + 1 == matches.Length)
{
// Exit if no valid pair at end of csv (likely an empty line at end of source data)
break;
}
string key = matches[pos++].Trim(trimChars);
string value = matches[pos++].Trim(trimChars);
value = value.Replace("\"\"", "\""); // Replace escaped quotes in value with single quote marks
KeyValuePair<string, string> kvp = new KeyValuePair<string, string>(key, value);
rows.Add(kvp);
}
List<KeyValuePair<string, string>> rows = (from Match m in
Regex.Matches(csvText, linePattern)
select new KeyValuePair<string, string>(
m.Groups[1].Value.Trim(trimChars),
UnescapeCSVvalue(m.Groups[2].Value).Trim(trimChars)
)
).ToList();

// Remove the first row if its key is "Key" and its value is "Value"
// This is the expected header row, but it does not need to be present
// If the first row holds any other key/value pair, it is kept as a normal data row
if (rows.Count > 0 && rows[0].Key == keyString && rows[0].Value == valueString)
rows.RemoveAt(0);

return rows.ToArray();
return rows;
}

/// <summary>
/// Removes CSV quoting from a single field value.
/// A value wrapped in double quotes has the surrounding quotes stripped and
/// every doubled quote mark ("") inside it collapsed to a single quote mark (").
/// Any other value (null, empty, unquoted, or not quoted at both ends) is returned unchanged.
/// </summary>
/// <param name="value">Raw field text as captured from the CSV source.</param>
/// <returns>Field text with CSV quoting removed, or the input unchanged if it was not quoted.</returns>
static string UnescapeCSVvalue(string value)
{
    // Need at least an opening and a closing quote before stripping anything;
    // a lone '"' would otherwise make Substring throw ArgumentOutOfRangeException
    if (value == null || value.Length < 2)
        return value;

    // Only strip when the value is quoted at both ends, so an unterminated
    // quoted value does not silently lose its last character
    if (value[0] != '"' || value[value.Length - 1] != '"')
        return value;

    return value.Substring(1, value.Length - 2)
        .Replace("\"\"", "\""); // unescape doubled quote marks
}

/// <summary>
Expand Down
4 changes: 2 additions & 2 deletions Assets/Scripts/Game/StringTablePatcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ public void PostprocessTable(LocalizationTable table)
return;

// Load table patch data (if present)
KeyValuePair<string, string>[] rows = StringTableCSVParser.Load(table.TableCollectionName);
if (rows == null || rows.Length == 0)
List<KeyValuePair<string, string>> rows = StringTableCSVParser.Load(table.TableCollectionName);
if (rows == null || rows.Count == 0)
return;

// Patch string table from patch data
Expand Down

0 comments on commit bf8094d

Please sign in to comment.