Splitting a string on a separator is easy.
Respecting qualified (quoted) strings can be hard.
Identifying escaped characters in qualified strings is very tricky.
Splitting a qualified string while taking escape characters into account is really difficult!
Unit Tests
// Verifies that SplitQualified splits on spaces while keeping double-quoted
// sections intact and honoring backslash-escaped quotes.
[Theory]
// Null and empty input produce no tokens.
[InlineData(null, new string[0])]
[InlineData("", new string[0])]
// Plain split; a doubled separator must not produce an empty token.
[InlineData("hello world", new[] { "hello", "world" })]
[InlineData("hello  world", new[] { "hello", "world" })]
// Separators inside a qualified section are preserved verbatim.
[InlineData("\"hello world\"", new[] { "\"hello world\"" })]
[InlineData("\"hello  world\"", new[] { "\"hello  world\"" })]
[InlineData("hello \"goodnight moon\" world", new[]
{
    "hello",
    "\"goodnight moon\"",
    "world",
})]
// An escaped qualifier (\") does not close the qualified section.
[InlineData("hello \"goodnight \\\" moon\" world", new[]
{
    "hello",
    "\"goodnight \\\" moon\"",
    "world",
})]
// An escaped escape (\\) followed by a qualifier DOES close the section.
[InlineData("hello \"goodnight \\\\\" moon\" world", new[]
{
    "hello",
    "\"goodnight \\\\\"",
    "moon\"",
    "world",
})]
public void SplitQualified(string input, IList<string> expected)
{
    var actual = input
        .SplitQualified(' ', '"')
        .ToList();

    Assert.Equal(expected.Count, actual.Count);
    for (var i = 0; i < actual.Count; i++)
        Assert.Equal(expected[i], actual[i]);
}
String Extension Methods
/// <summary>
/// Splits <paramref name="input"/> on <paramref name="separator"/>, leaving
/// separators that fall between a pair of <paramref name="qualifier"/>
/// characters untouched.
/// </summary>
/// <param name="input">The text to split; null or whitespace-only text yields no results.</param>
/// <param name="separator">The character to split on.</param>
/// <param name="qualifier">The character that opens and closes a qualified (quoted) section.</param>
/// <param name="options">
/// <see cref="StringSplitOptions.None"/> returns every segment; any other value
/// drops segments that are null, empty, or whitespace-only.
/// </param>
/// <param name="escape">The character used to escape a qualifier.</param>
/// <returns>The segments of <paramref name="input"/>, in order.</returns>
public static IEnumerable<string> SplitQualified(
    this string input,
    char separator,
    char qualifier,
    StringSplitOptions options = StringSplitOptions.RemoveEmptyEntries,
    char escape = '\\')
{
    // Nothing to split.
    if (String.IsNullOrWhiteSpace(input))
        return new string[0];

    var segments = SplitQualified(input, separator, qualifier, escape);

    // Caller asked for the raw segments, blanks included.
    if (options == StringSplitOptions.None)
        return segments;

    return segments.Where(segment => !String.IsNullOrWhiteSpace(segment));
}
/// <summary>
/// Core splitter: yields the segments of <paramref name="input"/> delimited by
/// every <paramref name="separator"/> that is not enclosed by a pair of
/// unescaped <paramref name="qualifier"/> characters. Empty segments are
/// yielded as-is; filtering is left to the caller.
/// </summary>
/// <param name="input">The text to split. Must not be null.</param>
/// <param name="separator">The character to split on.</param>
/// <param name="qualifier">The character that opens and closes a qualified section.</param>
/// <param name="escape">The character that escapes a following qualifier.</param>
/// <returns>The segments in order; separators themselves are never included.</returns>
private static IEnumerable<string> SplitQualified(
    string input,
    char separator,
    char qualifier,
    char escape)
{
    var separatorIndexes = input
        .IndexesOf(separator)
        .ToList();
    var qualifierIndexes = input
        .IndexesOf(qualifier)
        .ToList();

    // Remove Escaped Qualifiers
    // A qualifier is escaped only when it is preceded by an ODD number of
    // consecutive escape characters (an even run is just escaped escapes).
    // The run is counted all the way back to index 0 so that inputs that
    // begin with escape characters are judged by the same parity rule.
    for (var i = qualifierIndexes.Count - 1; i >= 0; i--)
    {
        var escapeCount = 0;
        for (var k = qualifierIndexes[i] - 1; k >= 0 && input[k] == escape; k--)
            escapeCount++;
        if (escapeCount % 2 == 1)
            qualifierIndexes.RemoveAt(i);
    }

    // Remove Qualified Separators
    // Qualifiers are consumed in (open, close) pairs; a trailing unmatched
    // qualifier is ignored. Iterating backwards keeps RemoveAt from
    // shifting indexes that are still to be visited.
    for (var i = separatorIndexes.Count - 1; i >= 0; i--)
    {
        var separatorIndex = separatorIndexes[i];
        for (var j = 0; j < qualifierIndexes.Count - 1; j += 2)
        {
            if (separatorIndex > qualifierIndexes[j]
                && separatorIndex < qualifierIndexes[j + 1])
            {
                separatorIndexes.RemoveAt(i);
                break; // pairs do not overlap, so one match is enough
            }
        }
    }

    // Split String On Separators
    // Track where the current segment starts rather than where the previous
    // separator was, so a separator at index 0 or at the last index needs no
    // special casing and is never copied into a neighbouring segment.
    var segmentStart = 0;
    foreach (var separatorIndex in separatorIndexes)
    {
        yield return input.Substring(segmentStart, separatorIndex - segmentStart);
        segmentStart = separatorIndex + 1;
    }

    // Remainder after the last separator (empty when input ends with one).
    yield return input.Substring(segmentStart);
}
/// <summary>
/// Lazily yields the zero-based index of every occurrence of
/// <paramref name="value"/> in <paramref name="input"/>, in ascending order.
/// </summary>
/// <param name="input">The text to search; null or empty text yields nothing.</param>
/// <param name="value">The character to look for.</param>
/// <returns>Each index at which <paramref name="value"/> occurs.</returns>
public static IEnumerable<int> IndexesOf(
    this string input,
    char value)
{
    // Only null/empty is skipped: a whitespace-only string is still searchable,
    // e.g. when the character being looked for is itself a space.
    if (String.IsNullOrEmpty(input))
        yield break;

    // index + 1 never exceeds input.Length, which IndexOf accepts as a start.
    for (var index = input.IndexOf(value);
         index > -1;
         index = input.IndexOf(value, index + 1))
    {
        yield return index;
    }
}
Enjoy,
Tom