Skip to content

Commit

Permalink
SepReader improve exception for duplicate col names in header (#162)
Browse files Browse the repository at this point in the history
Fixes #161
  • Loading branch information
nietras authored Oct 8, 2024
1 parent ca40c60 commit 6a12252
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 23 deletions.
24 changes: 20 additions & 4 deletions src/Sep.Test/SepReaderHeaderTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public void SepReaderHeaderTest_Empty()
[TestMethod]
public void SepReaderHeaderTest_EmptyString()
{
var header = SepReaderHeader.Parse(Sep.Default, string.Empty);
var header = Parse(Sep.Default, string.Empty);

Assert.AreEqual(false, header.IsEmpty);
Assert.AreEqual(1, header.ColNames.Count);
Expand All @@ -33,7 +33,7 @@ public void SepReaderHeaderTest_EmptyString()
[TestMethod]
public void SepReaderHeaderTest_NotEmpty()
{
var header = SepReaderHeader.Parse(Sep.New(';'), "A;B;C");
var header = Parse(Sep.New(';'), "A;B;C");

Assert.AreEqual(false, header.IsEmpty);
Assert.AreEqual(3, header.ColNames.Count);
Expand Down Expand Up @@ -62,14 +62,14 @@ public void SepReaderHeaderTest_NotEmpty()
[TestMethod]
public void SepReaderHeaderTest_NamesStartingWith()
{
var header = SepReaderHeader.Parse(Sep.New(';'), "A;B;C;GT_0;RE_0;GT_1;RE_1");
var header = Parse(Sep.New(';'), "A;B;C;GT_0;RE_0;GT_1;RE_1");
AreEqual(new[] { "GT_0", "GT_1" }, header.NamesStartingWith("GT_"));
}

[TestMethod]
public void SepReaderHeaderTest_IndicesOf_LengthsNotSame_Throws()
{
var header = SepReaderHeader.Parse(Sep.New(';'), "A;B;C");
var header = Parse(Sep.New(';'), "A;B;C");

var e = Assert.ThrowsException<ArgumentException>(() =>
{
Expand All @@ -82,4 +82,20 @@ public void SepReaderHeaderTest_IndicesOf_LengthsNotSame_Throws()

static void AreEqual<T>(IReadOnlyList<T> expected, IReadOnlyList<T> actual) =>
CollectionAssert.AreEqual((ICollection)expected, (ICollection)actual);

// Convenience method for testing only
static SepReaderHeader Parse(Sep sep, string line) =>
Parse(sep, line, SepDefaults.ColNameComparer);

static SepReaderHeader Parse(Sep sep, string line, IEqualityComparer<string> comparer)
{
var colNames = sep.Split(line);
var colNameToIndex = new Dictionary<string, int>(colNames.Length, comparer);
for (var i = 0; i < colNames.Length; i++)
{
var colName = colNames[i];
colNameToIndex.Add(colName, i);
}
return new SepReaderHeader(line, colNameToIndex);
}
}
29 changes: 29 additions & 0 deletions src/Sep.Test/SepReaderTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,35 @@ public void SepReaderTest_ColNameComparer_OrdinalIgnoreCase()
Assert.ThrowsException<KeyNotFoundException>(() => reader.Current["X"].ToString());
}

[DataTestMethod]
[DataRow("A;B;C;A;D;E", "Col name 'A' found 2 times at 0:'A' 3:'A' in header row 'A;B;C;A;D;E'")]
[DataRow("A;B;C;A;D;A;E;A", "Col name 'A' found 4 times at 0:'A' 3:'A' 5:'A' 7:'A' in header row 'A;B;C;A;D;A;E;A'")]
public void SepReaderTest_DuplicateColumnNames_ThrowsWithDetails(string text, string expected)
{
var e = Assert.ThrowsException<ArgumentException>(() => Sep.Reader().FromText(text));
Assert.AreEqual(expected, e.Message);
}

[DataTestMethod]
[DataRow("A;B;C;\"A\";D;E", "Col name 'A' found 2 times at 0:'A' 3:'A' in header row 'A;B;C;\"A\";D;E'")]
[DataRow("\"A\";B;C;A;D;\"A\";E;A", "Col name 'A' found 4 times at 0:'A' 3:'A' 5:'A' 7:'A' in header row '\"A\";B;C;A;D;\"A\";E;A'")]
public void SepReaderTest_DuplicateColumnNames_Unescape_ThrowsWithDetails(string text, string expected)
{
var e = Assert.ThrowsException<ArgumentException>(() =>
Sep.Reader(o => o with { Unescape = true }).FromText(text));
Assert.AreEqual(expected, e.Message);
}

[DataTestMethod]
[DataRow("A;B;C;a;D;E", "Col name 'a' found 2 times at 0:'A' 3:'a' in header row 'A;B;C;a;D;E'")]
[DataRow("a;B;C;A;D;A;E;a", "Col name 'A' found 4 times at 0:'a' 3:'A' 5:'A' 7:'a' in header row 'a;B;C;A;D;A;E;a'")]
public void SepReaderTest_DuplicateColumnNames_ColNameComparerOrdinalIgnoreCase_ThrowsWithDetails(string text, string expected)
{
var e = Assert.ThrowsException<ArgumentException>(() =>
Sep.Reader(o => o with { ColNameComparer = StringComparer.OrdinalIgnoreCase }).FromText(text));
Assert.AreEqual(expected, e.Message);
}

[TestMethod]
public void SepReaderTest_Info_Ctor()
{
Expand Down
47 changes: 47 additions & 0 deletions src/Sep/Internals/SepThrow.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Runtime.CompilerServices;
using static nietras.SeparatedValues.SepWriter;

namespace nietras.SeparatedValues;
Expand Down Expand Up @@ -102,6 +103,52 @@ internal static void NotSupportedException_BufferOrRowLengthExceedsMaximumSuppor
$"If no such row should exist ensure quotes \" are terminated.");
}

// C# compiler does not support DoesNotReturn in face of try/finally currently
//[DoesNotReturn]
[MethodImpl(MethodImplOptions.NoInlining)]
internal static void ArgumentException_DuplicateColNamesFound(SepReaderState reader,
Dictionary<string, int> colNameToIndexUntilDuplicate,
string duplicateColName, int headerColCount,
IEqualityComparer<string> colNameComparer,
string headerRow)
{
var colNames = new string[headerColCount];
var colIndex = 0;
foreach (var colName in colNameToIndexUntilDuplicate.Keys)
{
colNames[colIndex] = colName;
++colIndex;
}
for (; colIndex < headerColCount; ++colIndex)
{
colNames[colIndex] = reader.ToStringDirect(colIndex);
}
var sb = SepStringBuilderPool.Take();
try
{
var duplicates = new List<(int colIndex, string colName)>();
for (colIndex = 0; colIndex < headerColCount; ++colIndex)
{
var colName = colNames[colIndex];
if (colNameComparer.Equals(colName, duplicateColName))
{
duplicates.Add((colIndex, colName));
}
}
sb.Append($"Col name '{duplicateColName}' found {duplicates.Count} times at");
foreach (var (duplicateColIndex, duplicateColNameFound) in duplicates)
{
sb.Append($" {duplicateColIndex}:'{duplicateColNameFound}'");
}
sb.Append($" in header row '{headerRow}'");
throw new ArgumentException(sb.ToString());
}
finally
{
SepStringBuilderPool.Return(sb);
}
}

[DoesNotReturn]
internal static void ArgumentException_ColNameAlreadyExists(string colName)
{
Expand Down
11 changes: 8 additions & 3 deletions src/Sep/SepReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -113,13 +113,18 @@ internal void Initialize(SepReaderOptions options)
_colCountExpected = firstRowColCount;
if (options.HasHeader)
{
var colNameToIndex = new Dictionary<string, int>(firstRowColCount, options.ColNameComparer);
var headerRow = new string(RowSpan());
var colNameComparer = options.ColNameComparer;
var colNameToIndex = new Dictionary<string, int>(firstRowColCount, colNameComparer);
for (var colIndex = 0; colIndex < firstRowColCount; colIndex++)
{
var colName = ToStringDirect(colIndex);
colNameToIndex.Add(colName, colIndex);
if (!colNameToIndex.TryAdd(colName, colIndex))
{
SepThrow.ArgumentException_DuplicateColNamesFound(this, colNameToIndex,
colName, firstRowColCount, colNameComparer, headerRow);
}
}
var headerRow = new string(RowSpan());
_header = new(headerRow, colNameToIndex);

HasHeader = true;
Expand Down
16 changes: 0 additions & 16 deletions src/Sep/SepReaderHeader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,22 +19,6 @@ internal SepReaderHeader(string row, Dictionary<string, int> colNameToIndex)

public static SepReaderHeader Empty { get; } = new(string.Empty, []);

internal static SepReaderHeader Parse(Sep sep, string line) =>
Parse(sep, line, SepDefaults.ColNameComparer);

internal static SepReaderHeader Parse(Sep sep, string line, IEqualityComparer<string> comparer)
{
var colNames = sep.Split(line);
var colNameToIndex = new Dictionary<string, int>(colNames.Length, comparer);
for (var i = 0; i < colNames.Length; i++)
{
var colName = colNames[i];
colNameToIndex.Add(colName, i);
}
return new SepReaderHeader(line, colNameToIndex);

}

public bool IsEmpty => _colNameToIndex.Count == 0;

public IReadOnlyList<string> ColNames => _colNames;
Expand Down

0 comments on commit 6a12252

Please sign in to comment.