diff --git a/src/Sep.Test/SepReaderHeaderTest.cs b/src/Sep.Test/SepReaderHeaderTest.cs index 9af102a..a43d33b 100644 --- a/src/Sep.Test/SepReaderHeaderTest.cs +++ b/src/Sep.Test/SepReaderHeaderTest.cs @@ -22,7 +22,7 @@ public void SepReaderHeaderTest_Empty() [TestMethod] public void SepReaderHeaderTest_EmptyString() { - var header = SepReaderHeader.Parse(Sep.Default, string.Empty); + var header = Parse(Sep.Default, string.Empty); Assert.AreEqual(false, header.IsEmpty); Assert.AreEqual(1, header.ColNames.Count); @@ -33,7 +33,7 @@ public void SepReaderHeaderTest_EmptyString() [TestMethod] public void SepReaderHeaderTest_NotEmpty() { - var header = SepReaderHeader.Parse(Sep.New(';'), "A;B;C"); + var header = Parse(Sep.New(';'), "A;B;C"); Assert.AreEqual(false, header.IsEmpty); Assert.AreEqual(3, header.ColNames.Count); @@ -62,14 +62,14 @@ public void SepReaderHeaderTest_NotEmpty() [TestMethod] public void SepReaderHeaderTest_NamesStartingWith() { - var header = SepReaderHeader.Parse(Sep.New(';'), "A;B;C;GT_0;RE_0;GT_1;RE_1"); + var header = Parse(Sep.New(';'), "A;B;C;GT_0;RE_0;GT_1;RE_1"); AreEqual(new[] { "GT_0", "GT_1" }, header.NamesStartingWith("GT_")); } [TestMethod] public void SepReaderHeaderTest_IndicesOf_LengthsNotSame_Throws() { - var header = SepReaderHeader.Parse(Sep.New(';'), "A;B;C"); + var header = Parse(Sep.New(';'), "A;B;C"); var e = Assert.ThrowsException(() => { @@ -82,4 +82,20 @@ public void SepReaderHeaderTest_IndicesOf_LengthsNotSame_Throws() static void AreEqual(IReadOnlyList expected, IReadOnlyList actual) => CollectionAssert.AreEqual((ICollection)expected, (ICollection)actual); + + // Convenience method for testing only + static SepReaderHeader Parse(Sep sep, string line) => + Parse(sep, line, SepDefaults.ColNameComparer); + + static SepReaderHeader Parse(Sep sep, string line, IEqualityComparer comparer) + { + var colNames = sep.Split(line); + var colNameToIndex = new Dictionary(colNames.Length, comparer); + for (var i = 0; i < colNames.Length; i++) + { + var colName = colNames[i]; + colNameToIndex.Add(colName, i); + } + return new SepReaderHeader(line, colNameToIndex); + } } diff --git a/src/Sep.Test/SepReaderTest.cs b/src/Sep.Test/SepReaderTest.cs index f5d6f4e..7f63f6b 100644 --- a/src/Sep.Test/SepReaderTest.cs +++ b/src/Sep.Test/SepReaderTest.cs @@ -247,6 +247,35 @@ public void SepReaderTest_ColNameComparer_OrdinalIgnoreCase() Assert.ThrowsException(() => reader.Current["X"].ToString()); } + [DataTestMethod] + [DataRow("A;B;C;A;D;E", "Col name 'A' found 2 times at 0:'A' 3:'A' in header row 'A;B;C;A;D;E'")] + [DataRow("A;B;C;A;D;A;E;A", "Col name 'A' found 4 times at 0:'A' 3:'A' 5:'A' 7:'A' in header row 'A;B;C;A;D;A;E;A'")] + public void SepReaderTest_DuplicateColumnNames_ThrowsWithDetails(string text, string expected) + { + var e = Assert.ThrowsException(() => Sep.Reader().FromText(text)); + Assert.AreEqual(expected, e.Message); + } + + [DataTestMethod] + [DataRow("A;B;C;\"A\";D;E", "Col name 'A' found 2 times at 0:'A' 3:'A' in header row 'A;B;C;\"A\";D;E'")] + [DataRow("\"A\";B;C;A;D;\"A\";E;A", "Col name 'A' found 4 times at 0:'A' 3:'A' 5:'A' 7:'A' in header row '\"A\";B;C;A;D;\"A\";E;A'")] + public void SepReaderTest_DuplicateColumnNames_Unescape_ThrowsWithDetails(string text, string expected) + { + var e = Assert.ThrowsException(() => + Sep.Reader(o => o with { Unescape = true }).FromText(text)); + Assert.AreEqual(expected, e.Message); + } + + [DataTestMethod] + [DataRow("A;B;C;a;D;E", "Col name 'a' found 2 times at 0:'A' 3:'a' in header row 'A;B;C;a;D;E'")] + [DataRow("a;B;C;A;D;A;E;a", "Col name 'A' found 4 times at 0:'a' 3:'A' 5:'A' 7:'a' in header row 'a;B;C;A;D;A;E;a'")] + public void SepReaderTest_DuplicateColumnNames_ColNameComparerOrdinalIgnoreCase_ThrowsWithDetails(string text, string expected) + { + var e = Assert.ThrowsException(() => + Sep.Reader(o => o with { ColNameComparer = StringComparer.OrdinalIgnoreCase }).FromText(text)); + Assert.AreEqual(expected, e.Message); + } + [TestMethod] public void SepReaderTest_Info_Ctor() { diff --git a/src/Sep/Internals/SepThrow.cs b/src/Sep/Internals/SepThrow.cs index bfafb12..c1950b1 100644 --- a/src/Sep/Internals/SepThrow.cs +++ b/src/Sep/Internals/SepThrow.cs @@ -3,6 +3,7 @@ using System.Diagnostics; using System.Diagnostics.CodeAnalysis; using System.IO; +using System.Runtime.CompilerServices; using static nietras.SeparatedValues.SepWriter; namespace nietras.SeparatedValues; @@ -102,6 +103,52 @@ internal static void NotSupportedException_BufferOrRowLengthExceedsMaximumSuppor $"If no such row should exist ensure quotes \" are terminated."); } + // C# compiler does not support DoesNotReturn in face of try/finally currently + //[DoesNotReturn] + [MethodImpl(MethodImplOptions.NoInlining)] + internal static void ArgumentException_DuplicateColNamesFound(SepReaderState reader, + Dictionary colNameToIndexUntilDuplicate, + string duplicateColName, int headerColCount, + IEqualityComparer colNameComparer, + string headerRow) + { + var colNames = new string[headerColCount]; + var colIndex = 0; + foreach (var colName in colNameToIndexUntilDuplicate.Keys) + { + colNames[colIndex] = colName; + ++colIndex; + } + for (; colIndex < headerColCount; ++colIndex) + { + colNames[colIndex] = reader.ToStringDirect(colIndex); + } + var sb = SepStringBuilderPool.Take(); + try + { + var duplicates = new List<(int colIndex, string colName)>(); + for (colIndex = 0; colIndex < headerColCount; ++colIndex) + { + var colName = colNames[colIndex]; + if (colNameComparer.Equals(colName, duplicateColName)) + { + duplicates.Add((colIndex, colName)); + } + } + sb.Append($"Col name '{duplicateColName}' found {duplicates.Count} times at"); + foreach (var (duplicateColIndex, duplicateColNameFound) in duplicates) + { + sb.Append($" {duplicateColIndex}:'{duplicateColNameFound}'"); + } + sb.Append($" in header row '{headerRow}'"); + throw new ArgumentException(sb.ToString()); + } + finally + { + SepStringBuilderPool.Return(sb); + } + } + [DoesNotReturn] internal static void ArgumentException_ColNameAlreadyExists(string colName) { diff --git a/src/Sep/SepReader.cs b/src/Sep/SepReader.cs index 1abbfec..fd592c1 100644 --- a/src/Sep/SepReader.cs +++ b/src/Sep/SepReader.cs @@ -113,13 +113,18 @@ internal void Initialize(SepReaderOptions options) _colCountExpected = firstRowColCount; if (options.HasHeader) { - var colNameToIndex = new Dictionary(firstRowColCount, options.ColNameComparer); + var headerRow = new string(RowSpan()); + var colNameComparer = options.ColNameComparer; + var colNameToIndex = new Dictionary(firstRowColCount, colNameComparer); for (var colIndex = 0; colIndex < firstRowColCount; colIndex++) { var colName = ToStringDirect(colIndex); - colNameToIndex.Add(colName, colIndex); + if (!colNameToIndex.TryAdd(colName, colIndex)) + { + SepThrow.ArgumentException_DuplicateColNamesFound(this, colNameToIndex, + colName, firstRowColCount, colNameComparer, headerRow); + } } - var headerRow = new string(RowSpan()); _header = new(headerRow, colNameToIndex); HasHeader = true; diff --git a/src/Sep/SepReaderHeader.cs b/src/Sep/SepReaderHeader.cs index 0fd597c..f793b99 100644 --- a/src/Sep/SepReaderHeader.cs +++ b/src/Sep/SepReaderHeader.cs @@ -19,22 +19,6 @@ internal SepReaderHeader(string row, Dictionary colNameToIndex) public static SepReaderHeader Empty { get; } = new(string.Empty, []); - internal static SepReaderHeader Parse(Sep sep, string line) => - Parse(sep, line, SepDefaults.ColNameComparer); - - internal static SepReaderHeader Parse(Sep sep, string line, IEqualityComparer comparer) - { - var colNames = sep.Split(line); - var colNameToIndex = new Dictionary(colNames.Length, comparer); - for (var i = 0; i < colNames.Length; i++) - { - var colName = colNames[i]; - colNameToIndex.Add(colName, i); - } - return new SepReaderHeader(line, colNameToIndex); - - } - public bool IsEmpty => _colNameToIndex.Count == 0; public IReadOnlyList ColNames => _colNames;