Skip to content

Commit

Permalink
feat: handle invalid xml characters (#89)
Browse files Browse the repository at this point in the history
* feat: handle invalid xml characters

We want to preserve the previous behavior, which throws an exception.

* fix: fix build

* cr: fixes after review
  • Loading branch information
NikiforovAll authored Nov 22, 2024
1 parent b7ed142 commit 6f9e4b0
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 75 deletions.
162 changes: 95 additions & 67 deletions Clippit.Tests/Excel/Samples/SpreadsheetWriterSamples.cs
Original file line number Diff line number Diff line change
@@ -1,29 +1,24 @@
using System;
using System.IO;
using Clippit.Excel;
using Clippit.Excel;
using Xunit;
using Xunit.Abstractions;

namespace Clippit.Tests.Excel.Samples
{
public class SpreadsheetWriterSamples : TestsBase
public class SpreadsheetWriterSamples(ITestOutputHelper log) : TestsBase(log)
{
public SpreadsheetWriterSamples(ITestOutputHelper log)
: base(log) { }

[Fact]
public void Sample1()
{
var wb = new WorkbookDfn
{
Worksheets = new[]
{
Worksheets =
[
new WorksheetDfn
{
Name = "MyFirstSheet",
TableName = "NamesAndRates",
ColumnHeadings = new[]
{
ColumnHeadings =
[
new CellDfn { Value = "Name", Bold = true },
new CellDfn
{
Expand All @@ -37,13 +32,13 @@ public void Sample1()
Bold = true,
HorizontalCellAlignment = HorizontalCellAlignment.Left,
},
},
Rows = new[]
{
],
Rows =
[
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "Eric" },
new CellDfn { CellDataType = CellDataType.Number, Value = 50 },
new CellDfn
Expand All @@ -52,12 +47,12 @@ public void Sample1()
Value = (decimal)45.00,
FormatCode = "0.00",
},
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "Bob" },
new CellDfn { CellDataType = CellDataType.Number, Value = 42 },
new CellDfn
Expand All @@ -66,11 +61,11 @@ public void Sample1()
Value = (decimal)78.00,
FormatCode = "0.00",
},
},
],
},
},
],
},
},
],
};

var fileName = Path.Combine(TempDir, "Sw_Example1.xlsx");
Expand All @@ -83,120 +78,120 @@ public void Sample2()
{
var wb = new WorkbookDfn
{
Worksheets = new[]
{
Worksheets =
[
new WorksheetDfn
{
Name = "MyFirstSheet",
ColumnHeadings = new[]
{
ColumnHeadings =
[
new CellDfn { Value = "DataType", Bold = true },
new CellDfn
{
Value = "Value",
Bold = true,
HorizontalCellAlignment = HorizontalCellAlignment.Right,
},
},
Rows = new[]
{
],
Rows =
[
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "Boolean" },
new CellDfn { CellDataType = CellDataType.Boolean, Value = true },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "Boolean" },
new CellDfn { CellDataType = CellDataType.Boolean, Value = false },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "String" },
new CellDfn
{
CellDataType = CellDataType.String,
Value = "A String",
HorizontalCellAlignment = HorizontalCellAlignment.Right,
},
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "int" },
new CellDfn { CellDataType = CellDataType.Number, Value = 100 },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "int?" },
new CellDfn { CellDataType = CellDataType.Number, Value = (int?)100 },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "int? (is null)" },
new CellDfn { CellDataType = CellDataType.Number, Value = null },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "uint" },
new CellDfn { CellDataType = CellDataType.Number, Value = (uint)101 },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "long" },
new CellDfn { CellDataType = CellDataType.Number, Value = long.MaxValue },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "float" },
new CellDfn { CellDataType = CellDataType.Number, Value = (float)123.45 },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "double" },
new CellDfn { CellDataType = CellDataType.Number, Value = 123.45 },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "decimal" },
new CellDfn { CellDataType = CellDataType.Number, Value = (decimal)123.45 },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn
{
CellDataType = CellDataType.Date,
Expand All @@ -211,16 +206,49 @@ public void Sample2()
Bold = true,
HorizontalCellAlignment = HorizontalCellAlignment.Center,
},
},
],
},
},
],
},
},
],
};

var fileName = Path.Combine(TempDir, "Sw_Example2.xlsx");
using var stream = File.Open(fileName, FileMode.OpenOrCreate);
wb.WriteTo(stream);
}

/// <summary>
/// Writes a workbook whose only cell contains U+FFFF, a character that is not legal in XML,
/// to a file in the temp directory. The test passes when <c>WriteTo</c> completes without throwing.
/// </summary>
[Fact]
public void CanEncodeInvalidXmlCharacters()
{
    // Build the definition bottom-up: cell -> row -> sheet -> workbook.
    var invalidCell = new CellDfn
    {
        CellDataType = CellDataType.String,
        Value = "Invalid character: \uFFFF",
    };
    var singleRow = new RowDfn { Cells = [invalidCell] };
    var sheet = new WorksheetDfn { Name = "MyFirstSheet", Rows = [singleRow] };
    var workbook = new WorkbookDfn { Worksheets = [sheet] };

    // The output file is named after the test.
    var targetPath = Path.Combine(TempDir, $"{nameof(CanEncodeInvalidXmlCharacters)}.xlsx");
    using var output = File.Open(targetPath, FileMode.OpenOrCreate);
    workbook.WriteTo(output);
}
}
}
45 changes: 37 additions & 8 deletions Clippit/Excel/SpreadsheetWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@

#undef DisplayWorkingSet

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
using System.Xml.Linq;
Expand All @@ -23,7 +21,7 @@ namespace Clippit.Excel

/// <summary>
/// Definition of a workbook: the collection of worksheets to be written.
/// </summary>
public class WorkbookDfn
{
    /// <summary>
    /// The worksheets that make up the workbook. Initialized to an empty sequence
    /// rather than <see langword="null"/>.
    /// </summary>
    public IEnumerable<WorksheetDfn> Worksheets { get; set; } = [];
}

public class WorksheetDfn
Expand Down Expand Up @@ -458,14 +456,21 @@ out int numColumns
switch (cell.Value)
{
case DateTime dt:
xw.WriteValue(dt.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fff"));
xw.WriteValue(
dt.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fff", CultureInfo.InvariantCulture)
);
break;
case DateTimeOffset dts:
xw.WriteValue(dts.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fffzzz"));
xw.WriteValue(
dts.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fffzzz", CultureInfo.InvariantCulture)
);
break;
default:
case bool b:
xw.WriteValue(cell.Value);
break;
default:
xw.WriteValue(SanitizeXmlString(cell.Value.ToString()));
break;
}
xw.WriteEndElement();
}
Expand Down Expand Up @@ -683,6 +688,30 @@ private static bool MatchFormat(XDocument sXDoc, XElement xf, CellDfn cell)
return match;
}

/// <summary>
/// Replaces every character that is not a legal XML character with a textual hexadecimal
/// reference of the form <c>&amp;#xHHHH;</c>, leaving all legal characters untouched.
/// </summary>
/// <param name="xml">The text to sanitize; may be <see langword="null"/>.</param>
/// <returns>
/// <see cref="string.Empty"/> when <paramref name="xml"/> is null or empty; otherwise the input
/// with each invalid UTF-16 code unit replaced. Well-formed surrogate pairs (characters above
/// U+FFFF, such as emoji) are preserved unchanged.
/// </returns>
private static string SanitizeXmlString(string? xml)
{
    if (string.IsNullOrEmpty(xml))
    {
        return string.Empty;
    }

    var buffer = new StringBuilder(xml.Length);

    for (var i = 0; i < xml.Length; i++)
    {
        var c = xml[i];

        if (XmlConvert.IsXmlChar(c))
        {
            buffer.Append(c);
        }
        else if (i + 1 < xml.Length && char.IsSurrogatePair(c, xml[i + 1]))
        {
            // IsXmlChar is false for each surrogate half on its own, but a well-formed
            // high/low pair encodes a code point in U+10000..U+10FFFF, which is legal XML.
            // Copy both code units verbatim and skip the low surrogate.
            buffer.Append(c).Append(xml[i + 1]);
            i++;
        }
        else
        {
            // Control characters, U+FFFE/U+FFFF and unpaired surrogates end up here.
            buffer.AppendFormat(CultureInfo.InvariantCulture, "&#x{0:X};", (int)c);
        }
    }

    return buffer.ToString();
}

private static readonly string _EmptyXlsx =
@"UEsDBBQABgAIAAAAIQBi7p1oYQEAAJAEAAATAAgCW0NvbnRlbnRfVHlwZXNdLnhtbCCiBAIooAAC
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
Expand Down

0 comments on commit 6f9e4b0

Please sign in to comment.