Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: handle invalid xml characters #89

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 95 additions & 67 deletions Clippit.Tests/Excel/Samples/SpreadsheetWriterSamples.cs
Original file line number Diff line number Diff line change
@@ -1,29 +1,24 @@
using System;
using System.IO;
using Clippit.Excel;
using Clippit.Excel;
using Xunit;
using Xunit.Abstractions;

namespace Clippit.Tests.Excel.Samples
{
public class SpreadsheetWriterSamples : TestsBase
public class SpreadsheetWriterSamples(ITestOutputHelper log) : TestsBase(log)
{
public SpreadsheetWriterSamples(ITestOutputHelper log)
: base(log) { }

[Fact]
public void Sample1()
{
var wb = new WorkbookDfn
{
Worksheets = new[]
{
Worksheets =
[
new WorksheetDfn
{
Name = "MyFirstSheet",
TableName = "NamesAndRates",
ColumnHeadings = new[]
{
ColumnHeadings =
[
new CellDfn { Value = "Name", Bold = true },
new CellDfn
{
Expand All @@ -37,13 +32,13 @@ public void Sample1()
Bold = true,
HorizontalCellAlignment = HorizontalCellAlignment.Left,
},
},
Rows = new[]
{
],
Rows =
[
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "Eric" },
new CellDfn { CellDataType = CellDataType.Number, Value = 50 },
new CellDfn
Expand All @@ -52,12 +47,12 @@ public void Sample1()
Value = (decimal)45.00,
FormatCode = "0.00",
},
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "Bob" },
new CellDfn { CellDataType = CellDataType.Number, Value = 42 },
new CellDfn
Expand All @@ -66,11 +61,11 @@ public void Sample1()
Value = (decimal)78.00,
FormatCode = "0.00",
},
},
],
},
},
],
},
},
],
};

var fileName = Path.Combine(TempDir, "Sw_Example1.xlsx");
Expand All @@ -83,120 +78,120 @@ public void Sample2()
{
var wb = new WorkbookDfn
{
Worksheets = new[]
{
Worksheets =
[
new WorksheetDfn
{
Name = "MyFirstSheet",
ColumnHeadings = new[]
{
ColumnHeadings =
[
new CellDfn { Value = "DataType", Bold = true },
new CellDfn
{
Value = "Value",
Bold = true,
HorizontalCellAlignment = HorizontalCellAlignment.Right,
},
},
Rows = new[]
{
],
Rows =
[
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "Boolean" },
new CellDfn { CellDataType = CellDataType.Boolean, Value = true },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "Boolean" },
new CellDfn { CellDataType = CellDataType.Boolean, Value = false },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "String" },
new CellDfn
{
CellDataType = CellDataType.String,
Value = "A String",
HorizontalCellAlignment = HorizontalCellAlignment.Right,
},
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "int" },
new CellDfn { CellDataType = CellDataType.Number, Value = 100 },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "int?" },
new CellDfn { CellDataType = CellDataType.Number, Value = (int?)100 },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "int? (is null)" },
new CellDfn { CellDataType = CellDataType.Number, Value = null },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "uint" },
new CellDfn { CellDataType = CellDataType.Number, Value = (uint)101 },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "long" },
new CellDfn { CellDataType = CellDataType.Number, Value = long.MaxValue },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "float" },
new CellDfn { CellDataType = CellDataType.Number, Value = (float)123.45 },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "double" },
new CellDfn { CellDataType = CellDataType.Number, Value = 123.45 },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn { CellDataType = CellDataType.String, Value = "decimal" },
new CellDfn { CellDataType = CellDataType.Number, Value = (decimal)123.45 },
},
],
},
new RowDfn
{
Cells = new[]
{
Cells =
[
new CellDfn
{
CellDataType = CellDataType.Date,
Expand All @@ -211,16 +206,49 @@ public void Sample2()
Bold = true,
HorizontalCellAlignment = HorizontalCellAlignment.Center,
},
},
],
},
},
],
},
},
],
};

var fileName = Path.Combine(TempDir, "Sw_Example2.xlsx");
using var stream = File.Open(fileName, FileMode.OpenOrCreate);
wb.WriteTo(stream);
}

/// <summary>
/// Writes a workbook whose single string cell contains U+FFFF, a character that is not
/// legal in XML 1.0 content. The test passes when <c>WriteTo</c> completes without throwing.
/// </summary>
[Fact]
public void CanEncodeInvalidXmlCharacters()
{
    var wb = new WorkbookDfn
    {
        Worksheets =
        [
            new WorksheetDfn
            {
                Name = "MyFirstSheet",
                Rows =
                [
                    new RowDfn
                    {
                        Cells =
                        [
                            new CellDfn
                            {
                                CellDataType = CellDataType.String,
                                Value = "Invalid character: \uFFFF",
                            },
                        ],
                    },
                ],
            },
        ],
    };

    var fileName = Path.Combine(TempDir, $"{nameof(CanEncodeInvalidXmlCharacters)}.xlsx");

    // FileMode.Create truncates a file left behind by an earlier run; OpenOrCreate would
    // keep its old length, so stale trailing bytes could follow the newly written package.
    using var stream = File.Open(fileName, FileMode.Create);
    wb.WriteTo(stream);
}
}
}
45 changes: 37 additions & 8 deletions Clippit/Excel/SpreadsheetWriter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,8 @@

#undef DisplayWorkingSet

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;
using System.Xml;
using System.Xml.Linq;
Expand All @@ -23,7 +21,7 @@ namespace Clippit.Excel

public class WorkbookDfn
{
public IEnumerable<WorksheetDfn> Worksheets { get; set; }
public IEnumerable<WorksheetDfn> Worksheets { get; set; } = [];
}

public class WorksheetDfn
Expand Down Expand Up @@ -458,14 +456,21 @@ out int numColumns
switch (cell.Value)
{
case DateTime dt:
xw.WriteValue(dt.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fff"));
xw.WriteValue(
dt.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fff", CultureInfo.InvariantCulture)
);
break;
case DateTimeOffset dts:
xw.WriteValue(dts.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fffzzz"));
xw.WriteValue(
dts.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'.'fffzzz", CultureInfo.InvariantCulture)
);
break;
default:
case bool b:
xw.WriteValue(cell.Value);
break;
default:
xw.WriteValue(SanitizeXmlString(cell.Value.ToString()));
break;
}
xw.WriteEndElement();
}
Expand Down Expand Up @@ -683,6 +688,30 @@ private static bool MatchFormat(XDocument sXDoc, XElement xf, CellDfn cell)
return match;
}

/// <summary>
/// Returns a copy of <paramref name="xml"/> in which every UTF-16 code unit that is not
/// allowed in XML 1.0 character data is replaced by a textual <c>&amp;#xHHHH;</c> marker
/// (upper-case hexadecimal code unit value).
/// </summary>
/// <param name="xml">The text to sanitize; may be <c>null</c> or empty.</param>
/// <returns>
/// The sanitized text, or <see cref="string.Empty"/> when <paramref name="xml"/> is
/// <c>null</c> or empty.
/// </returns>
/// <remarks>
/// Well-formed surrogate pairs (characters outside the Basic Multilingual Plane, such as
/// emoji) are legal XML and are copied through unchanged. Only lone surrogates and other
/// illegal code units are replaced.
/// </remarks>
private static string SanitizeXmlString(string? xml)
{
    if (string.IsNullOrEmpty(xml))
    {
        return string.Empty;
    }

    var buffer = new StringBuilder(xml.Length);

    for (var i = 0; i < xml.Length; i++)
    {
        var c = xml[i];

        if (XmlConvert.IsXmlChar(c))
        {
            buffer.Append(c);
        }
        else if (char.IsSurrogatePair(xml, i))
        {
            // IsXmlChar rejects each surrogate half on its own, but a high/low pair encodes
            // a valid supplementary character, so keep both code units and skip the low half.
            buffer.Append(c).Append(xml[i + 1]);
            i++;
        }
        else
        {
            buffer.AppendFormat(CultureInfo.InvariantCulture, "&#x{0:X};", (int)c);
        }
    }

    return buffer.ToString();
}

private static readonly string _EmptyXlsx =
@"UEsDBBQABgAIAAAAIQBi7p1oYQEAAJAEAAATAAgCW0NvbnRlbnRfVHlwZXNdLnhtbCCiBAIooAAC
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
Expand Down
Loading