Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update your benchmarks #93

Open
teoadal opened this issue Dec 26, 2024 · 1 comment
Open

Update your benchmarks #93

teoadal opened this issue Dec 26, 2024 · 1 comment

Comments

@teoadal
Copy link

teoadal commented Dec 26, 2024

Hi! Please, update your benchmarks with other popular libraries

    <ItemGroup>
        <PackageReference Include="MiniExcel" Version="1.36.0" />
        <PackageReference Include="NPOI" Version="2.7.2" />
        <PackageReference Include="SpreadCheetah" Version="1.19.0" />
        <PackageReference Include="ClosedXML" Version="0.104.2" />
        <PackageReference Include="DocumentFormat.OpenXml" Version="3.2.0" />
        <PackageReference Include="EPPlusFree" Version="4.5.3.8" />
    </ItemGroup>

BenchmarkDotNet v0.14.0, Windows 11 (10.0.22621.4037/22H2/2022Update/SunValley2)
Intel Xeon CPU E5-2697 v3 2.60GHz, 2 CPU, 56 logical and 28 physical cores
.NET SDK 9.0.100
  [Host]   : .NET 8.0.10 (8.0.1024.46610), X64 RyuJIT AVX2
  .NET 9.0 : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2

Job=.NET 9.0  Runtime=.NET 9.0  

Method NumberOfRows Mean Error StdDev Median Ratio RatioSD Gen0 Gen1 Gen2 Allocated Alloc Ratio
SpreadCheetah_DataCell 20000 20.82 ms 0.233 ms 0.218 ms 20.84 ms 0.92 0.01 281.2500 - - 5.05 MB 0.49
SpreadCheetah 20000 22.60 ms 0.111 ms 0.087 ms 22.62 ms 1.00 0.01 562.5000 - - 10.23 MB 1.00
MinExcel_Cached 20000 134.75 ms 1.592 ms 1.489 ms 134.89 ms 5.96 0.07 4500.0000 750.0000 750.0000 87.21 MB 8.52
MinExcel 20000 157.91 ms 2.271 ms 2.124 ms 158.53 ms 6.99 0.09 5000.0000 1750.0000 750.0000 95.76 MB 9.36
OpenXml_Sax 20000 292.50 ms 5.669 ms 6.301 ms 293.29 ms 12.94 0.28 3000.0000 1000.0000 1000.0000 64.5 MB 6.30
Npoi 20000 493.00 ms 9.437 ms 8.366 ms 492.59 ms 21.82 0.37 5000.0000 4000.0000 1000.0000 140.74 MB 13.75
OpenXml_Dom 20000 634.55 ms 7.131 ms 6.322 ms 636.66 ms 28.08 0.29 7000.0000 6000.0000 1000.0000 132.41 MB 12.94
ClosedXml_Batch 20000 687.19 ms 6.663 ms 5.906 ms 688.57 ms 30.41 0.28 7000.0000 3000.0000 2000.0000 162.47 MB 15.88
EpPlus4 20000 924.52 ms 25.208 ms 69.431 ms 933.42 ms 40.92 3.06 9000.0000 5000.0000 2000.0000 193.29 MB 18.89
ClosedXml 20000 926.01 ms 18.255 ms 44.779 ms 945.93 ms 40.98 1.97 11000.0000 2000.0000 1000.0000 259.51 MB 25.36
using BenchmarkDotNet.Order;
using ClosedXML.Excel;
using DocumentFormat.OpenXml;
using DocumentFormat.OpenXml.Packaging;
using DocumentFormat.OpenXml.Spreadsheet;
using MiniExcelLibs;
using NPOI.HSSF.UserModel;
using OfficeOpenXml;
using SpreadCheetah;
using Cell = SpreadCheetah.Cell;
using OpenXmlCell = DocumentFormat.OpenXml.Spreadsheet.Cell;
using OpenXmlCellValue = DocumentFormat.OpenXml.Spreadsheet.CellValues;

namespace ConsoleApp1;

[SimpleJob(RuntimeMoniker.Net90)]
[Orderer(SummaryOrderPolicy.FastestToSlowest)]
[MeanColumn, MemoryDiagnoser]
public class XlsxBench
{
    private const string SheetName = "Test_Book";
    private const int NumberOfColumns = 10;

    private static readonly Stream OutputStream = new MemoryStream(6_000_000);

    [Params(20_000)] public int NumberOfRows { get; set; }

    private List<string> _columnNames = null!;
    private List<Dictionary<string, object>> _miniExcelSheet = null!;
    private List<List<string>> _values = null!;

    [Benchmark]
    public long ClosedXml()
    {
        OutputStream.Seek(0, SeekOrigin.Begin);

        using var workbook = new XLWorkbook();
        var worksheet = workbook.Worksheets.Add(SheetName);

        for (var row = 0; row < _values.Count; row++)
        {
            var rowValues = _values[row];
            for (var col = 0; col < rowValues.Count; col++)
            {
                worksheet
                    .Cell(row + 1, col + 1)
                    .SetValue(rowValues[col]);
            }
        }

        workbook.SaveAs(OutputStream);

        return OutputStream.Position;
    }

    [Benchmark]
    public long ClosedXml_Batch()
    {
        OutputStream.Seek(0, SeekOrigin.Begin);

        using var workbook = new XLWorkbook();

        var worksheet = workbook.Worksheets.Add(SheetName);

        worksheet
            .Cell(1, 1)
            .InsertData(_values);

        workbook.SaveAs(OutputStream);

        return OutputStream.Position;
    }

    [Benchmark]
    public long EpPlus4()
    {
        OutputStream.Seek(0, SeekOrigin.Begin);

        using var package = new ExcelPackage();

        var worksheet = package.Workbook.Worksheets.Add(SheetName);

        for (var row = 0; row < _values.Count; row++)
        {
            var columns = _values[row];
            for (var col = 0; col < columns.Count; ++col)
            {
                worksheet.Cells[row + 1, col + 1].Value = columns[col];
            }
        }

        package.SaveAs(OutputStream);
        return OutputStream.Position;
    }

    [Benchmark]
    public long MinExcel()
    {
        OutputStream.Seek(0, SeekOrigin.Begin);

        var values = new List<Dictionary<string, object>>(NumberOfRows);

        foreach (var rowValues in _values)
        {
            var row = new Dictionary<string, object>(NumberOfColumns);
            for (var i = 0; i < rowValues.Count; i++)
            {
                var columnName = _columnNames[i];
                var rowValue = rowValues[i];
                row.Add(columnName, rowValue);
            }

            values.Add(row);
        }

        OutputStream.SaveAs(values);
        return OutputStream.Position;
    }

    [Benchmark]
    public long MinExcel_Cached()
    {
        OutputStream.Seek(0, SeekOrigin.Begin);

        for (var rowNumber = 0; rowNumber < _values.Count; rowNumber++)
        {
            var rowValues = _values[rowNumber];
            var rowData = _miniExcelSheet[rowNumber];
            for (var i = 0; i < rowValues.Count; i++)
            {
                var columnName = _columnNames[i];
                var rowValue = rowValues[i];
                rowData.Add(columnName, rowValue);
            }
        }

        OutputStream.SaveAs(_miniExcelSheet);

        foreach (var row in _miniExcelSheet)
        {
            row.Clear();
        }

        return OutputStream.Position;
    }

    [Benchmark]
    public long Npoi()
    {
        OutputStream.Seek(0, SeekOrigin.Begin);
        
        var workbook = new HSSFWorkbook();
        var sheet = workbook.CreateSheet(SheetName);

        // This Where the Data row starts from
        var rowIndex = 0;

        //Iteration through some collection
        foreach (var rowValues in _values)
        {
            var currentRow = sheet.CreateRow(rowIndex);
            for (var colIndex = 0; colIndex < rowValues.Count; colIndex++)
            {
                var cell = currentRow.CreateCell(colIndex);
                cell.SetCellValue(rowValues[colIndex]);
            }

            rowIndex++;
        }

        workbook.Write(OutputStream);
        return OutputStream.Position;
    }

    [Benchmark]
    public long OpenXml_Sax()
    {
        OutputStream.Seek(0, SeekOrigin.Begin);

        // Is a stream-based processor.
        // You only have a tiny part in memory at any time and you "sniff" the XML stream by implementing callback code for events like tagStarted() etc.
        // It uses almost no memory, but you can't do "DOM" stuff, like use xpath or traverse trees.
        // https://en.wikipedia.org/wiki/Simple_API_for_XML

        using var xl = SpreadsheetDocument.Create(OutputStream, SpreadsheetDocumentType.Workbook);
        var workbookPart = xl.AddWorkbookPart();
        var wsp = workbookPart.AddNewPart<WorksheetPart>();

        var oxw = OpenXmlWriter.Create(wsp);
        oxw.WriteStartElement(new Worksheet());
        oxw.WriteStartElement(new SheetData());

        var rowObject = new Row();
        var cellAttributes = new[] { new OpenXmlAttribute("t", "", "inlineStr") };
        var cell = new OpenXmlCell();
        var inlineString = new InlineString();

        for (var row = 0; row < NumberOfRows; row++)
        {
            var rowAttributes = new[] { new OpenXmlAttribute("r", "", (row + 1).ToString()) };
            oxw.WriteStartElement(rowObject, rowAttributes);
            var columns = _values[row];

            foreach (var col in columns)
            {
                oxw.WriteStartElement(cell, cellAttributes);
                oxw.WriteStartElement(inlineString);
                oxw.WriteElement(new Text(col));
                oxw.WriteEndElement();
                oxw.WriteEndElement();
            }

            oxw.WriteEndElement();
        }

        oxw.WriteEndElement();
        oxw.WriteEndElement();
        oxw.Close();

        oxw = OpenXmlWriter.Create(workbookPart);
        oxw.WriteStartElement(new Workbook());
        oxw.WriteStartElement(new Sheets());

        oxw.WriteElement(new Sheet
        {
            Name = "Sheet1",
            SheetId = 1,
            Id = workbookPart.GetIdOfPart(wsp)
        });

        oxw.WriteEndElement();
        oxw.WriteEndElement();
        oxw.Close();

        return OutputStream.Position;
    }

    [Benchmark]
    public long OpenXml_Dom()
    {
        OutputStream.Seek(0, SeekOrigin.Begin);

        // You load the whole thing into memory - it's a massive memory hog.
        // You can blow memory with even medium sized documents.
        // But you can use xpath and traverse the tree etc.
        // https://en.wikipedia.org/wiki/Document_Object_Model

        using var xl = SpreadsheetDocument.Create(OutputStream, SpreadsheetDocumentType.Workbook);
        var workbookpart = xl.AddWorkbookPart();
        workbookpart.Workbook = new Workbook();
        var worksheetPart = workbookpart.AddNewPart<WorksheetPart>();
        var sheetData = new SheetData();
        worksheetPart.Worksheet = new Worksheet();
        worksheetPart.Worksheet.AppendChild(sheetData);

        var sheets = workbookpart.Workbook.AppendChild(new Sheets());
        var sheet = new Sheet
        {
            Id = workbookpart.GetIdOfPart(worksheetPart),
            SheetId = 1,
            Name = SheetName
        };

        var cells = new OpenXmlElement[NumberOfColumns];

        for (var row = 0; row < _values.Count; ++row)
        {
            var rowValues = _values[row];
            for (var col = 0; col < rowValues.Count; ++col)
            {
                var inlineString = new InlineString();
                inlineString.AppendChild(new Text(rowValues[col]));
                var cell = new OpenXmlCell { DataType = OpenXmlCellValue.InlineString };
                cell.AppendChild(inlineString);
                cells[col] = cell;
            }

            var rowObject = new Row(cells) { RowIndex = (uint)row + 1 };
            sheetData.AppendChild(rowObject);
        }

        sheets.AppendChild(sheet);
        workbookpart.Workbook.Save();

        return OutputStream.Position;
    }

    [Benchmark(Baseline = true)]
    public async Task SpreadCheetah()
    {
        OutputStream.Seek(0, SeekOrigin.Begin);

        await using var spreadsheet = await Spreadsheet.CreateNewAsync(OutputStream);

        await spreadsheet.StartWorksheetAsync(SheetName);

        foreach (var rowValues in _values)
        {
            var rowValuesCount = rowValues.Count;

            var rowCells = new List<Cell>(rowValuesCount);
            for (var col = 0; col < rowValuesCount; col++)
            {
                rowCells.Add(new Cell(rowValues[col]));
            }

            await spreadsheet.AddRowAsync(rowCells);
        }

        await spreadsheet.FinishAsync();
    }

    [Benchmark]
    public async Task SpreadCheetahDataCell()
    {
        OutputStream.Seek(0, SeekOrigin.Begin);

        await using var spreadsheet = await Spreadsheet.CreateNewAsync(OutputStream);

        await spreadsheet.StartWorksheetAsync(SheetName);

        foreach (var rowValues in _values)
        {
            var rowValuesCount = rowValues.Count;

            var rowCells = new DataCell[rowValuesCount];
            for (var col = 0; col < rowValuesCount; col++)
            {
                rowCells[col] = new DataCell(rowValues[col]);
            }

            await spreadsheet.AddRowAsync(rowCells);
        }

        await spreadsheet.FinishAsync();
    }

    [GlobalSetup]
    public void Setup()
    {
        _columnNames = Enumerable.Range(0, NumberOfColumns).Select(c => $"Column{c}").ToList();
        _values = new List<List<string>>(NumberOfRows);
        for (var row = 1; row <= NumberOfRows; row++)
        {
            var cells = new List<string>(NumberOfColumns);
            _values.Add(cells);
            for (var col = 1; col <= NumberOfColumns; col++)
            {
                cells.Add($"{col}-{row}");
            }
        }

        _miniExcelSheet = new List<Dictionary<string, object>>(NumberOfRows);
        foreach (var _ in _values)
        {
            _miniExcelSheet.Add(new Dictionary<string, object>(NumberOfColumns));
        }
    }
}

I wrot a post in telegram about your perferct library!

@sveinungf
Copy link
Owner

Hi! Thanks for this, and thank you for the feedback.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants