How to parse excel rows back to types using EPPlus

Inspired by the above I took it a slightly different route.

  1. I created an attribute and mapped each property to a column.
  2. I use the DTO type to define what I expect each column to be
  3. Allow columns to not be requried
  4. Use EPPlus to convert the types

By doing so it allows me to use traditional model validation, and embrace changes to column headers


Usage:

using(FileStream fileStream = new FileStream(_fileName, FileMode.Open)){
      ExcelPackage excel = new ExcelPackage(fileStream);
      var workSheet = excel.Workbook.Worksheets[RESOURCES_WORKSHEET];

      IEnumerable<ExcelResourceDto> newcollection = workSheet.ConvertSheetToObjects<ExcelResourceDto>();
      newcollection.ToList().ForEach(x => Console.WriteLine(x.Title));
 }

Dto that maps to excel

public class ExcelResourceDto
{
    [Column(1)]
    [Required]
    public string Title { get; set; }

    [Column(2)]
    [Required]
    public string SearchTags { get; set; }
}

This is the attribute definition

[AttributeUsage(AttributeTargets.All)]
public class Column : System.Attribute
{
    public int ColumnIndex { get; set; }


    public Column(int column) 
    {
        ColumnIndex = column;
    }
} 

Extension class to handle mapping rows to DTO

public static class EPPLusExtensions
{
   public static IEnumerable<T> ConvertSheetToObjects<T>(this ExcelWorksheet worksheet) where T : new()
    {

        Func<CustomAttributeData, bool> columnOnly = y => y.AttributeType == typeof(Column);

        var columns = typeof(T)
                .GetProperties()
                .Where(x => x.CustomAttributes.Any(columnOnly))
        .Select(p => new
        {
            Property = p,
            Column = p.GetCustomAttributes<Column>().First().ColumnIndex //safe because if where above
        }).ToList();


        var rows= worksheet.Cells
            .Select(cell => cell.Start.Row)
            .Distinct()
            .OrderBy(x=>x);


        //Create the collection container
        var collection = rows.Skip(1)
            .Select(row =>
            {
                var tnew = new T();
                columns.ForEach(col =>
                {
                    //This is the real wrinkle to using reflection - Excel stores all numbers as double including int
                    var val = worksheet.Cells[row, col.Column];
                    //If it is numeric it is a double since that is how excel stores all numbers
                    if (val.Value == null)
                    {
                        col.Property.SetValue(tnew, null);
                        return;
                    }
                    if (col.Property.PropertyType == typeof(Int32))
                    {
                        col.Property.SetValue(tnew, val.GetValue<int>());
                        return;
                    }
                    if (col.Property.PropertyType == typeof(double))
                    {
                        col.Property.SetValue(tnew, val.GetValue<double>());
                        return;
                    }
                    if (col.Property.PropertyType == typeof(DateTime))
                    {
                        col.Property.SetValue(tnew, val.GetValue<DateTime>());
                        return;
                    }
                    //Its a string
                    col.Property.SetValue(tnew, val.GetValue<string>());
                });

                return tnew;
            });


        //Send it back
        return collection;
    }
}

Leave a Comment