CSV Parser
/// <summary>
/// Simon's stock CSV parser class
/// </summary>
/// <see cref="http://www.nullify.net/Article/331.aspx"/>
public class CSVParser
{
//these are constants purely because of their repetition, which makes them harder to read than the actual text versions
public const string Quote = "\"";
public const string CommaQuote = ",\"";
public const string EscapedQuote = "\"\"";
/// <summary>
/// Takes the CSV files contents and creates a data table from it
/// </summary>
/// <param name="csvFileContents">The entire contents of a CSV file split by line (rather than the filename)</param>
/// <param name="validateLayout">Validate the file to check conformance of the layout with expected standards</param>
/// <param name="topRowIsHeader">The top row is the header row, take it as the column names rather than data</param>
/// <returns>A datatable</returns>
public static DataTable DataTableFromCSV(string[] csvFileContents, bool validateLayout, bool topRowIsHeader)
{
DataTable outputDataTable = new DataTable();
List<string[]> csvFileRows = new List<string[]>();
List<string> columns = new List<string>();
#region Pre-parse the file
int columnCount = 0;
bool gotHeaders = false;
int rowNumber = 0;
foreach (string line in csvFileContents)
{
string[] parts = ExtractCSVElements(line);
//initial set of header names but only if the top row is header option is set
if (!gotHeaders && topRowIsHeader)
{
columns.AddRange(parts);
columnCount = parts.Length;
gotHeaders = true;
}
else
{
if (parts.Length > 0)
{
csvFileRows.Add(parts);
}
}
if (parts.Length > columnCount)
{
//if set to validate the layout and that the first row contains the headers then we know any extra columns are wrong
if (validateLayout && gotHeaders)
{
throw new Exception("Row has extra data columns: " + rowNumber.ToString());
}
//new column detected mid-data-set!
for (int i = columnCount; i < parts.Length; i++)
{
columns.Add("Column " + i.ToString());
}
columnCount = parts.Length;
}
//we always ignore zero length rows as the last line can be empty
if (parts.Length < columnCount && parts.Length != 0)
{
if (validateLayout)
{
throw new Exception("Row has missing data columns: " + rowNumber.ToString());
}
}
rowNumber++;
}
#endregion
#region Build the data tables layout and data
//columns
foreach (string column in columns)
{
outputDataTable.Columns.Add(column);
}
//rows
foreach (string[] row in csvFileRows)
{
outputDataTable.Rows.Add(row);
}
#endregion
return outputDataTable;
}
/// <summary>
/// Create objects from a data table
/// </summary>
/// <typeparam name="T"></typeparam>
/// <param name="dataTable"></param>
/// <returns></returns>
public static T[] ObjectsFromDataTable<T>(DataTable dataTable)
where T : new()
{
List<T> items = new List<T>();
var props = typeof(T).GetProperties();
foreach (DataRow row in dataTable.Rows)
{
T p = new T();
foreach (var prop in props)
{
var attributes = prop.GetCustomAttributes(typeof(CSVAliasAttribute), true);
foreach (CSVAliasAttribute attrib in attributes)
{
attrib.StoreValueIntoObject(p, dataTable, row, prop);
}
}
items.Add(p);
}
return items.ToArray();
}
/// <summary>
/// Create objects from a CSV file
/// </summary>
/// <typeparam name="T"></typeparam>
/// <param name="csvFileContents"></param>
/// <param name="validateLayout"></param>
/// <param name="topRowIsHeader"></param>
/// <returns></returns>
public static T[] ObjectsFromCSV<T>(string[] csvFileContents, bool validateLayout, bool topRowIsHeader)
where T : new()
{
var dt = DataTableFromCSV(csvFileContents, validateLayout, topRowIsHeader);
return ObjectsFromDataTable<T>(dt);
}
/// <summary>
/// Extract the elements of a line from a CSV file with support for quotes
/// </summary>
/// <param name="line">The data to parse</param>
private static string[] ExtractCSVElements(string line)
{
List<string> elements = new();
//do the initial split, based on commas
string[] firstParts = line.Split(',');
//reparse it
StringBuilder temporaryPart = new StringBuilder("");
bool inside = false;
foreach (string part in firstParts)
{
if (part.StartsWith(Quote) && part.EndsWith(Quote))
{
//entire part is one quoted object
elements.Add(part.TrimStart('"').TrimEnd('"'));
}
else
{
//are we inside a quoted part, or did we just find a quote?
if (!inside && !part.Contains(Quote))
{
//just a normal unquoted CSV part
elements.Add(part);
}
else
{
if (inside)
{
//we are still inside a quote...
temporaryPart.Append(',').Append(part);
if (part.Contains(Quote))
{
//then we are also at the end!
elements.Add(temporaryPart.Replace(Quote, "").ToString()); //add the part minus its quotes to the array
//all done!
inside = false;
}
}
else
{
//else we just found a quote!
inside = true;
temporaryPart = new StringBuilder(part);
}
}
}
}
return elements.ToArray();
}
}
/// <summary>
/// An attribute to perform mapping between a CSV file and an object
/// </summary>
public class CSVAliasAttribute : Attribute
{
/// <summary>
/// Create a new instance of the CSV mapping alias
/// </summary>
/// <param name="alias">The column name in the CSV file</param>
public CSVAliasAttribute(string alias)
{
this.Aliases = new string[] { alias };
}
public CSVAliasAttribute(params string[] aliases)
{
this.Aliases = aliases;
}
/// <summary>
/// The column name in the CSV file
/// </summary>
public string[] Aliases { get; set; }
/// <summary>
/// The default value in the instance that the column does not exist or is empty
/// </summary>
public string DefaultValue { get; set; }
public virtual void StoreValueIntoObject<T>(T item, DataTable dt, DataRow dr, PropertyInfo destination)
{
var value = GetValueFromDataRow(dt, dr);
if (value == DBNull.Value
|| (value is string && ((string)value) == ""))
return;
switch (destination.PropertyType.FullName)
{
case "System.DateTime":
destination.SetValue(item, DateTime.Parse(value.ToString()));
break;
case "System.Boolean":
destination.SetValue(item, Convert.ToBoolean(value), null);
break;
case "System.Int16":
destination.SetValue(item, Convert.ToInt16(value), null);
break;
case "System.Decimal":
destination.SetValue(item, Convert.ToDecimal(value), null);
break;
case "System.Int32":
destination.SetValue(item, Convert.ToInt32(value), null);
break;
case "System.Guid":
destination.SetValue(item, new Guid(value.ToString()), null);
break;
case "System.String":
if (value == DBNull.Value)
{
destination.SetValue(item, "", null);
}
else
{
if (value is Int32)
{
destination.SetValue(item, ((Int32)value).ToString(), null);
return;
}
if (value is Decimal)
{
destination.SetValue(item, ((Decimal)value).ToString(), null);
return;
}
if (value is Double)
{
destination.SetValue(item, ((Double)value).ToString(), null);
return;
}
if (value is DateTime)
{
destination.SetValue(item, ((DateTime)value).ToString(), null);
return;
}
destination.SetValue(item, value, null);
}
break;
default:
destination.SetValue(item, value, null);
break;
}
}
/// <summary>
/// Read a value from a data row, but do not perform any action to store or process that data
/// </summary>
/// <remarks>The default will be returned if the data row is empty or does not contain the alias column</remarks>
/// <param name="dt"></param>
/// <param name="dr"></param>
/// <returns></returns>
public virtual object GetValueFromDataRow(DataTable dt, DataRow dr)
{
// case sensitive first, this is quick
foreach (var alias in Aliases)
{
if (dt.Columns.Contains(alias))
{
return dr[alias];
}
}
//case insensitive next, slower
var nameMap = dt.Columns.Cast<DataColumn>().Select(a => a.ColumnName).ToDictionary(a => a.ToLower());
foreach (var alias in Aliases)
{
if (nameMap.ContainsKey(alias.ToLower()))
{
return dr[nameMap[alias.ToLower()]];
}
}
return Defaultify(dt, dr);
}
/// <summary>
/// Return the default
/// </summary>
/// <remarks>Exists to provide scope to replace this default with somethign calculated based on a different field</remarks>
/// <param name="dt"></param>
/// <param name="dr"></param>
/// <returns></returns>
public virtual object Defaultify(DataTable dt, DataRow dr)
{
return DefaultValue;
}
}
No Comments