Berlin, 05.09.2010
Zoom : Impressum

Kulisch Software-Entwicklung

Kurz und knapp : Leistungen : Projekte : Kunden : Kontakt : Grüße

C#-Programmierung CSV Parser

Einlesen von MS-Excel-Dateien im CSV-Format.

CSVParser.cs
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.IO;

namespace FOO
{
    /// <summary>
    /// Forward-only parser for character-separated text. Fields may be wrapped in
    /// double quotes; inside a quoted field a doubled quote (<c>""</c>) stands for one
    /// literal quote, and the separator or line breaks may appear verbatim.
    /// </summary>
    public class CSVParser
    {
        private readonly string _text;
        private readonly char _separator;
        private readonly Regex _quoted;
        private readonly Regex _unquoted;
        private int _index;

        /// <summary>
        /// Creates a parser positioned at the start of <paramref name="input"/>.
        /// </summary>
        /// <param name="input">Complete CSV text; a trailing line feed is added if missing.</param>
        /// <param name="separator">Character that separates the fields of a record.</param>
        /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
        public CSVParser(string input, char separator)
        {
            if (input == null)
            {
                throw new ArgumentNullException("input");
            }

            _index = 0;
            _text = NewLine(input);
            _separator = separator;

            // The separator is escaped so that regex metacharacters such as '|' or '.'
            // are taken literally. \G pins every match to the current read position,
            // so a field can never be "found" further down the text.
            string terminator = string.Concat("(", Regex.Escape(separator.ToString()), "|\r?\n)");
            _quoted = new Regex(string.Concat("\\G\"(([^\"]|\"\")*?)\"", terminator));
            _unquoted = new Regex(string.Concat("\\G(.*?)", terminator));
        }

        /// <summary>
        /// Reads the next record and advances the read position past its line break.
        /// </summary>
        /// <returns>
        /// The fields of the record in order of appearance (an empty line yields a
        /// single empty field), or <c>null</c> once the whole input has been consumed.
        /// </returns>
        public string[] ReadLine()
        {
            List<string> token = new List<string>();

            while (_index < _text.Length)
            {
                char current = _text[_index];

                if (current == _separator)
                {
                    // Nothing between the read position and the separator: empty field.
                    token.Add(string.Empty);
                    _index++;
                    continue;
                }

                if (current == '\n')
                {
                    // Line break directly after a separator (or at line start): the
                    // record ends with an empty field.
                    token.Add(string.Empty);
                    _index++;
                    break;
                }

                Match match = null;
                bool dequote = false;

                if (current == '"')
                {
                    match = _quoted.Match(_text, _index);
                    dequote = match.Success;
                }

                if (!dequote)
                {
                    // Plain field, or a malformed quoted field (no closing quote, or
                    // text after the closing quote) which is then taken literally.
                    // This pattern always succeeds because _text ends with '\n'.
                    match = _unquoted.Match(_text, _index);
                }

                string item = match.Groups[1].Value;
                if (dequote)
                {
                    // Inside the captured group quotes only occur as escaped pairs.
                    item = item.Replace("\"\"", "\"");
                }

                token.Add(item);
                _index += match.Length;

                // The match ends with either the separator or the line break; compare
                // the raw character to stay independent of culture-specific rules.
                if (_text[_index - 1] == '\n')
                {
                    break;
                }
            }

            return (token.Count > 0) ? token.ToArray() : null;
        }

        /// <summary>
        /// Returns <paramref name="value"/> with a terminating line feed, appending one
        /// only if the text does not already end with it.
        /// </summary>
        private string NewLine(string value)
        {
            // Ordinal comparison: culture-sensitive EndsWith may not recognise the
            // "\n" at the end of a "\r\n" sequence on ICU-based runtimes.
            if (!value.EndsWith("\n", StringComparison.Ordinal))
            {
                return value + "\n";
            }
            return value;
        }
    }
}

C# für Einsteiger

C# für Fortgeschrittene