package timeflow.format.file;
|
|
import java.util.*;
|
|
import timeflow.util.*;
|
|
import timeflow.model.Display;
|
|
public class DelimitedText {
|
private char delimiter;
|
|
public DelimitedText(char delimiter)
|
{
|
if (delimiter=='"')
|
throw new IllegalArgumentException("Can't use quote as delimiter.");
|
this.delimiter=delimiter;
|
}
|
|
private static boolean isBreak(char c)
|
{
|
return c=='\n' || c=='\r';
|
}
|
|
public List<String[]> read(String text)
|
{
|
ArrayList<String[]> results=new ArrayList<String[]>();
|
int n=text.length();
|
StringBuffer currentToken=new StringBuffer();
|
ArrayList<String> currentList=new ArrayList<String>();
|
|
boolean quoted=false;
|
for (int i=0; i<n; i++)
|
{
|
char c=text.charAt(i);
|
if (quoted)
|
{
|
if (c=='"')
|
{
|
if (i==n-1) // end of file, ignore quote.
|
{
|
quoted=false;
|
continue;
|
}
|
char next=text.charAt(i+1);
|
if (next=='"') // a quoted quote.
|
{
|
currentToken.append('"');
|
i++;
|
|
// Alas, there is a weird special case here
|
// if the user has pasted from Excel.
|
// If a field starts with a quote, and ends with two quotes,
|
// it turns out to be ambiguous!
|
// Excel doesn't do any escaping on: "blah blah""
|
// But, it does escape: blah "\n
|
// turning it into: "blah blah""\n
|
// So if "blah blah"" occurs at the end of the line,
|
// you actually do not know which it is!
|
// In practice, our first bug report was for a literal of "blah blah""
|
// so that is what we will choose.
|
|
//System.out.println("next++: '"+text.charAt(i+1)+"'="+(int)text.charAt(i+1));
|
if (i<n-1 && isBreak(text.charAt(i+1)))
|
{
|
quoted=false;
|
}
|
|
continue;
|
}
|
if (isBreak(next)) // end of line
|
{
|
quoted=false;
|
currentList.add(currentToken.toString());
|
currentToken.setLength(0);
|
results.add((String[])currentList.toArray(new String[0]));
|
currentList=new ArrayList<String>();
|
i++;
|
if (i<n-1 && isBreak(text.charAt(i+1)))
|
i++;
|
continue;
|
}
|
if (next==delimiter)
|
{
|
quoted=false;
|
continue;
|
}
|
System.out.println("a bad quote from excel: next char="+(int)next);
|
quoted=false;
|
}
|
currentToken.append(c);
|
continue;
|
}
|
|
// ok, not quoted.
|
if (c==delimiter)
|
{
|
currentList.add(currentToken.toString());
|
currentToken.setLength(0);
|
quoted=false;
|
continue;
|
}
|
|
// not delimiter, not in the middle of a quote.
|
if (c=='"')
|
{
|
if (currentToken.length()==0) // we are at beginning of a token, so this is a quote.
|
{
|
quoted=true;
|
continue;
|
}
|
}
|
|
// is it a line feed? we're not in the middle of a quote, so this means a new line.
|
if (c=='\n' || c=='\r' || c=='\f')
|
{
|
currentList.add(currentToken.toString());
|
currentToken.setLength(0);
|
results.add((String[])currentList.toArray(new String[0]));
|
currentList=new ArrayList<String>();
|
if (i<n-1 && (text.charAt(i+1)=='\n' || text.charAt(i+1)=='\r'))
|
i++;
|
continue;
|
}
|
|
// by golly, just a normal character!
|
currentToken.append(c);
|
}
|
|
// did it just end in a blank line?
|
|
if (currentList.size()>0 || currentToken.toString().trim().length()>0)
|
{
|
currentList.add(currentToken.toString());
|
results.add((String[])currentList.toArray(new String[0]));
|
}
|
return results;
|
}
|
|
public String write(String s)
|
{
|
return write(new String[] {s});
|
}
|
|
public String write(String[] data)
|
{
|
StringBuffer b=new StringBuffer();
|
for (int i=0; i<data.length; i++)
|
{
|
// add a delimiter if necessary.
|
if (i>0)
|
b.append(delimiter);
|
|
// if null, just don't write anything.
|
if (data[i]==null)
|
continue;
|
|
// does it have weird characters in it?
|
boolean weird=false;
|
int n=data[i].length();
|
for (int j=0; j<n; j++)
|
{
|
char c=data[i].charAt(j);
|
if (c==delimiter || isBreak(c))
|
{
|
weird=true;
|
break;
|
}
|
}
|
|
if (weird)
|
{
|
b.append('"');
|
for (int j=0; j<n; j++)
|
{
|
char c=data[i].charAt(j);
|
if (c=='"')
|
b.append('"');
|
b.append(c);
|
}
|
b.append('"');
|
}
|
else
|
b.append(data[i]);
|
}
|
return b.toString();
|
}
|
|
public static String[] split(String s, char delimiter)
|
{
|
DelimitedText t= new DelimitedText(delimiter);
|
List<String[]> lines=t.read(s);
|
return lines.get(0);
|
}
|
|
public static void main(String[] args) throws Exception
|
{
|
String bad=IO.read("test/bad-all.txt");
|
String[][] s=DelimitedFormat.readArrayFromString(bad, System.out);
|
System.out.println("len="+s.length);
|
|
/*
|
//DelimitedText c=new DelimitedText(';');
|
//List<String[]> arrays=c.read(IO.read("test/bad.txt"));
|
//List<String[]> arrays=c.read("a;b;\"x;y\";c");
|
//List<String[]> arrays=c.read("a;\"a\n\rq\";b;\"x;y\";c");
|
//List<String[]> arrays=c.read("a;b;\"with a \"\"blah\";c\nd;e;f\ng;h;i");
|
//List<String[]> arrays=c.read("a,\"b\",\"c\r\nd\"\r\ne,f,g\nh,i,j");
|
for (String[] s:arrays)
|
{
|
System.out.println("["+Display.arrayToString(s)+"]");
|
}
|
*/
|
}
|
}
|