001/**
002 * TabDataReader -- Translates data sets containing ArrayParam objects to and from tab
003 * delimited arrays.
004 *
005 * Copyright (C) 2003-2017, by Joseph A. Huwaldt. All rights reserved.
006 *
007 * This library is free software; you can redistribute it and/or modify it under the terms
008 * of the GNU Lesser General Public License as published by the Free Software Foundation;
009 * either version 2 of the License, or (at your option) any later version.
010 *
011 * This library is distributed in the hope that it will be useful, but WITHOUT ANY
012 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
013 * PARTICULAR PURPOSE. See the GNU Library General Public License for more details.
014 *
015 * You should have received a copy of the GNU Lesser General Public License along with
016 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place -
017 * Suite 330, Boston, MA 02111-1307, USA. Or visit: http://www.gnu.org/licenses/lgpl.html
018 */
019package jahuwaldt.js.datareader;
020
021import jahuwaldt.io.FileUtils;
022import jahuwaldt.js.util.TextTokenizer;
023import jahuwaldt.swing.AppUtilities;
024import java.awt.Frame;
025import java.io.*;
026import java.text.MessageFormat;
027import java.util.List;
028import java.util.NoSuchElementException;
029import javax.measure.quantity.Dimensionless;
030import javax.measure.unit.Unit;
031import javax.measure.unit.UnitFormat;
032import javolution.text.Text;
033import javolution.text.TypeFormat;
034import javolution.util.FastTable;
035
036/**
037 * This class translates data between a list of DataSet data structures and a stream
038 * containing tab, space, or comma delimited columns of array data with a single line
039 * header indicating the names of the parameters and an optional second line indicating
040 * the units of each parameter. Only arrays can be read or written to this format. They
041 * all must be the same length (in the same case).
042 *
043 * <p> Modified by: Joseph A. Huwaldt </p>
044 *
045 * @author Joseph A. Huwaldt, Date: May 19, 2003
046 * @version March 21, 2017
047 */
048public class TabDataReader implements DataReader {
049
050    //  The default name given to DAT data sets.
051    private CharSequence _defaultSetName = RESOURCES.getString("defSetName");
052
053    //  The delimiters to use to separate columns in the file.
054    //  Use space, tab, and comma as delimiters.
055    private String DELIMITERS = " \t,";
056
057    //  The delimiter to use when writing out files.
058    private static final String DELIM = "\t";
059
060    //  Text displayed when there are no units defined for an item.
061    public static final String NO_UNITS = "nd";
062
063    //  A brief description of the data read by this reader.
064    private static final String DESCRIPTION = RESOURCES.getString("tabDesc");
065
066    //  The preferred file extension for files of this reader's type.
067    public static final String EXTENSION = RESOURCES.getString("tabDefExt");
068
069    /**
070     * Returns a string representation of the object. This will return a brief description
071     * of the format read by this reader.
072     */
073    @Override
074    public String toString() {
075        return DESCRIPTION;
076    }
077
078    /**
079     * Returns the preferred file extension (not including the ".") for files of this
080     * DataReader's type.
081     *
082     * @return The preferred file extension for this file's type.
083     */
084    @Override
085    public String getExtension() {
086        return EXTENSION;
087    }
088
089    /**
090     * Compares this object with the specified object for order based on the
091     * <code>toString().compareTo(o.toString())</code> method. Returns a negative integer,
092     * zero, or a positive integer as this object is less than, equal to, or greater than
093     * the specified object.
094     */
095    @Override
096    public int compareTo(DataReader o) {
097        return this.toString().compareTo(o.toString());
098    }
099
100    /**
101     * Method that determines if this reader can read data from the specified input
102     * stream.
103     *
104     * @param pathName The path to the file to be read.
105     * @param input    An input stream containing the data to be read. Any methods that
106     *                 read from this stream must first set a mark and then reset back to
107     *                 that mark before the method returns (even if it returns with an
108     *                 exception).
109     * @return DataReader.NO if the file is not recognized at all or DataReader.MAYBE if
110     * the file has an appropriate extension.
111     * @throws java.io.IOException If the input stream could not be read.
112     */
113    @Override
114    public int canReadData(String pathName, BufferedInputStream input) throws IOException {
115
116        //  Get the file name extension.
117        String extension = FileUtils.getExtension(pathName);
118
119        // Get the list of extensions that we MIGHT be able to read.
120        String[] extensions = RESOURCES.getString("tabExtensions").split(",");
121        for (String ext : extensions) {
122            if (extension.equalsIgnoreCase(ext.trim()))
123                return MAYBE;
124        }
125
126        return NO;
127    }
128
129    /**
130     * Returns true. This class can write data to a tabbed arrays formatted file.
131     * 
132     * @return Always returns true.
133     */
134    @Override
135    public boolean canWriteData() {
136        return true;
137    }
138
139    /**
140     * The Tabbed Array format can save only a single case of arrays.
141     *
142     * @param parent Determines the Frame in which the dialog is displayed; if null, or if
143     *               the parentComponent has no Frame, a default Frame is used.
144     * @param data   The input data set that is to be selected for saving.
145     * @return A list of DataSet objects containing the selected data to be saved. Could
146     *         return null if the user selects nothing.
147     */
148    @Override
149    public List<DataSet> selectDataForSaving(Frame parent, List<DataSet> data) {
150
151        if (data == null || data.size() < 1)
152            return null;
153
154        DataSet selected;
155
156        if (data.size() == 1 && data.get(0).size() == 1)
157            //  If there is just one case, save that.
158            selected = data.get(0);
159
160        else {
161            //  If there is more than one case, 
162            //  ask the user to select cases to export.
163            SelectCasesDialog dialog = new SelectCasesDialog(parent, RESOURCES.getString("selectCaseTitle"),
164                    RESOURCES.getString("selectCaseMsg"), data, true, true);
165            dialog.setLocation(AppUtilities.dialogPosition(dialog));
166            dialog.setVisible(true);
167
168            //  Retrieve the selected case.
169            selected = dialog.getSelected();
170            dialog.dispose();
171        }
172        if (selected == null || selected.size() < 1)
173            return null;
174
175        FastTable<DataSet> output = FastTable.newInstance();
176        output.add(selected);
177
178        return output;
179    }
180
181    /**
182     * Method that reads in tab, space or comma delimited array data from the specified
183     * input stream and returns that data as a list of {@link DataSet} objects.
184     *
185     * @param pathName The path to the file to be read.
186     * @param input    An input stream containing the tab, space or comma delimited array
187     *                 data.
188     * @return A list of DataSet objects that contains the data read in from the
189     * specified stream (will contain a single data set which will contain a single case
190     * with ArrayParam objects for each column).
191     * @throws IOException If there is a problem reading the specified stream.
192     */
193    @Override
194    public List<DataSet> read(String pathName, InputStream input) throws IOException {
195
196        //  Wrap the input stream in a line number reader.
197        LineNumberReader reader = new LineNumberReader(new InputStreamReader(input));
198
199        //  Create an empty list to store DataSets into.
200        FastTable<DataSet> dataSets = FastTable.newInstance();
201
202        //  Tab files contain only a single data set, so create that.
203        DataSet set = DataSet.newInstance(_defaultSetName);
204
205        //  Tab files contain only a single case, so create that.
206        DataCase aCase = DataCase.newInstance(_defaultSetName);
207
208        //  Parse out the parameter names from the 1st line.
209        FastTable<Text> names = inputParameterNames(reader);
210
211        //  Read in the optional units from the 2nd line.
212        FastTable<Unit<?>> units = inputUnits(reader, names.size());
213
214        //  Now read in the arrays of data.
215        inputDataArrays(reader, aCase, names, units);
216
217        //  Add the case to the set.
218        set.add(aCase);
219
220        //  Add the set to the list of data sets being output.
221        dataSets.add(set);
222
223        //  Clean up before leaving.
224        FastTable.recycle(names);
225        FastTable.recycle(units);
226
227        return dataSets;
228    }
229
230    /**
231     * Method that writes out all the data stored in the specified list of {@link DataSet}
232     * objects to the specified output stream in tabbed array format. Only the 1st case in
233     * the 1st data set in the specified list is written out since the tabbed array format
234     * does not support multiple cases. Only arrays are written out since the tabbed array
235     * format does not support scalars or text notes.
236     *
237     * @param output The output stream to which the data is to be written.
238     * @param data   A list of {@link DataSet} objects containing data to be written out.
239     *               Only the 1st case in the 1st data set in the list is written out.
240     * @throws IOException If there is a problem writing to the specified stream.
241     */
242    @Override
243    public void write(OutputStream output, List<DataSet> data) throws IOException {
244
245        if (data.size() < 1)
246            return;
247
248        //  Wrap the output stream in a writer.
249        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(output));
250
251        // Extract the 1st data set from the list.
252        DataSet set = data.get(0);
253
254        //  Extract the 1st case from the data set.
255        DataCase aCase = set.get(0);
256
257        //  Extract all the array parameters.
258        List<ArrayParam> arrays = aCase.getAllArrays();
259        if (arrays == null || arrays.size() < 1)
260            throw new IOException(RESOURCES.getString("emptyDataSetErr"));
261
262        //  Write out the arrays.
263        writeArrays(writer, arrays);
264
265        //  Clean up before leaving.
266        FastTable.recycle((FastTable<?>)arrays);
267
268        writer.flush();
269    }
270
271    /**
272     * Sets the default set name to use.
273     * 
274     * @param name The name to use as the default set name.
275     */
276    @Override
277    public void setDefaultSetName(CharSequence name) {
278        _defaultSetName = name;
279    }
280
281    /**
282     * Method that reads in and parses out the optional parameter names from the 1st line
283     * of the file. If the file does not contain a header line, then a list of generic
284     * parameter names is returned.
285     */
286    private FastTable<Text> inputParameterNames(LineNumberReader reader) throws IOException {
287
288        //  Read in the 1st line.
289        reader.mark(65536);
290        String line = reader.readLine();
291        if (line == null)
292            throw new IOException(MessageFormat.format(
293                    RESOURCES.getString("eolErr"),reader.getLineNumber()));
294
295        //  Create a list of accumulated data from the 1st line.
296        FastTable<Text> columns = FastTable.newInstance();
297
298        //  Create a list to contain the parameter names.
299        FastTable<Text> pNames = FastTable.newInstance();
300
301        //  Determine the delimiter(s) to use.
302        if (line.contains("\t"))
303            DELIMITERS = "\t";
304        else
305            DELIMITERS = " ,\t";
306
307        //  Create a tokenizer.
308        TextTokenizer tokenizer = TextTokenizer.valueOf(line, DELIMITERS);
309        tokenizer.setHonorQuotes(true);
310        tokenizer.setQuoteChar('"');
311
312        //  Add each label found to the list of parameter names.
313        boolean foundText = false;
314        int i = 1;
315        for (Text token : tokenizer) {
316            token = token.replace("\"", " ").trim();    //  Trim off any quote characters.
317            columns.add(token);
318            try {
319                TypeFormat.parseDouble(token);
320
321                //  Conversion to double did not fail, we have a number -- use default label.
322                pNames.add(Text.valueOf(MessageFormat.format(RESOURCES.getString("defColumnLabel"),i)));
323
324            } catch (NumberFormatException e) {
325                //  Conversion to double failed, we have found text.
326                foundText = true;
327                pNames.add(token);
328            }
329            ++i;
330        }
331
332        TextTokenizer.recycle(tokenizer);
333        FastTable.recycle(columns);
334
335        int length = pNames.size();
336        if (length < 1)
337            throw new IOException(RESOURCES.getString("noParamNamesErr"));
338
339        //  If no labels were found, then reset the read buffer to read the 1st lime again.
340        if (!foundText)
341            reader.reset();
342
343        return pNames;
344    }
345
346    /**
347     * Reads in the optional units (if they are present) and returns them in a list. The
348     * specified number of parameter units are returned.
349     */
350    private FastTable<Unit<?>> inputUnits(LineNumberReader reader, int numParams) throws IOException {
351
352        //  Create a list of "no units" units.
353        FastTable<Unit<?>> units = FastTable.newInstance();
354        for (int i = 0; i < numParams; ++i)
355            units.add(Dimensionless.UNIT);
356        int i = -1;
357
358        //  Mark position and read in unit line.
359        reader.mark(65536);
360        String line = reader.readLine();
361
362        //  Create a tokenizer.
363        TextTokenizer tokenizer = TextTokenizer.valueOf(line, DELIMITERS);
364
365        Text unitName = null;
366        try {
367            //  Loop through the line.
368            for (i = 0; i < numParams; ++i) {
369                unitName = Text.valueOf(NO_UNITS);
370
371                if (tokenizer.hasMoreTokens()) {
372                    unitName = tokenizer.nextToken().trim();
373
374                    //  Is this "unit" actually a number?
375                    if (i == 0) {
376                        try {
377                            TypeFormat.parseDouble(unitName);
378
379                            //  If that didn't cause an exception, stop processing.
380                            //  We have found numbers, not units.
381                            reader.reset();
382                            break;
383                        } catch (NumberFormatException e) {
384                        }
385                    }
386                }
387
388                if (!unitName.equals(Text.valueOf(NO_UNITS))) {
389                    //  Do some common unit name substitutions.
390                    unitName = unitName.replace("sec", "s");
391                    unitName = unitName.replace("-", "*");
392                    unitName = unitName.replace("deg", "\u00B0");
393                    Unit<?> unit = Unit.valueOf(unitName);
394                    units.set(i, unit);
395                }
396            }
397
398        } catch (IllegalArgumentException e) {
399            ++i;
400            throw new IOException(MessageFormat.format(
401                    RESOURCES.getString("unkwnUnitErr"),unitName,i,reader.getLineNumber()));
402        }
403
404        return units;
405    }
406
407    /**
408     * Method that reads in arrays of delimited data and creates array parameters out of
409     * them.
410     *
411     * @param reader The reader for our input stream.
412     * @param aCase  The data case that parameters should be added to.
413     * @param names  A list of parameter names.
414     * @param units  A list of parameter units.
415     * @param nf     The NumberFormat used to parse the numbers.
416     */
417    private void inputDataArrays(LineNumberReader reader, DataCase aCase,
418            List<Text> names, FastTable<Unit<?>> units) throws IOException {
419
420        int numParams = names.size();
421        int lineNum = reader.getLineNumber();
422
423        //  Read in all the lines of the file, storing them in memory, to count how
424        //  many lines (array elements) there are.
425        FastTable<Text> data = FastTable.newInstance();
426        String aLine;
427        do {
428            //  Read a line from the file.
429            aLine = reader.readLine();
430
431            //  Skip blank lines and lines starting with "--".
432            if (aLine == null)
433                break;
434            if (aLine.startsWith("--") || aLine.trim().equals(""))
435                continue;
436
437            //  Add the line to our list of lines.
438            data.add(Text.valueOf(aLine));
439
440        } while (true);
441
442        //  Create a list of properly sized arrays, one for each parameter.
443        FastTable<double[]> arrays = FastTable.newInstance();
444        int numElements = data.size();
445        for (int i = 0; i < numParams; ++i) {
446            arrays.add(new double[numElements]);
447        }
448
449        //  Now go through the list of data strings and parse out the numbers.
450        TextTokenizer tokenizer = TextTokenizer.newInstance();
451        tokenizer.setDelimiters(DELIMITERS);
452        Text token = null;
453        int column = 0;
454        try {
455            for (int row = 0; row < numElements; ++row, ++lineNum) {
456                Text text = data.get(row);
457
458                //  Create a tokenizer.
459                tokenizer.setText(text);
460
461                //  Loop through the line.
462                for (column = 0; column < numParams; ++column) {
463                    //  Parse out the value.
464                    token = tokenizer.nextToken();
465                    token = token.trim().toUpperCase();
466
467                    //  Catch any NaNs.
468                    double value = Double.NaN;
469                    if (!token.contentEquals("NAN")) {
470
471                        //  The POST2 to tabular data file converter has a bug that outputs
472                        //  some numbers without the "E" exponent. This code trys to deal 
473                        //  with that situation.
474                        int idx = token.indexOf("E");
475                        if (idx < 0) {
476                            //  Look for an "embedded" + or - sign that is NOT the 1st character in the token.
477                            idx = token.indexOf("-");
478                            if (idx < 0)
479                                idx = token.indexOf("+");
480
481                            if (idx > 0) {
482                                // Insert an "E" before the embedded + or - sign.
483                                token = token.insert(idx, Text.valueOf("E"));
484                            }
485                        }
486
487                        //  Now parse the token into a number.
488                        value = TypeFormat.parseDouble(token);
489                    }
490
491                    //  Store the value.
492                    arrays.get(column)[row] = value;
493                }
494            }   //  next row
495
496        } catch (NumberFormatException e) {
497            ++column;
498            ++lineNum;
499            String msg = "null";
500            if (token != null)
501                msg = token.toString();
502            throw new IOException(MessageFormat.format(RESOURCES.getString("numFmtErr"),
503                    msg, lineNum, column, names.get(column - 1)),e);
504
505        } catch (NoSuchElementException e) {
506            ++lineNum;
507            throw new IOException(MessageFormat.format(RESOURCES.getString("eolErr"),lineNum), e);
508        }
509
510        //  Convert from simple arrays to ArrayParam instances.
511        FastTable<ArrayParam> arrParams = FastTable.newInstance();
512        for (int i = 0; i < numParams; ++i) {
513            ArrayParam array = ArrayParam.valueOf(names.get(i), units.get(i), arrays.get(i));
514            arrParams.add(array);
515        }
516
517        //  The list of arrays should now be filled with numbers.
518        //  Put the arrays in the data case.
519        aCase.addAll(arrParams);
520
521        //  Clean up before leaving.
522        TextTokenizer.recycle(tokenizer);
523        FastTable.recycle(data);
524        FastTable.recycle(arrays);
525        FastTable.recycle(arrParams);
526    }
527
528    /**
529     * Method that writes out the specified list of array parameter objects to a tab
530     * delimited table of arrays.
531     */
532    private void writeArrays(BufferedWriter writer, List<ArrayParam> arrays) throws IOException {
533
534        //  First, write out all the parameter names.
535        for (ArrayParam array : arrays) {
536            Text name = Text.valueOf(array.getName());
537            name.print(writer);
538            writer.write(DELIM);
539        }
540        writer.newLine();
541
542        //  Now write out the units.
543        UnitFormat fmt = UnitFormat.getUCUMInstance();
544        for (ArrayParam array : arrays) {
545            Unit<?> units = array.getUnit();
546            if (units != null && !units.equals(Dimensionless.UNIT))
547                fmt.format(units, writer);
548            else
549                Text.valueOf(NO_UNITS).print(writer);
550            writer.write(DELIM);
551        }
552        writer.newLine();
553
554        //  Start writing out the arrays of numbers.
555        int numElements = arrays.get(0).size();
556        for (int i = 0; i < numElements; ++i) {
557            for (ArrayParam array : arrays) {
558                TypeFormat.format(array.getValue(i), writer);
559                writer.write(DELIM);
560            }
561            writer.newLine();
562        }
563
564    }
565
566}