/**
 * TabDataReader -- Translates data sets containing ArrayParam objects to and from tab
 * delimited arrays.
 *
 * Copyright (C) 2003-2017, by Joseph A. Huwaldt. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or modify it under the terms
 * of the GNU Lesser General Public License as published by the Free Software Foundation;
 * either version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE. See the GNU Library General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place -
 * Suite 330, Boston, MA 02111-1307, USA. Or visit: http://www.gnu.org/licenses/lgpl.html
 */
package jahuwaldt.js.datareader;

import jahuwaldt.io.FileUtils;
import jahuwaldt.js.util.TextTokenizer;
import jahuwaldt.swing.AppUtilities;
import java.awt.Frame;
import java.io.*;
import java.text.MessageFormat;
import java.util.List;
import java.util.NoSuchElementException;
import javax.measure.quantity.Dimensionless;
import javax.measure.unit.Unit;
import javax.measure.unit.UnitFormat;
import javolution.text.Text;
import javolution.text.TypeFormat;
import javolution.util.FastTable;

/**
 * This class translates data between a list of DataSet data structures and a stream
 * containing tab, space, or comma delimited columns of array data with a single line
 * header indicating the names of the parameters and an optional second line indicating
 * the units of each parameter. Only arrays can be read or written to this format. They
 * all must be the same length (in the same case).
 *
 * <p> Modified by: Joseph A. Huwaldt </p>
 *
 * @author Joseph A. Huwaldt, Date: May 19, 2003
 * @version March 21, 2017
 */
public class TabDataReader implements DataReader {

    // The default name given to DAT data sets.
    private CharSequence _defaultSetName = RESOURCES.getString("defSetName");

    // The delimiters used to separate columns in the file being read. Defaults to
    // space, tab, and comma; re-selected from the header line by inputParameterNames().
    private String _delimiters = " \t,";

    // The delimiter to use when writing out files.
    private static final String DELIM = "\t";

    // Text displayed when there are no units defined for an item.
    public static final String NO_UNITS = "nd";

    // A brief description of the data read by this reader.
    private static final String DESCRIPTION = RESOURCES.getString("tabDesc");

    // The preferred file extension for files of this reader's type.
    public static final String EXTENSION = RESOURCES.getString("tabDefExt");

    /**
     * Returns a string representation of the object. This will return a brief description
     * of the format read by this reader.
     */
    @Override
    public String toString() {
        return DESCRIPTION;
    }

    /**
     * Returns the preferred file extension (not including the ".") for files of this
     * DataReader's type.
     *
     * @return The preferred file extension for this file's type.
     */
    @Override
    public String getExtension() {
        return EXTENSION;
    }

    /**
     * Compares this object with the specified object for order based on the
     * <code>toString().compareTo(o.toString())</code> method. Returns a negative integer,
     * zero, or a positive integer as this object is less than, equal to, or greater than
     * the specified object.
     */
    @Override
    public int compareTo(DataReader o) {
        return this.toString().compareTo(o.toString());
    }

    /**
     * Method that determines if this reader can read data from the specified input
     * stream. Only the file name extension is examined; the stream itself is not read.
     *
     * @param pathName The path to the file to be read.
     * @param input    An input stream containing the data to be read. Any methods that
     *                 read from this stream must first set a mark and then reset back to
     *                 that mark before the method returns (even if it returns with an
     *                 exception).
     * @return DataReader.NO if the file is not recognized at all or DataReader.MAYBE if
     *         the file has an appropriate extension.
     * @throws java.io.IOException If the input stream could not be read.
     */
    @Override
    public int canReadData(String pathName, BufferedInputStream input) throws IOException {

        // Get the file name extension.
        String extension = FileUtils.getExtension(pathName);

        // Get the list of extensions that we MIGHT be able to read.
        String[] extensions = RESOURCES.getString("tabExtensions").split(",");
        for (String ext : extensions) {
            // Compare from the known-non-null side so that a missing (null) extension
            // simply fails to match instead of raising a NullPointerException.
            if (ext.trim().equalsIgnoreCase(extension)) {
                return MAYBE;
            }
        }

        return NO;
    }

    /**
     * Returns true. This class can write data to a tabbed arrays formatted file.
     *
     * @return Always returns true.
     */
    @Override
    public boolean canWriteData() {
        return true;
    }

    /**
     * The Tabbed Array format can save only a single case of arrays.
     *
     * @param parent Determines the Frame in which the dialog is displayed; if null, or if
     *               the parentComponent has no Frame, a default Frame is used.
     * @param data   The input data set that is to be selected for saving.
     * @return A list of DataSet objects containing the selected data to be saved. Could
     *         return null if the user selects nothing.
     */
    @Override
    public List<DataSet> selectDataForSaving(Frame parent, List<DataSet> data) {

        if (data == null || data.isEmpty()) {
            return null;
        }

        DataSet selected;

        if (data.size() == 1 && data.get(0).size() == 1) {
            // If there is just one case, save that.
            selected = data.get(0);

        } else {
            // If there is more than one case,
            // ask the user to select cases to export.
            SelectCasesDialog dialog = new SelectCasesDialog(parent, RESOURCES.getString("selectCaseTitle"),
                    RESOURCES.getString("selectCaseMsg"), data, true, true);
            dialog.setLocation(AppUtilities.dialogPosition(dialog));
            dialog.setVisible(true);

            // Retrieve the selected case.
            selected = dialog.getSelected();
            dialog.dispose();
        }
        if (selected == null || selected.size() < 1) {
            return null;
        }

        FastTable<DataSet> output = FastTable.newInstance();
        output.add(selected);

        return output;
    }

    /**
     * Method that reads in tab, space or comma delimited array data from the specified
     * input stream and returns that data as a list of {@link DataSet} objects.
     *
     * @param pathName The path to the file to be read.
     * @param input    An input stream containing the tab, space or comma delimited array
     *                 data.
     * @return A list of DataSet objects that contains the data read in from the
     *         specified stream (will contain a single data set which will contain a
     *         single case with ArrayParam objects for each column).
     * @throws IOException If there is a problem reading the specified stream.
     */
    @Override
    public List<DataSet> read(String pathName, InputStream input) throws IOException {

        // Wrap the input stream in a line number reader (platform default charset).
        LineNumberReader reader = new LineNumberReader(new InputStreamReader(input));

        // Create an empty list to store DataSets into.
        FastTable<DataSet> dataSets = FastTable.newInstance();

        // Tab files contain only a single data set, so create that.
        DataSet set = DataSet.newInstance(_defaultSetName);

        // Tab files contain only a single case, so create that.
        DataCase aCase = DataCase.newInstance(_defaultSetName);

        // Parse out the parameter names from the 1st line.
        FastTable<Text> names = inputParameterNames(reader);

        // Read in the optional units from the 2nd line.
        FastTable<Unit<?>> units = inputUnits(reader, names.size());

        // Now read in the arrays of data.
        inputDataArrays(reader, aCase, names, units);

        // Add the case to the set.
        set.add(aCase);

        // Add the set to the list of data sets being output.
        dataSets.add(set);

        // Clean up before leaving.
        FastTable.recycle(names);
        FastTable.recycle(units);

        return dataSets;
    }

    /**
     * Method that writes out all the data stored in the specified list of {@link DataSet}
     * objects to the specified output stream in tabbed array format. Only the 1st case in
     * the 1st data set in the specified list is written out since the tabbed array format
     * does not support multiple cases. Only arrays are written out since the tabbed array
     * format does not support scalars or text notes.
     *
     * @param output The output stream to which the data is to be written.
     * @param data   A list of {@link DataSet} objects containing data to be written out.
     *               Only the 1st case in the 1st data set in the list is written out.
     *               Nothing is written if the list is null or empty.
     * @throws IOException If there is a problem writing to the specified stream, or if
     *                     the 1st data set contains no case or no arrays.
     */
    @Override
    public void write(OutputStream output, List<DataSet> data) throws IOException {

        if (data == null || data.isEmpty()) {
            return;
        }

        // Wrap the output stream in a writer.
        BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(output));

        // Extract the 1st data set from the list.
        DataSet set = data.get(0);
        if (set == null || set.size() < 1) {
            // Report an empty set with the documented exception type rather than letting
            // set.get(0) fail with an unchecked exception.
            throw new IOException(RESOURCES.getString("emptyDataSetErr"));
        }

        // Extract the 1st case from the data set.
        DataCase aCase = set.get(0);

        // Extract all the array parameters.
        List<ArrayParam> arrays = aCase.getAllArrays();
        if (arrays == null || arrays.size() < 1) {
            throw new IOException(RESOURCES.getString("emptyDataSetErr"));
        }

        // Write out the arrays.
        writeArrays(writer, arrays);

        // Clean up before leaving (only recycle if the list really is a FastTable).
        if (arrays instanceof FastTable) {
            FastTable.recycle((FastTable<?>) arrays);
        }

        writer.flush();
    }

    /**
     * Sets the default set name to use.
     *
     * @param name The name to use as the default set name.
     */
    @Override
    public void setDefaultSetName(CharSequence name) {
        _defaultSetName = name;
    }

    /**
     * Method that reads in and parses out the optional parameter names from the 1st line
     * of the file. If the file does not contain a header line (every token on the 1st
     * line parses as a number), then a list of generic parameter names is returned and
     * the reader is reset so that the 1st line is read again as data.
     * <p>
     * As a side effect this method selects the column delimiters used for the rest of
     * the file: a tab only if the 1st line contains a tab, otherwise space, comma and tab.
     * </p>
     *
     * @param reader The reader for our input stream, positioned at the start of the file.
     * @return A list of parameter names, one per column found on the 1st line.
     * @throws IOException If the stream is empty, contains no columns, or can not be read.
     */
    private FastTable<Text> inputParameterNames(LineNumberReader reader) throws IOException {

        // Read in the 1st line.
        reader.mark(65536);
        String line = reader.readLine();
        if (line == null) {
            throw new IOException(MessageFormat.format(
                    RESOURCES.getString("eolErr"), reader.getLineNumber()));
        }

        // Create a list to contain the parameter names.
        FastTable<Text> pNames = FastTable.newInstance();

        // Determine the delimiter(s) to use.
        _delimiters = line.contains("\t") ? "\t" : " ,\t";

        // Create a tokenizer.
        TextTokenizer tokenizer = TextTokenizer.valueOf(line, _delimiters);
        tokenizer.setHonorQuotes(true);
        tokenizer.setQuoteChar('"');

        // Add each label found to the list of parameter names.
        boolean foundText = false;
        int i = 1;
        for (Text token : tokenizer) {
            token = token.replace("\"", " ").trim();    //  Trim off any quote characters.
            try {
                TypeFormat.parseDouble(token);

                // Conversion to double did not fail, we have a number -- use default label.
                pNames.add(Text.valueOf(MessageFormat.format(RESOURCES.getString("defColumnLabel"), i)));

            } catch (NumberFormatException e) {
                // Conversion to double failed, we have found text.
                foundText = true;
                pNames.add(token);
            }
            ++i;
        }

        TextTokenizer.recycle(tokenizer);

        if (pNames.size() < 1) {
            throw new IOException(RESOURCES.getString("noParamNamesErr"));
        }

        // If no labels were found, then reset the read buffer to read the 1st line again.
        if (!foundText) {
            reader.reset();
        }

        return pNames;
    }

    /**
     * Reads in the optional units (if they are present) and returns them in a list. The
     * specified number of parameter units are returned; columns without a unit token (or
     * with the {@link #NO_UNITS} token) are dimensionless. If the 1st token on the line
     * is a number the line is taken to be data: the reader is reset to the start of that
     * line and all units are dimensionless. The same is returned if the end of the stream
     * has already been reached.
     *
     * @param reader    The reader for our input stream, positioned after the header line.
     * @param numParams The number of parameters (columns) in the file.
     * @return A list of exactly <code>numParams</code> units.
     * @throws IOException If a unit name is not recognized or the stream can not be read.
     */
    private FastTable<Unit<?>> inputUnits(LineNumberReader reader, int numParams) throws IOException {

        // Create a list of "no units" units.
        FastTable<Unit<?>> units = FastTable.newInstance();
        for (int k = 0; k < numParams; ++k) {
            units.add(Dimensionless.UNIT);
        }

        // Mark position and read in unit line.
        reader.mark(65536);
        String line = reader.readLine();
        if (line == null) {
            // The stream ended right after the header: there is no units line to parse.
            return units;
        }

        // Create a tokenizer.
        TextTokenizer tokenizer = TextTokenizer.valueOf(line, _delimiters);

        Text unitName = null;
        int i = 0;
        try {
            // Loop through the line.
            for (i = 0; i < numParams; ++i) {
                unitName = Text.valueOf(NO_UNITS);

                if (tokenizer.hasMoreTokens()) {
                    unitName = tokenizer.nextToken().trim();

                    // Is this "unit" actually a number?
                    if (i == 0) {
                        try {
                            TypeFormat.parseDouble(unitName);

                            // If that didn't cause an exception, stop processing.
                            // We have found numbers, not units.
                            reader.reset();
                            break;
                        } catch (NumberFormatException ignored) {
                            // Not a number, so this line really is a line of units.
                        }
                    }
                }

                if (!unitName.contentEquals(NO_UNITS)) {
                    // Do some common unit name substitutions.
                    unitName = unitName.replace("sec", "s");
                    unitName = unitName.replace("-", "*");
                    unitName = unitName.replace("deg", "\u00B0");
                    Unit<?> unit = Unit.valueOf(unitName);
                    units.set(i, unit);
                }
            }

        } catch (IllegalArgumentException e) {
            // Report the offending column using a 1-based index.
            throw new IOException(MessageFormat.format(
                    RESOURCES.getString("unkwnUnitErr"), unitName, i + 1, reader.getLineNumber()), e);

        } finally {
            TextTokenizer.recycle(tokenizer);
        }

        return units;
    }

    /**
     * Method that reads in arrays of delimited data and creates array parameters out of
     * them. Blank lines and lines starting with "--" are skipped. Each remaining line
     * must supply at least one value per parameter; any extra tokens are ignored.
     *
     * @param reader The reader for our input stream.
     * @param aCase  The data case that parameters should be added to.
     * @param names  A list of parameter names.
     * @param units  A list of parameter units.
     * @throws IOException If a value can not be parsed as a number, if a line has too few
     *                     values, or if the stream can not be read.
     */
    private void inputDataArrays(LineNumberReader reader, DataCase aCase,
            List<Text> names, FastTable<Unit<?>> units) throws IOException {

        int numParams = names.size();

        // Read in all the lines of the file, storing them in memory, to count how
        // many lines (array elements) there are.  The line number of each retained line
        // is stored as well so that errors are reported against the true file line even
        // when blank or comment lines have been skipped.
        FastTable<Text> data = FastTable.newInstance();
        FastTable<Integer> lineNumbers = FastTable.newInstance();
        String aLine;
        while ((aLine = reader.readLine()) != null) {
            // Skip blank lines and lines starting with "--".
            if (aLine.startsWith("--") || aLine.trim().equals("")) {
                continue;
            }

            // Add the line to our list of lines.
            data.add(Text.valueOf(aLine));
            lineNumbers.add(reader.getLineNumber());
        }

        // Create a list of properly sized arrays, one for each parameter.
        FastTable<double[]> arrays = FastTable.newInstance();
        int numElements = data.size();
        for (int i = 0; i < numParams; ++i) {
            arrays.add(new double[numElements]);
        }

        // Now go through the list of data strings and parse out the numbers.
        TextTokenizer tokenizer = TextTokenizer.newInstance();
        tokenizer.setDelimiters(_delimiters);
        Text token = null;
        int row = 0;
        int column = 0;
        try {
            for (row = 0; row < numElements; ++row) {
                tokenizer.setText(data.get(row));

                // Loop through the line.
                for (column = 0; column < numParams; ++column) {
                    // Parse out the value.
                    token = tokenizer.nextToken();
                    token = token.trim().toUpperCase();

                    // Catch any NaNs.
                    double value = Double.NaN;
                    if (!token.contentEquals("NAN")) {
                        token = insertMissingExponent(token);

                        // Now parse the token into a number.
                        value = TypeFormat.parseDouble(token);
                    }

                    // Store the value.
                    arrays.get(column)[row] = value;
                }
            }   //  next row

        } catch (NumberFormatException e) {
            String msg = (token != null) ? token.toString() : "null";
            throw new IOException(MessageFormat.format(RESOURCES.getString("numFmtErr"),
                    msg, lineNumbers.get(row), column + 1, names.get(column)), e);

        } catch (NoSuchElementException e) {
            throw new IOException(MessageFormat.format(RESOURCES.getString("eolErr"),
                    lineNumbers.get(row)), e);
        }

        // Convert from simple arrays to ArrayParam instances.
        FastTable<ArrayParam> arrParams = FastTable.newInstance();
        for (int i = 0; i < numParams; ++i) {
            ArrayParam array = ArrayParam.valueOf(names.get(i), units.get(i), arrays.get(i));
            arrParams.add(array);
        }

        // The list of arrays should now be filled with numbers.
        // Put the arrays in the data case.
        aCase.addAll(arrParams);

        // Clean up before leaving.
        TextTokenizer.recycle(tokenizer);
        FastTable.recycle(data);
        FastTable.recycle(lineNumbers);
        FastTable.recycle(arrays);
        FastTable.recycle(arrParams);
    }

    /**
     * Repairs a number that was written without its "E" exponent character. The POST2 to
     * tabular data file converter has a bug that outputs some numbers this way (e.g.
     * "1.5-03" instead of "1.5E-03"). If the token contains no "E" but does contain a
     * "-" or "+" sign that is NOT the 1st character, an "E" is inserted before that
     * sign. The search starts at the 2nd character so that a leading sign on the mantissa
     * (e.g. "-1.5-03") does not hide the embedded exponent sign.
     *
     * @param token The trimmed, upper-case token to examine.
     * @return The token with an "E" inserted, or the token itself if no repair applies.
     */
    private static Text insertMissingExponent(Text token) {
        if (token.indexOf("E") >= 0) {
            return token;
        }

        // Look for an "embedded" + or - sign, skipping the 1st character.
        int idx = token.indexOf("-", 1);
        if (idx < 0) {
            idx = token.indexOf("+", 1);
        }

        // Insert an "E" before the embedded + or - sign.
        return (idx > 0) ? token.insert(idx, Text.valueOf("E")) : token;
    }

    /**
     * Method that writes out the specified list of array parameter objects to a tab
     * delimited table of arrays: a line of names, a line of units and then one line per
     * array element. Every field, including the last on each line, is followed by a tab.
     * The number of rows written is the size of the 1st array.
     *
     * @param writer The writer that the table is written to.
     * @param arrays The non-empty list of array parameters to write out, one per column.
     * @throws IOException If there is a problem writing to the specified writer.
     */
    private void writeArrays(BufferedWriter writer, List<ArrayParam> arrays) throws IOException {

        // First, write out all the parameter names.
        for (ArrayParam array : arrays) {
            Text name = Text.valueOf(array.getName());
            name.print(writer);
            writer.write(DELIM);
        }
        writer.newLine();

        // Now write out the units.
        UnitFormat fmt = UnitFormat.getUCUMInstance();
        for (ArrayParam array : arrays) {
            Unit<?> units = array.getUnit();
            if (units != null && !units.equals(Dimensionless.UNIT)) {
                fmt.format(units, writer);
            } else {
                Text.valueOf(NO_UNITS).print(writer);
            }
            writer.write(DELIM);
        }
        writer.newLine();

        // Start writing out the arrays of numbers.
        int numElements = arrays.get(0).size();
        for (int i = 0; i < numElements; ++i) {
            for (ArrayParam array : arrays) {
                TypeFormat.format(array.getValue(i), writer);
                writer.write(DELIM);
            }
            writer.newLine();
        }

    }

}