001 /* 002 * CSVReader.java 003 * 004 * Copyright (C) 2005 Anupam Sengupta ([email protected]) 005 * 006 * This program is free software; you can redistribute it and/or 007 * modify it under the terms of the GNU General Public License 008 * as published by the Free Software Foundation; either version 2 009 * of the License, or (at your option) any later version. 010 * 011 * This program is distributed in the hope that it will be useful, 012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 014 * GNU General Public License for more details. 015 * 016 * You should have received a copy of the GNU General Public License 017 * along with this program; if not, write to the Free Software 018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 019 * 020 * Version $Revision: 1.3 $ 021 */ 022 package net.sf.anupam.csv; 023 024 import com.Ostermiller.util.CSVParse; 025 import com.Ostermiller.util.ExcelCSVParser; 026 import org.apache.commons.collections.CollectionUtils; 027 import org.apache.commons.logging.Log; 028 import org.apache.commons.logging.LogFactory; 029 030 import java.io.IOException; 031 import java.io.Reader; 032 import java.util.ArrayList; 033 import java.util.Iterator; 034 import java.util.List; 035 import java.util.NoSuchElementException; 036 037 /** 038 * Reads a CSV file and parses the individual fields for each CSV record in the 039 * file. The default delimiter is assumed to be the <code>,</code> (comma). 040 * <p/> 041 * <p/> 042 * The class uses the CSV Parser engines from <a 043 * href="http://ostermiller.org/utils/" target="_blank">Steven Ostermiller's 044 * site</a>. 045 * </p> 046 * 047 * @author Anupam Sengupta 048 * @version $Revision: 1.3 $ 049 * @see com.Ostermiller.util.CSVParse 050 * @since 1.5 051 */ 052 class CSVReader implements Iterable<List<String>> { 053 054 /** 055 * Logger to use. 056 */ 057 private static final Log LOG = LogFactory.getLog(CSVReader.class); 058 059 /** 060 * The CSV parser engine. 061 */ 062 private CSVParse parser; 063 064 /** 065 * Flag which indicates whether the reader has read all the records. 066 */ 067 private boolean readingComplete; 068 069 /** 070 * Flag which indicates whether the CSV file has a header row. 071 */ 072 private boolean headerPresent; 073 074 /** 075 * Constructor which accepts a reader on the CSV stream to parse. The 076 * presence of a CSV header row is also specified. If present, the header 077 * row will be skipped. 078 * 079 * @param csvReader the CSV stream reader from which to parse 080 * @param headerPresent indicates whether the CSV stream has a header record 081 */ 082 public CSVReader(final Reader csvReader, final boolean headerPresent) { 083 super(); 084 this.headerPresent = headerPresent; 085 086 parser = new ExcelCSVParser(csvReader); 087 088 } 089 090 /** 091 * Releases all system resources. 092 */ 093 public void close() { 094 try { 095 if (parser != null) { 096 parser.close(); 097 LOG.debug("Closed the CSV Reader"); 098 } 099 } catch (final IOException e) { 100 // Do nothing 101 } finally { 102 parser = null; 103 } 104 } 105 106 /** 107 * Finalizes this CSV reader and closes the IO connections. 108 * 109 * @throws Throwable thrown if the finalization fails. 110 * @see Object#finalize() 111 */ 112 @Override 113 protected void finalize() throws Throwable { 114 super.finalize(); 115 close(); 116 } 117 118 /** 119 * Returns an iterator over the parsed lines. The iterator returns a list of 120 * the CSV field values as a single value over each iteration. 121 * 122 * @return an iterator over the lines. 123 */ 124 public Iterator<List<String>> iterator() { 125 return new LineIterator(); 126 } 127 128 // ~ Inner Classes 129 // ---------------------------------------------------------- 130 131 /** 132 * Inner iterator class to provide the Iterable interface to the reader. 133 */ 134 private class LineIterator implements Iterator<List<String>> { 135 // ~ Methods 136 // ------------------------------------------------------------ 137 138 /** 139 * The parsed CSV field values. 140 */ 141 private String[] parsedValues; 142 143 /** 144 * Flag indicating whether the previous line was read. 145 */ 146 private boolean haveReadPreviousLine; 147 148 /** 149 * Default Constructor. 150 */ 151 public LineIterator() { 152 super(); 153 if (isHeaderPresent()) { 154 readOneLine(); 155 } 156 } 157 158 /** 159 * Returns <code>true</code> if there is at least one more parsed CSV line. 160 * 161 * @return <code>true></code> if there is at least one more parsed line 162 * @see java.util.Iterator#hasNext() 163 */ 164 public boolean hasNext() { 165 if (isReadingComplete()) { 166 return false; 167 } 168 169 if (!haveReadPreviousLine) { 170 readOneLine(); 171 haveReadPreviousLine = true; 172 } 173 return !isReadingComplete(); 174 } 175 176 /** 177 * Returns a list of the CSV field values for the current line. 178 * 179 * @return the next list of parsed CSV field values 180 * @see java.util.Iterator#next() 181 */ 182 public List<String> next() { 183 184 if (!haveReadPreviousLine) { 185 readOneLine(); 186 } else { 187 haveReadPreviousLine = false; 188 } 189 190 if (isReadingComplete()) { 191 throw new NoSuchElementException(); 192 } 193 194 final List<String> valueList = new ArrayList<String>( 195 parsedValues.length); 196 CollectionUtils.addAll(valueList, parsedValues); 197 198 return valueList; 199 200 } 201 202 /** 203 * Reads one CSV line using the CSV parser engine and stores the parsed 204 * line fields. 205 */ 206 private void readOneLine() { 207 try { 208 parsedValues = getParser().getLine(); 209 if (parsedValues == null) { 210 readingIsComplete(); 211 } 212 } catch (final IOException e) { 213 LOG.warn("Error in reading a line from the CSV stream ", e); 214 readingIsComplete(); 215 } 216 217 } 218 219 /** 220 * This method is not supported. 221 * 222 * @see java.util.Iterator#remove() 223 */ 224 public void remove() { 225 LOG 226 .debug("Invalid call to the unsupported remove() method on the iterator"); 227 throw new UnsupportedOperationException( 228 "This method is not supported"); 229 } 230 } 231 232 /** 233 * Indicates whether the header row is present or not. 234 * 235 * @return Returns <code>true</code> if the header row is present 236 */ 237 public boolean isHeaderPresent() { 238 return this.headerPresent; 239 } 240 241 /** 242 * Indicates whether the reader has read all CSV lines. 243 * 244 * @return Returns <code>true</code> if all CSV lines have been read 245 */ 246 public boolean isReadingComplete() { 247 return this.readingComplete; 248 } 249 250 /** 251 * Sets the flag to denote that all lines have been read. 252 */ 253 protected void readingIsComplete() { 254 this.readingComplete = true; 255 } 256 257 /** 258 * Returns the internal CSV parser engine instance for this reader. 259 * 260 * @return Returns the parser instance 261 */ 262 protected CSVParse getParser() { 263 return this.parser; 264 } 265 }