001 /*
002 * CSVReader.java
003 *
004 * Copyright (C) 2005 Anupam Sengupta ([email protected])
005 *
006 * This program is free software; you can redistribute it and/or
007 * modify it under the terms of the GNU General Public License
008 * as published by the Free Software Foundation; either version 2
009 * of the License, or (at your option) any later version.
010 *
011 * This program is distributed in the hope that it will be useful,
012 * but WITHOUT ANY WARRANTY; without even the implied warranty of
013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014 * GNU General Public License for more details.
015 *
016 * You should have received a copy of the GNU General Public License
017 * along with this program; if not, write to the Free Software
018 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
019 *
020 * Version $Revision: 1.3 $
021 */
022 package net.sf.anupam.csv;
023
024 import com.Ostermiller.util.CSVParse;
025 import com.Ostermiller.util.ExcelCSVParser;
026 import org.apache.commons.collections.CollectionUtils;
027 import org.apache.commons.logging.Log;
028 import org.apache.commons.logging.LogFactory;
029
030 import java.io.IOException;
031 import java.io.Reader;
032 import java.util.ArrayList;
033 import java.util.Iterator;
034 import java.util.List;
035 import java.util.NoSuchElementException;
036
037 /**
038 * Reads a CSV file and parses the individual fields for each CSV record in the
039 * file. The default delimiter is assumed to be the <code>,</code> (comma).
040 * <p/>
041 * <p/>
042 * The class uses the CSV Parser engines from <a
043 * href="http://ostermiller.org/utils/" target="_blank">Steven Ostermiller's
044 * site</a>.
045 * </p>
046 *
047 * @author Anupam Sengupta
048 * @version $Revision: 1.3 $
049 * @see com.Ostermiller.util.CSVParse
050 * @since 1.5
051 */
052 class CSVReader implements Iterable<List<String>> {
053
054 /**
055 * Logger to use.
056 */
057 private static final Log LOG = LogFactory.getLog(CSVReader.class);
058
059 /**
060 * The CSV parser engine.
061 */
062 private CSVParse parser;
063
064 /**
065 * Flag which indicates whether the reader has read all the records.
066 */
067 private boolean readingComplete;
068
069 /**
070 * Flag which indicates whether the CSV file has a header row.
071 */
072 private boolean headerPresent;
073
074 /**
075 * Constructor which accepts a reader on the CSV stream to parse. The
076 * presence of a CSV header row is also specified. If present, the header
077 * row will be skipped.
078 *
079 * @param csvReader the CSV stream reader from which to parse
080 * @param headerPresent indicates whether the CSV stream has a header record
081 */
082 public CSVReader(final Reader csvReader, final boolean headerPresent) {
083 super();
084 this.headerPresent = headerPresent;
085
086 parser = new ExcelCSVParser(csvReader);
087
088 }
089
090 /**
091 * Releases all system resources.
092 */
093 public void close() {
094 try {
095 if (parser != null) {
096 parser.close();
097 LOG.debug("Closed the CSV Reader");
098 }
099 } catch (final IOException e) {
100 // Do nothing
101 } finally {
102 parser = null;
103 }
104 }
105
106 /**
107 * Finalizes this CSV reader and closes the IO connections.
108 *
109 * @throws Throwable thrown if the finalization fails.
110 * @see Object#finalize()
111 */
112 @Override
113 protected void finalize() throws Throwable {
114 super.finalize();
115 close();
116 }
117
118 /**
119 * Returns an iterator over the parsed lines. The iterator returns a list of
120 * the CSV field values as a single value over each iteration.
121 *
122 * @return an iterator over the lines.
123 */
124 public Iterator<List<String>> iterator() {
125 return new LineIterator();
126 }
127
128 // ~ Inner Classes
129 // ----------------------------------------------------------
130
131 /**
132 * Inner iterator class to provide the Iterable interface to the reader.
133 */
134 private class LineIterator implements Iterator<List<String>> {
135 // ~ Methods
136 // ------------------------------------------------------------
137
138 /**
139 * The parsed CSV field values.
140 */
141 private String[] parsedValues;
142
143 /**
144 * Flag indicating whether the previous line was read.
145 */
146 private boolean haveReadPreviousLine;
147
148 /**
149 * Default Constructor.
150 */
151 public LineIterator() {
152 super();
153 if (isHeaderPresent()) {
154 readOneLine();
155 }
156 }
157
158 /**
159 * Returns <code>true</code> if there is at least one more parsed CSV line.
160 *
161 * @return <code>true></code> if there is at least one more parsed line
162 * @see java.util.Iterator#hasNext()
163 */
164 public boolean hasNext() {
165 if (isReadingComplete()) {
166 return false;
167 }
168
169 if (!haveReadPreviousLine) {
170 readOneLine();
171 haveReadPreviousLine = true;
172 }
173 return !isReadingComplete();
174 }
175
176 /**
177 * Returns a list of the CSV field values for the current line.
178 *
179 * @return the next list of parsed CSV field values
180 * @see java.util.Iterator#next()
181 */
182 public List<String> next() {
183
184 if (!haveReadPreviousLine) {
185 readOneLine();
186 } else {
187 haveReadPreviousLine = false;
188 }
189
190 if (isReadingComplete()) {
191 throw new NoSuchElementException();
192 }
193
194 final List<String> valueList = new ArrayList<String>(
195 parsedValues.length);
196 CollectionUtils.addAll(valueList, parsedValues);
197
198 return valueList;
199
200 }
201
202 /**
203 * Reads one CSV line using the CSV parser engine and stores the parsed
204 * line fields.
205 */
206 private void readOneLine() {
207 try {
208 parsedValues = getParser().getLine();
209 if (parsedValues == null) {
210 readingIsComplete();
211 }
212 } catch (final IOException e) {
213 LOG.warn("Error in reading a line from the CSV stream ", e);
214 readingIsComplete();
215 }
216
217 }
218
219 /**
220 * This method is not supported.
221 *
222 * @see java.util.Iterator#remove()
223 */
224 public void remove() {
225 LOG
226 .debug("Invalid call to the unsupported remove() method on the iterator");
227 throw new UnsupportedOperationException(
228 "This method is not supported");
229 }
230 }
231
232 /**
233 * Indicates whether the header row is present or not.
234 *
235 * @return Returns <code>true</code> if the header row is present
236 */
237 public boolean isHeaderPresent() {
238 return this.headerPresent;
239 }
240
241 /**
242 * Indicates whether the reader has read all CSV lines.
243 *
244 * @return Returns <code>true</code> if all CSV lines have been read
245 */
246 public boolean isReadingComplete() {
247 return this.readingComplete;
248 }
249
250 /**
251 * Sets the flag to denote that all lines have been read.
252 */
253 protected void readingIsComplete() {
254 this.readingComplete = true;
255 }
256
257 /**
258 * Returns the internal CSV parser engine instance for this reader.
259 *
260 * @return Returns the parser instance
261 */
262 protected CSVParse getParser() {
263 return this.parser;
264 }
265 }