[CSV-220] Add API
[commons-csv.git] / src / main / java / org / apache / commons / csv / CSVFormat.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18 package org.apache.commons.csv;
19
20 import static org.apache.commons.csv.Constants.BACKSLASH;
21 import static org.apache.commons.csv.Constants.COMMA;
22 import static org.apache.commons.csv.Constants.COMMENT;
23 import static org.apache.commons.csv.Constants.EMPTY;
24 import static org.apache.commons.csv.Constants.CR;
25 import static org.apache.commons.csv.Constants.CRLF;
26 import static org.apache.commons.csv.Constants.DOUBLE_QUOTE_CHAR;
27 import static org.apache.commons.csv.Constants.LF;
28 import static org.apache.commons.csv.Constants.PIPE;
29 import static org.apache.commons.csv.Constants.SP;
30 import static org.apache.commons.csv.Constants.TAB;
31
32 import java.io.File;
33 import java.io.FileOutputStream;
34 import java.io.IOException;
35 import java.io.OutputStreamWriter;
36 import java.io.Reader;
37 import java.io.Serializable;
38 import java.io.StringWriter;
39 import java.nio.charset.Charset;
40 import java.nio.file.Files;
41 import java.nio.file.Path;
42 import java.sql.ResultSet;
43 import java.sql.ResultSetMetaData;
44 import java.sql.SQLException;
45 import java.util.Arrays;
46 import java.util.HashSet;
47 import java.util.Set;
48
49 /**
50 * Specifies the format of a CSV file and parses input.
51 *
52 * <h2>Using predefined formats</h2>
53 *
54 * <p>
55 * You can use one of the predefined formats:
56 * </p>
57 *
58 * <ul>
59 * <li>{@link #DEFAULT}</li>
60 * <li>{@link #EXCEL}</li>
61 * <li>{@link #MYSQL}</li>
62 * <li>{@link #RFC4180}</li>
63 * <li>{@link #TDF}</li>
64 * </ul>
65 *
66 * <p>
67 * For example:
68 * </p>
69 *
70 * <pre>
71 * CSVParser parser = CSVFormat.EXCEL.parse(reader);
72 * </pre>
73 *
74 * <p>
75 * The {@link CSVParser} provides static methods to parse other input types, for example:
76 * </p>
77 *
78 * <pre>
79 * CSVParser parser = CSVParser.parse(file, StandardCharsets.US_ASCII, CSVFormat.EXCEL);
80 * </pre>
81 *
82 * <h2>Defining formats</h2>
83 *
84 * <p>
85 * You can extend a format by calling the {@code with} methods. For example:
86 * </p>
87 *
88 * <pre>
89 * CSVFormat.EXCEL.withNullString(&quot;N/A&quot;).withIgnoreSurroundingSpaces(true);
90 * </pre>
91 *
92 * <h2>Defining column names</h2>
93 *
94 * <p>
95 * To define the column names you want to use to access records, write:
96 * </p>
97 *
98 * <pre>
99 * CSVFormat.EXCEL.withHeader(&quot;Col1&quot;, &quot;Col2&quot;, &quot;Col3&quot;);
100 * </pre>
101 *
102 * <p>
103 * Calling {@link #withHeader(String...)} lets you use the given names to address values in a {@link CSVRecord}, and
104 * assumes that your CSV source does not contain a first record that also defines column names.
105 *
106 * If it does, then you are overriding this metadata with your names and you should skip the first record by calling
107 * {@link #withSkipHeaderRecord(boolean)} with {@code true}.
108 * </p>
109 *
110 * <h2>Parsing</h2>
111 *
112 * <p>
113 * You can use a format directly to parse a reader. For example, to parse an Excel file with columns header, write:
114 * </p>
115 *
116 * <pre>
117 * Reader in = ...;
118 * CSVFormat.EXCEL.withHeader(&quot;Col1&quot;, &quot;Col2&quot;, &quot;Col3&quot;).parse(in);
119 * </pre>
120 *
121 * <p>
122 * For other input types, like resources, files, and URLs, use the static methods on {@link CSVParser}.
123 * </p>
124 *
125 * <h2>Referencing columns safely</h2>
126 *
127 * <p>
128 * If your source contains a header record, you can simplify your code and safely reference columns, by using
129 * {@link #withHeader(String...)} with no arguments:
130 * </p>
131 *
132 * <pre>
133 * CSVFormat.EXCEL.withHeader();
134 * </pre>
135 *
136 * <p>
137 * This causes the parser to read the first record and use its values as column names.
138 *
139 * Then, call one of the {@link CSVRecord} get methods that take a String column name argument:
140 * </p>
141 *
142 * <pre>
143 * String value = record.get(&quot;Col1&quot;);
144 * </pre>
145 *
146 * <p>
147 * This makes your code impervious to changes in column order in the CSV file.
148 * </p>
149 *
150 * <h2>Notes</h2>
151 *
152 * <p>
153 * This class is immutable.
154 * </p>
155 */
156 public final class CSVFormat implements Serializable {
157
158 /**
159 * Predefines formats.
160 *
161 * @since 1.2
162 */
163 public enum Predefined {
164
165 /**
166 * @see CSVFormat#DEFAULT
167 */
168 Default(CSVFormat.DEFAULT),
169
170 /**
171 * @see CSVFormat#EXCEL
172 */
173 Excel(CSVFormat.EXCEL),
174
175 /**
176 * @see CSVFormat#INFORMIX_UNLOAD
177 * @since 1.3
178 */
179 InformixUnload(CSVFormat.INFORMIX_UNLOAD),
180
181 /**
182 * @see CSVFormat#INFORMIX_UNLOAD_CSV
183 * @since 1.3
184 */
185 InformixUnloadCsv(CSVFormat.INFORMIX_UNLOAD_CSV),
186
187 /**
188 * @see CSVFormat#MYSQL
189 */
190 MySQL(CSVFormat.MYSQL),
191
192 /**
193 * @see CSVFormat#POSTGRESQL_CSV
194 * @since 1.5
195 */
196 PostgreSQLCsv(CSVFormat.POSTGRESQL_CSV),
197
198 /**
199 * @see CSVFormat#POSTGRESQL_CSV
200 */
201 PostgreSQLText(CSVFormat.POSTGRESQL_TEXT),
202
203 /**
204 * @see CSVFormat#RFC4180
205 */
206 RFC4180(CSVFormat.RFC4180),
207
208 /**
209 * @see CSVFormat#TDF
210 */
211 TDF(CSVFormat.TDF);
212
213 private final CSVFormat format;
214
215 Predefined(final CSVFormat format) {
216 this.format = format;
217 }
218
219 /**
220 * Gets the format.
221 *
222 * @return the format.
223 */
224 public CSVFormat getFormat() {
225 return format;
226 }
227 }
228
229 /**
230 * Standard comma separated format, as for {@link #RFC4180} but allowing empty lines.
231 *
232 * <p>
233 * Settings are:
234 * </p>
235 * <ul>
236 * <li>withDelimiter(',')</li>
237 * <li>withQuote('"')</li>
238 * <li>withRecordSeparator("\r\n")</li>
239 * <li>withIgnoreEmptyLines(true)</li>
240 * </ul>
241 *
242 * @see Predefined#Default
243 */
244 public static final CSVFormat DEFAULT = new CSVFormat(COMMA, DOUBLE_QUOTE_CHAR, null, null, null, false, true, CRLF,
245 null, null, null, false, false, false, false, false, false);
246
247 /**
248 * Excel file format (using a comma as the value delimiter). Note that the actual value delimiter used by Excel is
249 * locale dependent, it might be necessary to customize this format to accommodate to your regional settings.
250 *
251 * <p>
252 * For example for parsing or generating a CSV file on a French system the following format will be used:
253 * </p>
254 *
255 * <pre>
256 * CSVFormat fmt = CSVFormat.EXCEL.withDelimiter(';');
257 * </pre>
258 *
259 * <p>
260 * Settings are:
261 * </p>
262 * <ul>
263 * <li>{@link #withDelimiter(char) withDelimiter(',')}</li>
264 * <li>{@link #withQuote(char) withQuote('"')}</li>
265 * <li>{@link #withRecordSeparator(String) withRecordSeparator("\r\n")}</li>
266 * <li>{@link #withIgnoreEmptyLines(boolean) withIgnoreEmptyLines(false)}</li>
267 * <li>{@link #withAllowMissingColumnNames(boolean) withAllowMissingColumnNames(true)}</li>
268 * </ul>
269 * <p>
270 * Note: this is currently like {@link #RFC4180} plus {@link #withAllowMissingColumnNames(boolean)
271 * withAllowMissingColumnNames(true)}.
272 * </p>
273 *
274 * @see Predefined#Excel
275 */
276 // @formatter:off
277 public static final CSVFormat EXCEL = DEFAULT
278 .withIgnoreEmptyLines(false)
279 .withAllowMissingColumnNames();
280 // @formatter:on
281
282 /**
283 * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation.
284 *
285 * <p>
286 * This is a pipe-delimited format with a LF character as the line separator. Values are not quoted and special
287 * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
288 * </p>
289 *
290 * <p>
291 * Settings are:
292 * </p>
293 * <ul>
294 * <li>withDelimiter('|')</li>
295 * <li>withQuote("\"")</li>
296 * <li>withRecordSeparator('\n')</li>
297 * <li>withEscape('\\')</li>
298 * </ul>
299 *
300 * @see Predefined#InformixUnload
301 * @see <a href=
302 * "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
303 * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
304 * @since 1.3
305 */
306 // @formatter:off
307 public static final CSVFormat INFORMIX_UNLOAD = DEFAULT
308 .withDelimiter(PIPE)
309 .withEscape(BACKSLASH)
310 .withQuote(DOUBLE_QUOTE_CHAR)
311 .withRecordSeparator(LF);
312 // @formatter:on
313
314 /**
315 * Default Informix CSV UNLOAD format used by the {@code UNLOAD TO file_name} operation (escaping is disabled.)
316 *
317 * <p>
318 * This is a comma-delimited format with a LF character as the line separator. Values are not quoted and special
319 * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
320 * </p>
321 *
322 * <p>
323 * Settings are:
324 * </p>
325 * <ul>
326 * <li>withDelimiter(',')</li>
327 * <li>withQuote("\"")</li>
328 * <li>withRecordSeparator('\n')</li>
329 * </ul>
330 *
331 * @see Predefined#InformixUnloadCsv
332 * @see <a href=
333 * "http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm">
334 * http://www.ibm.com/support/knowledgecenter/SSBJG3_2.5.0/com.ibm.gen_busug.doc/c_fgl_InOutSql_UNLOAD.htm</a>
335 * @since 1.3
336 */
337 // @formatter:off
338 public static final CSVFormat INFORMIX_UNLOAD_CSV = DEFAULT
339 .withDelimiter(COMMA)
340 .withQuote(DOUBLE_QUOTE_CHAR)
341 .withRecordSeparator(LF);
342 // @formatter:on
343
344 /**
345 * Default MySQL format used by the {@code SELECT INTO OUTFILE} and {@code LOAD DATA INFILE} operations.
346 *
347 * <p>
348 * This is a tab-delimited format with a LF character as the line separator. Values are not quoted and special
349 * characters are escaped with {@code '\'}. The default NULL string is {@code "\\N"}.
350 * </p>
351 *
352 * <p>
353 * Settings are:
354 * </p>
355 * <ul>
356 * <li>withDelimiter('\t')</li>
357 * <li>withQuote(null)</li>
358 * <li>withRecordSeparator('\n')</li>
359 * <li>withIgnoreEmptyLines(false)</li>
360 * <li>withEscape('\\')</li>
361 * <li>withNullString("\\N")</li>
362 * <li>withQuoteMode(QuoteMode.ALL_NON_NULL)</li>
363 * </ul>
364 *
365 * @see Predefined#MySQL
366 * @see <a href="http://dev.mysql.com/doc/refman/5.1/en/load-data.html"> http://dev.mysql.com/doc/refman/5.1/en/load
367 * -data.html</a>
368 */
369 // @formatter:off
370 public static final CSVFormat MYSQL = DEFAULT
371 .withDelimiter(TAB)
372 .withEscape(BACKSLASH)
373 .withIgnoreEmptyLines(false)
374 .withQuote(null)
375 .withRecordSeparator(LF)
376 .withNullString("\\N")
377 .withQuoteMode(QuoteMode.ALL_NON_NULL);
378 // @formatter:on
379
380 /**
381 * Default PostgreSQL CSV format used by the {@code COPY} operation.
382 *
383 * <p>
384 * This is a comma-delimited format with a LF character as the line separator. Values are double quoted and special
385 * characters are escaped with {@code '"'}. The default NULL string is {@code ""}.
386 * </p>
387 *
388 * <p>
389 * Settings are:
390 * </p>
391 * <ul>
392 * <li>withDelimiter(',')</li>
393 * <li>withQuote('"')</li>
394 * <li>withRecordSeparator('\n')</li>
395 * <li>withIgnoreEmptyLines(false)</li>
396 * <li>withEscape('"')</li>
397 * <li>withNullString("")</li>
398 * <li>withQuoteMode(QuoteMode.ALL_NON_NULL)</li>
399 * </ul>
400 *
401 * @see Predefined#PostgreSQLCsv
402 * @see <a href="https://www.postgresql.org/docs/current/sql-copy.html">
403 * https://www.postgresql.org/docs/current/sql-copy.html</a>
404 * @since 1.5
405 */
406 // @formatter:off
407 public static final CSVFormat POSTGRESQL_CSV = DEFAULT
408 .withDelimiter(COMMA)
409 .withEscape(DOUBLE_QUOTE_CHAR)
410 .withIgnoreEmptyLines(false)
411 .withQuote(DOUBLE_QUOTE_CHAR)
412 .withRecordSeparator(LF)
413 .withNullString(EMPTY)
414 .withQuoteMode(QuoteMode.ALL_NON_NULL);
415 // @formatter:on
416
417 /**
418 * Default PostgreSQL text format used by the {@code COPY} operation.
419 *
420 * <p>
421 * This is a tab-delimited format with a LF character as the line separator. Values are double quoted and special
422 * characters are escaped with {@code '"'}. The default NULL string is {@code "\\N"}.
423 * </p>
424 *
425 * <p>
426 * Settings are:
427 * </p>
428 * <ul>
429 * <li>withDelimiter('\t')</li>
430 * <li>withQuote('"')</li>
431 * <li>withRecordSeparator('\n')</li>
432 * <li>withIgnoreEmptyLines(false)</li>
433 * <li>withEscape('"')</li>
434 * <li>withNullString("\\N")</li>
435 * <li>withQuoteMode(QuoteMode.ALL_NON_NULL)</li>
436 * </ul>
437 *
438 * @see Predefined#PostgreSQLText
439 * @see <a href="https://www.postgresql.org/docs/current/sql-copy.html">
440 * https://www.postgresql.org/docs/current/sql-copy.html</a>
441 * @since 1.5
442 */
443 // @formatter:off
444 public static final CSVFormat POSTGRESQL_TEXT = DEFAULT
445 .withDelimiter(TAB)
446 .withEscape(DOUBLE_QUOTE_CHAR)
447 .withIgnoreEmptyLines(false)
448 .withQuote(DOUBLE_QUOTE_CHAR)
449 .withRecordSeparator(LF)
450 .withNullString("\\N")
451 .withQuoteMode(QuoteMode.ALL_NON_NULL);
452 // @formatter:on
453
454 /**
455 * Comma separated format as defined by <a href="http://tools.ietf.org/html/rfc4180">RFC 4180</a>.
456 *
457 * <p>
458 * Settings are:
459 * </p>
460 * <ul>
461 * <li>withDelimiter(',')</li>
462 * <li>withQuote('"')</li>
463 * <li>withRecordSeparator("\r\n")</li>
464 * <li>withIgnoreEmptyLines(false)</li>
465 * </ul>
466 *
467 * @see Predefined#RFC4180
468 */
469 public static final CSVFormat RFC4180 = DEFAULT.withIgnoreEmptyLines(false);
470
471 private static final long serialVersionUID = 1L;
472
473 /**
474 * Tab-delimited format.
475 *
476 * <p>
477 * Settings are:
478 * </p>
479 * <ul>
480 * <li>withDelimiter('\t')</li>
481 * <li>withQuote('"')</li>
482 * <li>withRecordSeparator("\r\n")</li>
483 * <li>withIgnoreSurroundingSpaces(true)</li>
484 * </ul>
485 *
486 * @see Predefined#TDF
487 */
488 // @formatter:off
489 public static final CSVFormat TDF = DEFAULT
490 .withDelimiter(TAB)
491 .withIgnoreSurroundingSpaces();
492 // @formatter:on
493
494 /**
495 * Returns true if the given character is a line break character.
496 *
497 * @param c
498 * the character to check
499 *
500 * @return true if <code>c</code> is a line break character
501 */
502 private static boolean isLineBreak(final char c) {
503 return c == LF || c == CR;
504 }
505
506 /**
507 * Returns true if the given character is a line break character.
508 *
509 * @param c
510 * the character to check, may be null
511 *
512 * @return true if <code>c</code> is a line break character (and not null)
513 */
514 private static boolean isLineBreak(final Character c) {
515 return c != null && isLineBreak(c.charValue());
516 }
517
518 /**
519 * Creates a new CSV format with the specified delimiter.
520 *
521 * <p>
522 * Use this method if you want to create a CSVFormat from scratch. All fields but the delimiter will be initialized
523 * with null/false.
524 * </p>
525 *
526 * @param delimiter
527 * the char used for value separation, must not be a line break character
528 * @return a new CSV format.
529 * @throws IllegalArgumentException
530 * if the delimiter is a line break character
531 *
532 * @see #DEFAULT
533 * @see #RFC4180
534 * @see #MYSQL
535 * @see #EXCEL
536 * @see #TDF
537 */
538 public static CSVFormat newFormat(final char delimiter) {
539 return new CSVFormat(delimiter, null, null, null, null, false, false, null, null, null, null, false, false,
540 false, false, false, false);
541 }
542
543 /**
544 * Gets one of the predefined formats from {@link CSVFormat.Predefined}.
545 *
546 * @param format
547 * name
548 * @return one of the predefined formats
549 * @since 1.2
550 */
551 public static CSVFormat valueOf(final String format) {
552 return CSVFormat.Predefined.valueOf(format).getFormat();
553 }
554
555 private final boolean allowMissingColumnNames;
556
557 private final Character commentMarker; // null if commenting is disabled
558
559 private final char delimiter;
560
561 private final Character escapeCharacter; // null if escaping is disabled
562
563 private final String[] header; // array of header column names
564
565 private final String[] headerComments; // array of header comment lines
566
567 private final boolean ignoreEmptyLines;
568
569 private final boolean ignoreHeaderCase; // should ignore header names case
570
571 private final boolean ignoreSurroundingSpaces; // Should leading/trailing spaces be ignored around values?
572
573 private final String nullString; // the string to be used for null values
574
575 private final Character quoteCharacter; // null if quoting is disabled
576
577 private final QuoteMode quoteMode;
578
579 private final String recordSeparator; // for outputs
580
581 private final boolean skipHeaderRecord;
582
583 private final boolean trailingDelimiter;
584
585 private final boolean trim;
586
587 private final boolean autoFlush;
588
/**
 * Creates a customized CSV format and validates it.
 *
 * @param delimiter
 *            the char used for value separation, must not be a line break character
 * @param quoteChar
 *            the Character used as value encapsulation marker, may be {@code null} to disable
 * @param quoteMode
 *            the quote mode
 * @param commentStart
 *            the Character used for comment identification, may be {@code null} to disable
 * @param escape
 *            the Character used to escape special characters in values, may be {@code null} to disable
 * @param ignoreSurroundingSpaces
 *            {@code true} when whitespaces enclosing values should be ignored
 * @param ignoreEmptyLines
 *            {@code true} when the parser should skip empty lines
 * @param recordSeparator
 *            the line separator to use for output
 * @param nullString
 *            the String to convert to and from {@code null}, may be {@code null} for no substitution
 * @param headerComments
 *            the comments to be printed by the Printer before the actual CSV data
 * @param header
 *            the header; the array is cloned, so later changes by the caller are not seen
 * @param skipHeaderRecord
 *            whether to skip the header record
 * @param allowMissingColumnNames
 *            whether missing column names are allowed when parsing the header line
 * @param ignoreHeaderCase
 *            whether header names are accessed ignoring case
 * @param trim
 *            whether to trim leading and trailing blanks
 * @param trailingDelimiter
 *            whether to add a trailing delimiter
 * @param autoFlush
 *            whether to flush on close
 * @throws IllegalArgumentException
 *             if the delimiter is a line break character
 */
private CSVFormat(final char delimiter, final Character quoteChar, final QuoteMode quoteMode,
        final Character commentStart, final Character escape, final boolean ignoreSurroundingSpaces,
        final boolean ignoreEmptyLines, final String recordSeparator, final String nullString,
        final Object[] headerComments, final String[] header, final boolean skipHeaderRecord,
        final boolean allowMissingColumnNames, final boolean ignoreHeaderCase, final boolean trim,
        final boolean trailingDelimiter, final boolean autoFlush) {
    // Characters with a syntactic role.
    this.delimiter = delimiter;
    this.quoteCharacter = quoteChar;
    this.commentMarker = commentStart;
    this.escapeCharacter = escape;

    // Strings and modes.
    this.quoteMode = quoteMode;
    this.recordSeparator = recordSeparator;
    this.nullString = nullString;

    // Header configuration; the header array is copied defensively.
    if (header == null) {
        this.header = null;
    } else {
        this.header = header.clone();
    }
    this.headerComments = toStringArray(headerComments);
    this.skipHeaderRecord = skipHeaderRecord;
    this.allowMissingColumnNames = allowMissingColumnNames;
    this.ignoreHeaderCase = ignoreHeaderCase;

    // Boolean switches.
    this.ignoreSurroundingSpaces = ignoreSurroundingSpaces;
    this.ignoreEmptyLines = ignoreEmptyLines;
    this.trim = trim;
    this.trailingDelimiter = trailingDelimiter;
    this.autoFlush = autoFlush;

    // Must run last: validation happens once every field is assigned.
    validate();
}
653
654 @Override
655 public boolean equals(final Object obj) {
656 if (this == obj) {
657 return true;
658 }
659 if (obj == null) {
660 return false;
661 }
662 if (getClass() != obj.getClass()) {
663 return false;
664 }
665
666 final CSVFormat other = (CSVFormat) obj;
667 if (delimiter != other.delimiter) {
668 return false;
669 }
670 if (quoteMode != other.quoteMode) {
671 return false;
672 }
673 if (quoteCharacter == null) {
674 if (other.quoteCharacter != null) {
675 return false;
676 }
677 } else if (!quoteCharacter.equals(other.quoteCharacter)) {
678 return false;
679 }
680 if (commentMarker == null) {
681 if (other.commentMarker != null) {
682 return false;
683 }
684 } else if (!commentMarker.equals(other.commentMarker)) {
685 return false;
686 }
687 if (escapeCharacter == null) {
688 if (other.escapeCharacter != null) {
689 return false;
690 }
691 } else if (!escapeCharacter.equals(other.escapeCharacter)) {
692 return false;
693 }
694 if (nullString == null) {
695 if (other.nullString != null) {
696 return false;
697 }
698 } else if (!nullString.equals(other.nullString)) {
699 return false;
700 }
701 if (!Arrays.equals(header, other.header)) {
702 return false;
703 }
704 if (ignoreSurroundingSpaces != other.ignoreSurroundingSpaces) {
705 return false;
706 }
707 if (ignoreEmptyLines != other.ignoreEmptyLines) {
708 return false;
709 }
710 if (skipHeaderRecord != other.skipHeaderRecord) {
711 return false;
712 }
713 if (recordSeparator == null) {
714 if (other.recordSeparator != null) {
715 return false;
716 }
717 } else if (!recordSeparator.equals(other.recordSeparator)) {
718 return false;
719 }
720 return true;
721 }
722
723 /**
724 * Formats the specified values.
725 *
726 * @param values
727 * the values to format
728 * @return the formatted values
729 */
730 public String format(final Object... values) {
731 final StringWriter out = new StringWriter();
732 try (final CSVPrinter csvPrinter = new CSVPrinter(out, this)) {
733 csvPrinter.printRecord(values);
734 return out.toString().trim();
735 } catch (final IOException e) {
736 // should not happen because a StringWriter does not do IO.
737 throw new IllegalStateException(e);
738 }
739 }
740
741 /**
742 * Specifies whether missing column names are allowed when parsing the header line.
743 *
744 * @return {@code true} if missing column names are allowed when parsing the header line, {@code false} to throw an
745 * {@link IllegalArgumentException}.
746 */
747 public boolean getAllowMissingColumnNames() {
748 return allowMissingColumnNames;
749 }
750
751 /**
752 * Returns whether to flush on close.
753 *
754 * @return whether to flush on close.
755 * @since 1.6
756 */
757 public boolean getAutoFlush() {
758 return autoFlush;
759 }
760
761 /**
762 * Returns the character marking the start of a line comment.
763 *
764 * @return the comment start marker, may be {@code null}
765 */
766 public Character getCommentMarker() {
767 return commentMarker;
768 }
769
770 /**
771 * Returns the character delimiting the values (typically ';', ',' or '\t').
772 *
773 * @return the delimiter character
774 */
775 public char getDelimiter() {
776 return delimiter;
777 }
778
779 /**
780 * Returns the escape character.
781 *
782 * @return the escape character, may be {@code null}
783 */
784 public Character getEscapeCharacter() {
785 return escapeCharacter;
786 }
787
788 /**
789 * Returns a copy of the header array.
790 *
791 * @return a copy of the header array; {@code null} if disabled, the empty array if to be read from the file
792 */
793 public String[] getHeader() {
794 return header != null ? header.clone() : null;
795 }
796
797 /**
798 * Returns a copy of the header comment array.
799 *
800 * @return a copy of the header comment array; {@code null} if disabled.
801 */
802 public String[] getHeaderComments() {
803 return headerComments != null ? headerComments.clone() : null;
804 }
805
806 /**
807 * Specifies whether empty lines between records are ignored when parsing input.
808 *
809 * @return {@code true} if empty lines between records are ignored, {@code false} if they are turned into empty
810 * records.
811 */
812 public boolean getIgnoreEmptyLines() {
813 return ignoreEmptyLines;
814 }
815
816 /**
817 * Specifies whether header names will be accessed ignoring case.
818 *
819 * @return {@code true} if header names cases are ignored, {@code false} if they are case sensitive.
820 * @since 1.3
821 */
822 public boolean getIgnoreHeaderCase() {
823 return ignoreHeaderCase;
824 }
825
826 /**
827 * Specifies whether spaces around values are ignored when parsing input.
828 *
829 * @return {@code true} if spaces around values are ignored, {@code false} if they are treated as part of the value.
830 */
831 public boolean getIgnoreSurroundingSpaces() {
832 return ignoreSurroundingSpaces;
833 }
834
835 /**
836 * Gets the String to convert to and from {@code null}.
837 * <ul>
838 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
839 * records.</li>
840 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
841 * </ul>
842 *
843 * @return the String to convert to and from {@code null}. No substitution occurs if {@code null}
844 */
845 public String getNullString() {
846 return nullString;
847 }
848
849 /**
850 * Returns the character used to encapsulate values containing special characters.
851 *
852 * @return the quoteChar character, may be {@code null}
853 */
854 public Character getQuoteCharacter() {
855 return quoteCharacter;
856 }
857
858 /**
859 * Returns the quote policy output fields.
860 *
861 * @return the quote policy
862 */
863 public QuoteMode getQuoteMode() {
864 return quoteMode;
865 }
866
867 /**
868 * Returns the record separator delimiting output records.
869 *
870 * @return the record separator
871 */
872 public String getRecordSeparator() {
873 return recordSeparator;
874 }
875
876 /**
877 * Returns whether to skip the header record.
878 *
879 * @return whether to skip the header record.
880 */
881 public boolean getSkipHeaderRecord() {
882 return skipHeaderRecord;
883 }
884
885 /**
886 * Returns whether to add a trailing delimiter.
887 *
888 * @return whether to add a trailing delimiter.
889 * @since 1.3
890 */
891 public boolean getTrailingDelimiter() {
892 return trailingDelimiter;
893 }
894
895 /**
896 * Returns whether to trim leading and trailing blanks.
897 *
898 * @return whether to trim leading and trailing blanks.
899 */
900 public boolean getTrim() {
901 return trim;
902 }
903
904 @Override
905 public int hashCode() {
906 final int prime = 31;
907 int result = 1;
908
909 result = prime * result + delimiter;
910 result = prime * result + ((quoteMode == null) ? 0 : quoteMode.hashCode());
911 result = prime * result + ((quoteCharacter == null) ? 0 : quoteCharacter.hashCode());
912 result = prime * result + ((commentMarker == null) ? 0 : commentMarker.hashCode());
913 result = prime * result + ((escapeCharacter == null) ? 0 : escapeCharacter.hashCode());
914 result = prime * result + ((nullString == null) ? 0 : nullString.hashCode());
915 result = prime * result + (ignoreSurroundingSpaces ? 1231 : 1237);
916 result = prime * result + (ignoreHeaderCase ? 1231 : 1237);
917 result = prime * result + (ignoreEmptyLines ? 1231 : 1237);
918 result = prime * result + (skipHeaderRecord ? 1231 : 1237);
919 result = prime * result + ((recordSeparator == null) ? 0 : recordSeparator.hashCode());
920 result = prime * result + Arrays.hashCode(header);
921 return result;
922 }
923
924 /**
925 * Specifies whether comments are supported by this format.
926 *
927 * Note that the comment introducer character is only recognized at the start of a line.
928 *
929 * @return {@code true} is comments are supported, {@code false} otherwise
930 */
931 public boolean isCommentMarkerSet() {
932 return commentMarker != null;
933 }
934
935 /**
936 * Returns whether escape are being processed.
937 *
938 * @return {@code true} if escapes are processed
939 */
940 public boolean isEscapeCharacterSet() {
941 return escapeCharacter != null;
942 }
943
944 /**
945 * Returns whether a nullString has been defined.
946 *
947 * @return {@code true} if a nullString is defined
948 */
949 public boolean isNullStringSet() {
950 return nullString != null;
951 }
952
953 /**
954 * Returns whether a quoteChar has been defined.
955 *
956 * @return {@code true} if a quoteChar is defined
957 */
958 public boolean isQuoteCharacterSet() {
959 return quoteCharacter != null;
960 }
961
962 /**
963 * Parses the specified content.
964 *
965 * <p>
966 * See also the various static parse methods on {@link CSVParser}.
967 * </p>
968 *
969 * @param in
970 * the input stream
971 * @return a parser over a stream of {@link CSVRecord}s.
972 * @throws IOException
973 * If an I/O error occurs
974 */
975 public CSVParser parse(final Reader in) throws IOException {
976 return new CSVParser(in, this);
977 }
978
979 /**
980 * Prints to the specified output.
981 *
982 * <p>
983 * See also {@link CSVPrinter}.
984 * </p>
985 *
986 * @param out
987 * the output.
988 * @return a printer to an output.
989 * @throws IOException
990 * thrown if the optional header cannot be printed.
991 */
992 public CSVPrinter print(final Appendable out) throws IOException {
993 return new CSVPrinter(out, this);
994 }
995
996 /**
997 * Prints to the specified output.
998 *
999 * <p>
1000 * See also {@link CSVPrinter}.
1001 * </p>
1002 *
1003 * @param out
1004 * the output.
1005 * @param charset
1006 * A charset.
1007 * @return a printer to an output.
1008 * @throws IOException
1009 * thrown if the optional header cannot be printed.
1010 * @since 1.5
1011 */
1012 @SuppressWarnings("resource")
1013 public CSVPrinter print(final File out, final Charset charset) throws IOException {
1014 // The writer will be closed when close() is called.
1015 return new CSVPrinter(new OutputStreamWriter(new FileOutputStream(out), charset), this);
1016 }
1017
1018 /**
1019 * Prints the {@code value} as the next value on the line to {@code out}. The value will be escaped or encapsulated
1020 * as needed. Useful when one wants to avoid creating CSVPrinters.
1021 *
1022 * @param value
1023 * value to output.
1024 * @param out
1025 * where to print the value.
1026 * @param newRecord
1027 * if this a new record.
1028 * @throws IOException
1029 * If an I/O error occurs.
1030 * @since 1.4
1031 */
1032 public void print(final Object value, final Appendable out, final boolean newRecord) throws IOException {
1033 // null values are considered empty
1034 // Only call CharSequence.toString() if you have to, helps GC-free use cases.
1035 CharSequence charSequence;
1036 if (value == null) {
1037 // https://issues.apache.org/jira/browse/CSV-203
1038 if (null == nullString) {
1039 charSequence = EMPTY;
1040 } else {
1041 if (QuoteMode.ALL == quoteMode) {
1042 charSequence = quoteCharacter + nullString + quoteCharacter;
1043 } else {
1044 charSequence = nullString;
1045 }
1046 }
1047 } else {
1048 charSequence = value instanceof CharSequence ? (CharSequence) value : value.toString();
1049 }
1050 charSequence = getTrim() ? trim(charSequence) : charSequence;
1051 this.print(value, charSequence, 0, charSequence.length(), out, newRecord);
1052 }
1053
1054 private void print(final Object object, final CharSequence value, final int offset, final int len,
1055 final Appendable out, final boolean newRecord) throws IOException {
1056 if (!newRecord) {
1057 out.append(getDelimiter());
1058 }
1059 if (object == null) {
1060 out.append(value);
1061 } else if (isQuoteCharacterSet()) {
1062 // the original object is needed so can check for Number
1063 printAndQuote(object, value, offset, len, out, newRecord);
1064 } else if (isEscapeCharacterSet()) {
1065 printAndEscape(value, offset, len, out);
1066 } else {
1067 out.append(value, offset, offset + len);
1068 }
1069 }
1070
1071 /**
1072 * Prints to the specified output.
1073 *
1074 * <p>
1075 * See also {@link CSVPrinter}.
1076 * </p>
1077 *
1078 * @param out
1079 * the output.
1080 * @param charset
1081 * A charset.
1082 * @return a printer to an output.
1083 * @throws IOException
1084 * thrown if the optional header cannot be printed.
1085 * @since 1.5
1086 */
1087 public CSVPrinter print(final Path out, final Charset charset) throws IOException {
1088 return print(Files.newBufferedWriter(out, charset));
1089 }
1090
1091 /*
1092 * Note: must only be called if escaping is enabled, otherwise will generate NPE
1093 */
1094 private void printAndEscape(final CharSequence value, final int offset, final int len, final Appendable out)
1095 throws IOException {
1096 int start = offset;
1097 int pos = offset;
1098 final int end = offset + len;
1099
1100 final char delim = getDelimiter();
1101 final char escape = getEscapeCharacter().charValue();
1102
1103 while (pos < end) {
1104 char c = value.charAt(pos);
1105 if (c == CR || c == LF || c == delim || c == escape) {
1106 // write out segment up until this char
1107 if (pos > start) {
1108 out.append(value, start, pos);
1109 }
1110 if (c == LF) {
1111 c = 'n';
1112 } else if (c == CR) {
1113 c = 'r';
1114 }
1115
1116 out.append(escape);
1117 out.append(c);
1118
1119 start = pos + 1; // start on the current char after this one
1120 }
1121
1122 pos++;
1123 }
1124
1125 // write last segment
1126 if (pos > start) {
1127 out.append(value, start, pos);
1128 }
1129 }
1130
1131 /*
1132 * Note: must only be called if quoting is enabled, otherwise will generate NPE
1133 */
1134 // the original object is needed so can check for Number
1135 private void printAndQuote(final Object object, final CharSequence value, final int offset, final int len,
1136 final Appendable out, final boolean newRecord) throws IOException {
1137 boolean quote = false;
1138 int start = offset;
1139 int pos = offset;
1140 final int end = offset + len;
1141
1142 final char delimChar = getDelimiter();
1143 final char quoteChar = getQuoteCharacter().charValue();
1144
1145 QuoteMode quoteModePolicy = getQuoteMode();
1146 if (quoteModePolicy == null) {
1147 quoteModePolicy = QuoteMode.MINIMAL;
1148 }
1149 switch (quoteModePolicy) {
1150 case ALL:
1151 case ALL_NON_NULL:
1152 quote = true;
1153 break;
1154 case NON_NUMERIC:
1155 quote = !(object instanceof Number);
1156 break;
1157 case NONE:
1158 // Use the existing escaping code
1159 printAndEscape(value, offset, len, out);
1160 return;
1161 case MINIMAL:
1162 if (len <= 0) {
1163 // always quote an empty token that is the first
1164 // on the line, as it may be the only thing on the
1165 // line. If it were not quoted in that case,
1166 // an empty line has no tokens.
1167 if (newRecord) {
1168 quote = true;
1169 }
1170 } else {
1171 char c = value.charAt(pos);
1172
1173 if (c <= COMMENT) {
1174 // Some other chars at the start of a value caused the parser to fail, so for now
1175 // encapsulate if we start in anything less than '#'. We are being conservative
1176 // by including the default comment char too.
1177 quote = true;
1178 } else {
1179 while (pos < end) {
1180 c = value.charAt(pos);
1181 if (c == LF || c == CR || c == quoteChar || c == delimChar) {
1182 quote = true;
1183 break;
1184 }
1185 pos++;
1186 }
1187
1188 if (!quote) {
1189 pos = end - 1;
1190 c = value.charAt(pos);
1191 // Some other chars at the end caused the parser to fail, so for now
1192 // encapsulate if we end in anything less than ' '
1193 if (c <= SP) {
1194 quote = true;
1195 }
1196 }
1197 }
1198 }
1199
1200 if (!quote) {
1201 // no encapsulation needed - write out the original value
1202 out.append(value, start, end);
1203 return;
1204 }
1205 break;
1206 default:
1207 throw new IllegalStateException("Unexpected Quote value: " + quoteModePolicy);
1208 }
1209
1210 if (!quote) {
1211 // no encapsulation needed - write out the original value
1212 out.append(value, start, end);
1213 return;
1214 }
1215
1216 // we hit something that needed encapsulation
1217 out.append(quoteChar);
1218
1219 // Pick up where we left off: pos should be positioned on the first character that caused
1220 // the need for encapsulation.
1221 while (pos < end) {
1222 final char c = value.charAt(pos);
1223 if (c == quoteChar) {
1224 // write out the chunk up until this point
1225
1226 // add 1 to the length to write out the encapsulator also
1227 out.append(value, start, pos + 1);
1228 // put the next starting position on the encapsulator so we will
1229 // write it out again with the next string (effectively doubling it)
1230 start = pos;
1231 }
1232 pos++;
1233 }
1234
1235 // write the last segment
1236 out.append(value, start, pos);
1237 out.append(quoteChar);
1238 }
1239
1240 /**
1241 * Prints to the {@link System#out}.
1242 *
1243 * <p>
1244 * See also {@link CSVPrinter}.
1245 * </p>
1246 *
1247 * @return a printer to {@link System#out}.
1248 * @throws IOException
1249 * thrown if the optional header cannot be printed.
1250 * @since 1.5
1251 */
1252 public CSVPrinter printer() throws IOException {
1253 return new CSVPrinter(System.out, this);
1254 }
1255
1256 /**
1257 * Outputs the trailing delimiter (if set) followed by the record separator (if set).
1258 *
1259 * @param out
1260 * where to write
1261 * @throws IOException
1262 * If an I/O error occurs
1263 * @since 1.4
1264 */
1265 public void println(final Appendable out) throws IOException {
1266 if (getTrailingDelimiter()) {
1267 out.append(getDelimiter());
1268 }
1269 if (recordSeparator != null) {
1270 out.append(recordSeparator);
1271 }
1272 }
1273
1274 /**
1275 * Prints the given {@code values} to {@code out} as a single record of delimiter separated values followed by the
1276 * record separator.
1277 *
1278 * <p>
1279 * The values will be quoted if needed. Quotes and new-line characters will be escaped. This method adds the record
1280 * separator to the output after printing the record, so there is no need to call {@link #println(Appendable)}.
1281 * </p>
1282 *
1283 * @param out
1284 * where to write.
1285 * @param values
1286 * values to output.
1287 * @throws IOException
1288 * If an I/O error occurs.
1289 * @since 1.4
1290 */
1291 public void printRecord(final Appendable out, final Object... values) throws IOException {
1292 for (int i = 0; i < values.length; i++) {
1293 print(values[i], out, i == 0);
1294 }
1295 println(out);
1296 }
1297
1298 @Override
1299 public String toString() {
1300 final StringBuilder sb = new StringBuilder();
1301 sb.append("Delimiter=<").append(delimiter).append('>');
1302 if (isEscapeCharacterSet()) {
1303 sb.append(' ');
1304 sb.append("Escape=<").append(escapeCharacter).append('>');
1305 }
1306 if (isQuoteCharacterSet()) {
1307 sb.append(' ');
1308 sb.append("QuoteChar=<").append(quoteCharacter).append('>');
1309 }
1310 if (isCommentMarkerSet()) {
1311 sb.append(' ');
1312 sb.append("CommentStart=<").append(commentMarker).append('>');
1313 }
1314 if (isNullStringSet()) {
1315 sb.append(' ');
1316 sb.append("NullString=<").append(nullString).append('>');
1317 }
1318 if (recordSeparator != null) {
1319 sb.append(' ');
1320 sb.append("RecordSeparator=<").append(recordSeparator).append('>');
1321 }
1322 if (getIgnoreEmptyLines()) {
1323 sb.append(" EmptyLines:ignored");
1324 }
1325 if (getIgnoreSurroundingSpaces()) {
1326 sb.append(" SurroundingSpaces:ignored");
1327 }
1328 if (getIgnoreHeaderCase()) {
1329 sb.append(" IgnoreHeaderCase:ignored");
1330 }
1331 sb.append(" SkipHeaderRecord:").append(skipHeaderRecord);
1332 if (headerComments != null) {
1333 sb.append(' ');
1334 sb.append("HeaderComments:").append(Arrays.toString(headerComments));
1335 }
1336 if (header != null) {
1337 sb.append(' ');
1338 sb.append("Header:").append(Arrays.toString(header));
1339 }
1340 return sb.toString();
1341 }
1342
1343 private String[] toStringArray(final Object[] values) {
1344 if (values == null) {
1345 return null;
1346 }
1347 final String[] strings = new String[values.length];
1348 for (int i = 0; i < values.length; i++) {
1349 final Object value = values[i];
1350 strings[i] = value == null ? null : value.toString();
1351 }
1352 return strings;
1353 }
1354
1355 private CharSequence trim(final CharSequence charSequence) {
1356 if (charSequence instanceof String) {
1357 return ((String) charSequence).trim();
1358 }
1359 final int count = charSequence.length();
1360 int len = count;
1361 int pos = 0;
1362
1363 while (pos < len && charSequence.charAt(pos) <= SP) {
1364 pos++;
1365 }
1366 while (pos < len && charSequence.charAt(len - 1) <= SP) {
1367 len--;
1368 }
1369 return pos > 0 || len < count ? charSequence.subSequence(pos, len) : charSequence;
1370 }
1371
1372 /**
1373 * Verifies the consistency of the parameters and throws an IllegalArgumentException if necessary.
1374 *
1375 * @throws IllegalArgumentException
1376 */
1377 private void validate() throws IllegalArgumentException {
1378 if (isLineBreak(delimiter)) {
1379 throw new IllegalArgumentException("The delimiter cannot be a line break");
1380 }
1381
1382 if (quoteCharacter != null && delimiter == quoteCharacter.charValue()) {
1383 throw new IllegalArgumentException(
1384 "The quoteChar character and the delimiter cannot be the same ('" + quoteCharacter + "')");
1385 }
1386
1387 if (escapeCharacter != null && delimiter == escapeCharacter.charValue()) {
1388 throw new IllegalArgumentException(
1389 "The escape character and the delimiter cannot be the same ('" + escapeCharacter + "')");
1390 }
1391
1392 if (commentMarker != null && delimiter == commentMarker.charValue()) {
1393 throw new IllegalArgumentException(
1394 "The comment start character and the delimiter cannot be the same ('" + commentMarker + "')");
1395 }
1396
1397 if (quoteCharacter != null && quoteCharacter.equals(commentMarker)) {
1398 throw new IllegalArgumentException(
1399 "The comment start character and the quoteChar cannot be the same ('" + commentMarker + "')");
1400 }
1401
1402 if (escapeCharacter != null && escapeCharacter.equals(commentMarker)) {
1403 throw new IllegalArgumentException(
1404 "The comment start and the escape character cannot be the same ('" + commentMarker + "')");
1405 }
1406
1407 if (escapeCharacter == null && quoteMode == QuoteMode.NONE) {
1408 throw new IllegalArgumentException("No quotes mode set but no escape character is set");
1409 }
1410
1411 // validate header
1412 if (header != null) {
1413 final Set<String> dupCheck = new HashSet<>();
1414 for (final String hdr : header) {
1415 if (!dupCheck.add(hdr)) {
1416 throw new IllegalArgumentException(
1417 "The header contains a duplicate entry: '" + hdr + "' in " + Arrays.toString(header));
1418 }
1419 }
1420 }
1421 }
1422
1423 /**
1424 * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to {@code true}
1425 *
1426 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
1427 * @see #withAllowMissingColumnNames(boolean)
1428 * @since 1.1
1429 */
1430 public CSVFormat withAllowMissingColumnNames() {
1431 return this.withAllowMissingColumnNames(true);
1432 }
1433
1434 /**
1435 * Returns a new {@code CSVFormat} with the missing column names behavior of the format set to the given value.
1436 *
1437 * @param allowMissingColumnNames
1438 * the missing column names behavior, {@code true} to allow missing column names in the header line,
1439 * {@code false} to cause an {@link IllegalArgumentException} to be thrown.
1440 * @return A new CSVFormat that is equal to this but with the specified missing column names behavior.
1441 */
1442 public CSVFormat withAllowMissingColumnNames(final boolean allowMissingColumnNames) {
1443 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1444 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1445 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1446 }
1447
1448 /**
1449 * Returns a new {@code CSVFormat} with whether to flush on close.
1450 *
1451 * @param autoFlush
1452 * whether to flush on close.
1453 *
1454 * @return A new CSVFormat that is equal to this but with the specified autoFlush setting.
1455 * @since 1.6
1456 */
1457 public CSVFormat withAutoFlush(final boolean autoFlush) {
1458 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1459 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1460 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1461 }
1462
1463 /**
1464 * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
1465 *
1466 * Note that the comment start character is only recognized at the start of a line.
1467 *
1468 * @param commentMarker
1469 * the comment start marker
1470 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
1471 * @throws IllegalArgumentException
1472 * thrown if the specified character is a line break
1473 */
1474 public CSVFormat withCommentMarker(final char commentMarker) {
1475 return withCommentMarker(Character.valueOf(commentMarker));
1476 }
1477
1478 /**
1479 * Returns a new {@code CSVFormat} with the comment start marker of the format set to the specified character.
1480 *
1481 * Note that the comment start character is only recognized at the start of a line.
1482 *
1483 * @param commentMarker
1484 * the comment start marker, use {@code null} to disable
1485 * @return A new CSVFormat that is equal to this one but with the specified character as the comment start marker
1486 * @throws IllegalArgumentException
1487 * thrown if the specified character is a line break
1488 */
1489 public CSVFormat withCommentMarker(final Character commentMarker) {
1490 if (isLineBreak(commentMarker)) {
1491 throw new IllegalArgumentException("The comment start marker character cannot be a line break");
1492 }
1493 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1494 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1495 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1496 }
1497
1498 /**
1499 * Returns a new {@code CSVFormat} with the delimiter of the format set to the specified character.
1500 *
1501 * @param delimiter
1502 * the delimiter character
1503 * @return A new CSVFormat that is equal to this with the specified character as delimiter
1504 * @throws IllegalArgumentException
1505 * thrown if the specified character is a line break
1506 */
1507 public CSVFormat withDelimiter(final char delimiter) {
1508 if (isLineBreak(delimiter)) {
1509 throw new IllegalArgumentException("The delimiter cannot be a line break");
1510 }
1511 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1512 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1513 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1514 }
1515
1516 /**
1517 * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character.
1518 *
1519 * @param escape
1520 * the escape character
1521 * @return A new CSVFormat that is equal to his but with the specified character as the escape character
1522 * @throws IllegalArgumentException
1523 * thrown if the specified character is a line break
1524 */
1525 public CSVFormat withEscape(final char escape) {
1526 return withEscape(Character.valueOf(escape));
1527 }
1528
1529 /**
1530 * Returns a new {@code CSVFormat} with the escape character of the format set to the specified character.
1531 *
1532 * @param escape
1533 * the escape character, use {@code null} to disable
1534 * @return A new CSVFormat that is equal to this but with the specified character as the escape character
1535 * @throws IllegalArgumentException
1536 * thrown if the specified character is a line break
1537 */
1538 public CSVFormat withEscape(final Character escape) {
1539 if (isLineBreak(escape)) {
1540 throw new IllegalArgumentException("The escape character cannot be a line break");
1541 }
1542 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escape, ignoreSurroundingSpaces,
1543 ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
1544 allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1545 }
1546
1547 /**
1548 * Returns a new {@code CSVFormat} using the first record as header.
1549 *
1550 * <p>
1551 * Calling this method is equivalent to calling:
1552 * </p>
1553 *
1554 * <pre>
1555 * CSVFormat format = aFormat.withHeader().withSkipHeaderRecord();
1556 * </pre>
1557 *
1558 * @return A new CSVFormat that is equal to this but using the first record as header.
1559 * @see #withSkipHeaderRecord(boolean)
1560 * @see #withHeader(String...)
1561 * @since 1.3
1562 */
1563 public CSVFormat withFirstRecordAsHeader() {
1564 return withHeader().withSkipHeaderRecord();
1565 }
1566
1567 /**
1568 * Returns a new {@code CSVFormat} with the header of the format defined by the enum class.
1569 *
1570 * <p>
1571 * Example:
1572 * </p>
1573 * <pre>
1574 * public enum Header {
1575 * Name, Email, Phone
1576 * }
1577 *
1578 * CSVFormat format = aformat.withHeader(Header.class);
1579 * </pre>
1580 * <p>
1581 * The header is also used by the {@link CSVPrinter}.
1582 * </p>
1583 *
1584 * @param headerEnum
1585 * the enum defining the header, {@code null} if disabled, empty if parsed automatically, user specified
1586 * otherwise.
1587 *
1588 * @return A new CSVFormat that is equal to this but with the specified header
1589 * @see #withHeader(String...)
1590 * @see #withSkipHeaderRecord(boolean)
1591 * @since 1.3
1592 */
1593 public CSVFormat withHeader(final Class<? extends Enum<?>> headerEnum) {
1594 String[] header = null;
1595 if (headerEnum != null) {
1596 final Enum<?>[] enumValues = headerEnum.getEnumConstants();
1597 header = new String[enumValues.length];
1598 for (int i = 0; i < enumValues.length; i++) {
1599 header[i] = enumValues[i].name();
1600 }
1601 }
1602 return withHeader(header);
1603 }
1604
1605 /**
1606 * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can
1607 * either be parsed automatically from the input file with:
1608 *
1609 * <pre>
1610 * CSVFormat format = aformat.withHeader();
1611 * </pre>
1612 *
1613 * or specified manually with:
1614 *
1615 * <pre>
1616 * CSVFormat format = aformat.withHeader(resultSet);
1617 * </pre>
1618 * <p>
1619 * The header is also used by the {@link CSVPrinter}.
1620 * </p>
1621 *
1622 * @param resultSet
1623 * the resultSet for the header, {@code null} if disabled, empty if parsed automatically, user specified
1624 * otherwise.
1625 *
1626 * @return A new CSVFormat that is equal to this but with the specified header
1627 * @throws SQLException
1628 * SQLException if a database access error occurs or this method is called on a closed result set.
1629 * @since 1.1
1630 */
1631 public CSVFormat withHeader(final ResultSet resultSet) throws SQLException {
1632 return withHeader(resultSet != null ? resultSet.getMetaData() : null);
1633 }
1634
1635 /**
1636 * Returns a new {@code CSVFormat} with the header of the format set from the result set metadata. The header can
1637 * either be parsed automatically from the input file with:
1638 *
1639 * <pre>
1640 * CSVFormat format = aformat.withHeader();
1641 * </pre>
1642 *
1643 * or specified manually with:
1644 *
1645 * <pre>
1646 * CSVFormat format = aformat.withHeader(metaData);
1647 * </pre>
1648 * <p>
1649 * The header is also used by the {@link CSVPrinter}.
1650 * </p>
1651 *
1652 * @param metaData
1653 * the metaData for the header, {@code null} if disabled, empty if parsed automatically, user specified
1654 * otherwise.
1655 *
1656 * @return A new CSVFormat that is equal to this but with the specified header
1657 * @throws SQLException
1658 * SQLException if a database access error occurs or this method is called on a closed result set.
1659 * @since 1.1
1660 */
1661 public CSVFormat withHeader(final ResultSetMetaData metaData) throws SQLException {
1662 String[] labels = null;
1663 if (metaData != null) {
1664 final int columnCount = metaData.getColumnCount();
1665 labels = new String[columnCount];
1666 for (int i = 0; i < columnCount; i++) {
1667 labels[i] = metaData.getColumnLabel(i + 1);
1668 }
1669 }
1670 return withHeader(labels);
1671 }
1672
1673 /**
1674 * Returns a new {@code CSVFormat} with the header of the format set to the given values. The header can either be
1675 * parsed automatically from the input file with:
1676 *
1677 * <pre>
1678 * CSVFormat format = aformat.withHeader();
1679 * </pre>
1680 *
1681 * or specified manually with:
1682 *
1683 * <pre>
1684 * CSVFormat format = aformat.withHeader(&quot;name&quot;, &quot;email&quot;, &quot;phone&quot;);
1685 * </pre>
1686 * <p>
1687 * The header is also used by the {@link CSVPrinter}.
1688 * </p>
1689 *
1690 * @param header
1691 * the header, {@code null} if disabled, empty if parsed automatically, user specified otherwise.
1692 *
1693 * @return A new CSVFormat that is equal to this but with the specified header
1694 * @see #withSkipHeaderRecord(boolean)
1695 */
1696 public CSVFormat withHeader(final String... header) {
1697 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1698 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1699 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1700 }
1701
1702 /**
1703 * Returns a new {@code CSVFormat} with the header comments of the format set to the given values. The comments will
1704 * be printed first, before the headers. This setting is ignored by the parser.
1705 *
1706 * <pre>
1707 * CSVFormat format = aformat.withHeaderComments(&quot;Generated by Apache Commons CSV 1.1.&quot;, new Date());
1708 * </pre>
1709 *
1710 * @param headerComments
1711 * the headerComments which will be printed by the Printer before the actual CSV data.
1712 *
1713 * @return A new CSVFormat that is equal to this but with the specified header
1714 * @see #withSkipHeaderRecord(boolean)
1715 * @since 1.1
1716 */
1717 public CSVFormat withHeaderComments(final Object... headerComments) {
1718 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1719 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1720 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1721 }
1722
1723 /**
1724 * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to {@code true}.
1725 *
1726 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
1727 * @since {@link #withIgnoreEmptyLines(boolean)}
1728 * @since 1.1
1729 */
1730 public CSVFormat withIgnoreEmptyLines() {
1731 return this.withIgnoreEmptyLines(true);
1732 }
1733
1734 /**
1735 * Returns a new {@code CSVFormat} with the empty line skipping behavior of the format set to the given value.
1736 *
1737 * @param ignoreEmptyLines
1738 * the empty line skipping behavior, {@code true} to ignore the empty lines between the records,
1739 * {@code false} to translate empty lines to empty records.
1740 * @return A new CSVFormat that is equal to this but with the specified empty line skipping behavior.
1741 */
1742 public CSVFormat withIgnoreEmptyLines(final boolean ignoreEmptyLines) {
1743 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1744 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1745 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1746 }
1747
1748 /**
1749 * Returns a new {@code CSVFormat} with the header ignore case behavior set to {@code true}.
1750 *
1751 * @return A new CSVFormat that will ignore case header name.
1752 * @see #withIgnoreHeaderCase(boolean)
1753 * @since 1.3
1754 */
1755 public CSVFormat withIgnoreHeaderCase() {
1756 return this.withIgnoreHeaderCase(true);
1757 }
1758
1759 /**
1760 * Returns a new {@code CSVFormat} with whether header names should be accessed ignoring case.
1761 *
1762 * @param ignoreHeaderCase
1763 * the case mapping behavior, {@code true} to access name/values, {@code false} to leave the mapping as
1764 * is.
1765 * @return A new CSVFormat that will ignore case header name if specified as {@code true}
1766 * @since 1.3
1767 */
1768 public CSVFormat withIgnoreHeaderCase(final boolean ignoreHeaderCase) {
1769 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1770 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1771 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1772 }
1773
1774 /**
1775 * Returns a new {@code CSVFormat} with the trimming behavior of the format set to {@code true}.
1776 *
1777 * @return A new CSVFormat that is equal to this but with the specified trimming behavior.
1778 * @see #withIgnoreSurroundingSpaces(boolean)
1779 * @since 1.1
1780 */
1781 public CSVFormat withIgnoreSurroundingSpaces() {
1782 return this.withIgnoreSurroundingSpaces(true);
1783 }
1784
1785 /**
1786 * Returns a new {@code CSVFormat} with the trimming behavior of the format set to the given value.
1787 *
1788 * @param ignoreSurroundingSpaces
1789 * the trimming behavior, {@code true} to remove the surrounding spaces, {@code false} to leave the
1790 * spaces as is.
1791 * @return A new CSVFormat that is equal to this but with the specified trimming behavior.
1792 */
1793 public CSVFormat withIgnoreSurroundingSpaces(final boolean ignoreSurroundingSpaces) {
1794 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1795 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1796 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1797 }
1798
1799 /**
1800 * Returns a new {@code CSVFormat} with conversions to and from null for strings on input and output.
1801 * <ul>
1802 * <li><strong>Reading:</strong> Converts strings equal to the given {@code nullString} to {@code null} when reading
1803 * records.</li>
1804 * <li><strong>Writing:</strong> Writes {@code null} as the given {@code nullString} when writing records.</li>
1805 * </ul>
1806 *
1807 * @param nullString
1808 * the String to convert to and from {@code null}. No substitution occurs if {@code null}
1809 *
1810 * @return A new CSVFormat that is equal to this but with the specified null conversion string.
1811 */
1812 public CSVFormat withNullString(final String nullString) {
1813 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1814 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1815 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1816 }
1817
1818 /**
1819 * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
1820 *
1821 * @param quoteChar
1822 * the quoteChar character
1823 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
1824 * @throws IllegalArgumentException
1825 * thrown if the specified character is a line break
1826 */
1827 public CSVFormat withQuote(final char quoteChar) {
1828 return withQuote(Character.valueOf(quoteChar));
1829 }
1830
1831 /**
1832 * Returns a new {@code CSVFormat} with the quoteChar of the format set to the specified character.
1833 *
1834 * @param quoteChar
1835 * the quoteChar character, use {@code null} to disable
1836 * @return A new CSVFormat that is equal to this but with the specified character as quoteChar
1837 * @throws IllegalArgumentException
1838 * thrown if the specified character is a line break
1839 */
1840 public CSVFormat withQuote(final Character quoteChar) {
1841 if (isLineBreak(quoteChar)) {
1842 throw new IllegalArgumentException("The quoteChar cannot be a line break");
1843 }
1844 return new CSVFormat(delimiter, quoteChar, quoteMode, commentMarker, escapeCharacter, ignoreSurroundingSpaces,
1845 ignoreEmptyLines, recordSeparator, nullString, headerComments, header, skipHeaderRecord,
1846 allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1847 }
1848
1849 /**
1850 * Returns a new {@code CSVFormat} with the output quote policy of the format set to the specified value.
1851 *
1852 * @param quoteModePolicy
1853 * the quote policy to use for output.
1854 *
1855 * @return A new CSVFormat that is equal to this but with the specified quote policy
1856 */
1857 public CSVFormat withQuoteMode(final QuoteMode quoteModePolicy) {
1858 return new CSVFormat(delimiter, quoteCharacter, quoteModePolicy, commentMarker, escapeCharacter,
1859 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1860 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1861 }
1862
1863 /**
1864 * Returns a new {@code CSVFormat} with the record separator of the format set to the specified character.
1865 *
1866 * <p>
1867 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently
1868 * only works for inputs with '\n', '\r' and "\r\n"
1869 * </p>
1870 *
1871 * @param recordSeparator
1872 * the record separator to use for output.
1873 *
1874 * @return A new CSVFormat that is equal to this but with the specified output record separator
1875 */
1876 public CSVFormat withRecordSeparator(final char recordSeparator) {
1877 return withRecordSeparator(String.valueOf(recordSeparator));
1878 }
1879
1880 /**
1881 * Returns a new {@code CSVFormat} with the record separator of the format set to the specified String.
1882 *
1883 * <p>
1884 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently
1885 * only works for inputs with '\n', '\r' and "\r\n"
1886 * </p>
1887 *
1888 * @param recordSeparator
1889 * the record separator to use for output.
1890 *
1891 * @return A new CSVFormat that is equal to this but with the specified output record separator
1892 * @throws IllegalArgumentException
1893 * if recordSeparator is none of CR, LF or CRLF
1894 */
1895 public CSVFormat withRecordSeparator(final String recordSeparator) {
1896 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1897 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1898 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1899 }
1900
1901 /**
1902 * Returns a new {@code CSVFormat} with skipping the header record set to {@code true}.
1903 *
1904 * @return A new CSVFormat that is equal to this but with the the specified skipHeaderRecord setting.
1905 * @see #withSkipHeaderRecord(boolean)
1906 * @see #withHeader(String...)
1907 * @since 1.1
1908 */
1909 public CSVFormat withSkipHeaderRecord() {
1910 return this.withSkipHeaderRecord(true);
1911 }
1912
1913 /**
1914 * Returns a new {@code CSVFormat} with whether to skip the header record.
1915 *
1916 * @param skipHeaderRecord
1917 * whether to skip the header record.
1918 *
1919 * @return A new CSVFormat that is equal to this but with the the specified skipHeaderRecord setting.
1920 * @see #withHeader(String...)
1921 */
1922 public CSVFormat withSkipHeaderRecord(final boolean skipHeaderRecord) {
1923 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1924 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1925 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1926 }
1927
1928 /**
1929 * Returns a new {@code CSVFormat} with the record separator of the format set to the operating system's line
1930 * separator string, typically CR+LF on Windows and LF on Linux.
1931 *
1932 * <p>
1933 * <strong>Note:</strong> This setting is only used during printing and does not affect parsing. Parsing currently
1934 * only works for inputs with '\n', '\r' and "\r\n"
1935 * </p>
1936 *
1937 * @return A new CSVFormat that is equal to this but with the operating system's line separator stringr
1938 * @since 1.6
1939 */
1940 public CSVFormat withSystemRecordSeparator() {
1941 return withRecordSeparator(System.getProperty("line.separator"));
1942 }
1943
1944 /**
1945 * Returns a new {@code CSVFormat} to add a trailing delimiter.
1946 *
1947 * @return A new CSVFormat that is equal to this but with the trailing delimiter setting.
1948 * @since 1.3
1949 */
1950 public CSVFormat withTrailingDelimiter() {
1951 return withTrailingDelimiter(true);
1952 }
1953
1954 /**
1955 * Returns a new {@code CSVFormat} with whether to add a trailing delimiter.
1956 *
1957 * @param trailingDelimiter
1958 * whether to add a trailing delimiter.
1959 *
1960 * @return A new CSVFormat that is equal to this but with the specified trailing delimiter setting.
1961 * @since 1.3
1962 */
1963 public CSVFormat withTrailingDelimiter(final boolean trailingDelimiter) {
1964 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1965 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1966 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1967 }
1968
1969 /**
1970 * Returns a new {@code CSVFormat} to trim leading and trailing blanks.
1971 *
1972 * @return A new CSVFormat that is equal to this but with the trim setting on.
1973 * @since 1.3
1974 */
1975 public CSVFormat withTrim() {
1976 return withTrim(true);
1977 }
1978
1979 /**
1980 * Returns a new {@code CSVFormat} with whether to trim leading and trailing blanks.
1981 *
1982 * @param trim
1983 * whether to trim leading and trailing blanks.
1984 *
1985 * @return A new CSVFormat that is equal to this but with the specified trim setting.
1986 * @since 1.3
1987 */
1988 public CSVFormat withTrim(final boolean trim) {
1989 return new CSVFormat(delimiter, quoteCharacter, quoteMode, commentMarker, escapeCharacter,
1990 ignoreSurroundingSpaces, ignoreEmptyLines, recordSeparator, nullString, headerComments, header,
1991 skipHeaderRecord, allowMissingColumnNames, ignoreHeaderCase, trim, trailingDelimiter, autoFlush);
1992 }
1993 }