package com.streamconverter.command.impl.csv;

import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.CSVWriter;
import com.opencsv.RFC4180ParserBuilder;
import com.opencsv.exceptions.CsvValidationException;
import com.streamconverter.command.AbstractStreamCommand;
import com.streamconverter.path.CSVPath;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.util.List;

/**
 * CSV Filter Command Class
 *
 * <p>This class implements pure data extraction from CSV using column selectors. Unlike
 * CsvNavigateCommand which applies transformations, CsvFilterCommand only extracts/filters columns
 * based on specified column names or indices without any modifications.
 *
 * <p>Features:
 *
 * <ul>
 *   <li>Extract specific columns using column names or indices
 *   <li>Preserve exact data values from extracted columns
 *   <li>Row-by-row streaming processing (one record is held at a time)
 *   <li>Support for multiple column selection
 * </ul>
 *
 * <p>Input is read and output is written as UTF-8. Both sides use RFC 4180 quoting rules (a quote
 * inside a quoted field is represented by two quotes; backslash has no special meaning), so field
 * values survive the round trip unchanged.
 */
public class CsvFilterCommand extends AbstractStreamCommand {

  private final CSVPath combinedSelector;
  private final boolean hasHeader;

  /**
   * Creates the command. Argument validation is performed by the {@code create} factory methods,
   * which are the only callers.
   *
   * @param columnSelector the typed CSVPath to extract
   * @param hasHeader whether the CSV has a header row
   */
  private CsvFilterCommand(CSVPath columnSelector, boolean hasHeader) {
    this.combinedSelector = columnSelector;
    this.hasHeader = hasHeader;
  }

  /**
   * Factory method for CSV filtering with single typed column selector (assumes header exists).
   *
   * @param columnSelector the typed CSVPath to extract
   * @return a CsvFilterCommand instance
   * @throws IllegalArgumentException if columnSelector is null
   */
  public static CsvFilterCommand create(CSVPath columnSelector) {
    return create(columnSelector, true);
  }

  /**
   * Factory method for CSV filtering with single typed column selector.
   *
   * @param columnSelector the typed CSVPath to extract
   * @param hasHeader whether the CSV has a header row
   * @return a CsvFilterCommand instance
   * @throws IllegalArgumentException if columnSelector is null
   */
  public static CsvFilterCommand create(CSVPath columnSelector, boolean hasHeader) {
    if (columnSelector == null) {
      throw new IllegalArgumentException("Column selector cannot be null");
    }
    return new CsvFilterCommand(columnSelector, hasHeader);
  }

  /**
   * Reads CSV records from {@code inputStream}, keeps only the selected columns, and writes the
   * result to {@code outputStream}.
   *
   * <p>The first record decides the column indices: when the command was created with a header,
   * the selector is matched against the header names; otherwise it is resolved against the column
   * count of the first record. The first record itself is always written (filtered) to the output.
   * Empty input produces empty output.
   *
   * <p>Both streams are wrapped in try-with-resources and are therefore closed when this method
   * returns or throws.
   *
   * @param inputStream source of UTF-8 encoded CSV data
   * @param outputStream destination for the filtered UTF-8 encoded CSV data
   * @throws IOException if reading, parsing, or writing fails
   * @throws IllegalArgumentException if the selector matches no column
   */
  @Override
  public void execute(InputStream inputStream, OutputStream outputStream) throws IOException {
    try (CSVReader csvReader =
            new CSVReaderBuilder(new InputStreamReader(inputStream, StandardCharsets.UTF_8))
                // The default CSVParser treats backslash as an escape character and drops or
                // rewrites backslashes in the data. RFC4180Parser leaves them untouched, which
                // is required to "preserve exact data values".
                .withCSVParser(new RFC4180ParserBuilder().build())
                .build();
        CSVWriter csvWriter =
            new CSVWriter(
                new OutputStreamWriter(outputStream, StandardCharsets.UTF_8),
                CSVWriter.DEFAULT_SEPARATOR,
                CSVWriter.DEFAULT_QUOTE_CHARACTER,
                // RFC 4180 §2.7: a quote inside a quoted field is escaped by doubling it.
                // In OpenCSV that is the DEFAULT escape character ('"'). NO_ESCAPE_CHARACTER
                // would suppress escaping altogether and emit malformed fields such as "a"b".
                CSVWriter.DEFAULT_ESCAPE_CHARACTER,
                // RFC4180_LINE_END (\r\n) per RFC 4180 §2.
                CSVWriter.RFC4180_LINE_END)) {

      String[] firstRow = csvReader.readNext();
      if (firstRow == null) {
        // Empty input: nothing to filter.
        csvWriter.flush();
        return;
      }

      // With a header the selector is matched against header names; without one it must
      // resolve purely from the number of columns in the first data row.
      List<Integer> columnIndices =
          hasHeader
              ? mapColumnSelectorsToIndices(combinedSelector, firstRow)
              : parseNumericColumnSelectors(combinedSelector, firstRow.length);

      // The first row is emitted in both modes (filtered header or filtered first data row).
      writeFilteredRow(csvWriter, firstRow, columnIndices);

      // Process remaining rows
      String[] row;
      while ((row = csvReader.readNext()) != null) {
        writeFilteredRow(csvWriter, row, columnIndices);
      }

      csvWriter.flush();
      // CSVWriter.writeNext records IOExceptions internally instead of throwing them;
      // surface a deferred failure so truncated output is not reported as success.
      if (csvWriter.checkError()) {
        throw new IOException("Failed to write CSV output");
      }
    } catch (CsvValidationException e) {
      throw new IOException("Failed to parse CSV: " + e.getMessage(), e);
    }
  }

  /**
   * Map column selectors to column indices. The matching itself is delegated to the CSVPath
   * ("Tell, Don't Ask").
   *
   * @param csvPath the CSV path containing multiple selectors
   * @param headers array of header names
   * @return list of column indices
   * @throws IllegalArgumentException if no columns found
   */
  private List<Integer> mapColumnSelectorsToIndices(CSVPath csvPath, String[] headers) {
    List<Integer> indices = csvPath.findMatchingIndices(headers);
    if (indices.isEmpty()) {
      throw new IllegalArgumentException("No columns found for selector: " + csvPath.toString());
    }
    return indices;
  }

  /**
   * Resolve column selectors when no header exists. The resolution itself is delegated to the
   * CSVPath ("Tell, Don't Ask").
   *
   * @param csvPath the CSV path containing column selectors
   * @param totalColumns total number of columns available
   * @return list of column indices
   * @throws IllegalArgumentException if no valid indices found
   */
  private List<Integer> parseNumericColumnSelectors(CSVPath csvPath, int totalColumns) {
    List<Integer> indices = csvPath.findMatchingIndices(totalColumns);
    if (indices.isEmpty()) {
      throw new IllegalArgumentException(
          "Column selector must be numeric when no header: " + csvPath.toString());
    }
    return indices;
  }

  /**
   * Write filtered row with only selected columns. Quotes are applied only when a field needs
   * them (it contains the separator, a quote, or a line break).
   *
   * <p>A selected index that lies beyond the end of a short row yields an empty field, so every
   * output row has the same number of columns.
   *
   * @param csvWriter output writer
   * @param fields all field values
   * @param columnIndices indices of columns to include
   */
  private void writeFilteredRow(CSVWriter csvWriter, String[] fields, List<Integer> columnIndices) {
    String[] filteredRow = new String[columnIndices.size()];
    for (int i = 0; i < columnIndices.size(); i++) {
      int columnIndex = columnIndices.get(i);
      filteredRow[i] = columnIndex < fields.length ? fields[columnIndex] : "";
    }
    // applyQuotesToAll=false: only quote fields that actually require quoting
    csvWriter.writeNext(filteredRow, false);
  }
}