package com.streamconverter.command.impl.csv;

import com.streamconverter.command.AbstractStreamCommand;
import com.streamconverter.path.CSVPath;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * CSV Filter Command Class
 *
 * <p>This class extracts a subset of columns from a CSV stream. The columns to keep are described
 * by a {@link CSVPath}; the values of the selected columns are copied to the output without any
 * transformation other than the normalization described below.
 *
 * <p>Behavior visible in this class:
 *
 * <ul>
 *   <li>Input and output are both treated as UTF-8 text.
 *   <li>The input is processed one line at a time, so only a single row is held in memory. As a
 *       consequence, quoted fields that span several physical lines are not supported.
 *   <li>Leading and trailing whitespace of every field is trimmed while parsing.
 *   <li>Output fields containing a comma or a double quote are quoted, with embedded double quotes
 *       doubled.
 *   <li>Rows shorter than a selected column index produce an empty field for that column.
 * </ul>
 */
public class CsvFilterCommand extends AbstractStreamCommand {

  private final CSVPath combinedSelector;
  private final boolean hasHeader;

  /**
   * Constructor for CSV filtering with single typed column selector.
   *
   * @param columnSelector the typed CSVPath to extract
   * @param hasHeader whether the CSV has a header row
   * @throws IllegalArgumentException if columnSelector is null
   */
  public CsvFilterCommand(CSVPath columnSelector, boolean hasHeader) {
    if (columnSelector == null) {
      throw new IllegalArgumentException("Column selector cannot be null");
    }
    this.combinedSelector = columnSelector;
    this.hasHeader = hasHeader;
  }

  /**
   * Factory method for CSV filtering with single typed column selector (assumes header exists).
   *
   * @param columnSelector the typed CSVPath to extract
   * @return a CsvFilterCommand instance
   * @throws IllegalArgumentException if columnSelector is null
   */
  public static CsvFilterCommand create(CSVPath columnSelector) {
    return create(columnSelector, true);
  }

  /**
   * Factory method for CSV filtering with single typed column selector.
   *
   * @param columnSelector the typed CSVPath to extract
   * @param hasHeader whether the CSV has a header row
   * @return a CsvFilterCommand instance
   * @throws IllegalArgumentException if columnSelector is null
   */
  public static CsvFilterCommand create(CSVPath columnSelector, boolean hasHeader) {
    return new CsvFilterCommand(columnSelector, hasHeader);
  }

  /**
   * Describes this command by its column selector and header flag.
   *
   * @return a string of the form {@code CsvFilterCommand(columns=..., hasHeader=...)}
   */
  @Override
  protected String getCommandDetails() {
    return String.format(
        "CsvFilterCommand(columns=%s, hasHeader=%s)", combinedSelector, hasHeader);
  }

  /**
   * Reads CSV text from {@code inputStream} and writes only the selected columns of every row to
   * {@code outputStream}.
   *
   * <p>The first line decides which column indices are kept: with a header it is matched against
   * the selector by name, without a header the selector is resolved against the number of columns
   * in that first row. The first line itself is always written (filtered) to the output. An input
   * without any line produces no output.
   *
   * <p>Both streams are closed when this method returns, because the reader and writer wrapping
   * them are managed by try-with-resources.
   *
   * @param inputStream source of the CSV text (decoded as UTF-8)
   * @param outputStream destination of the filtered CSV text (encoded as UTF-8)
   * @throws IOException if reading or writing fails
   * @throws IllegalArgumentException if the selector resolves to no column
   */
  @Override
  protected void executeInternal(InputStream inputStream, OutputStream outputStream)
      throws IOException {
    try (BufferedReader reader =
            new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
        Writer writer = new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)) {

      String firstLine = reader.readLine();
      if (firstLine == null) {
        // Empty CSV: nothing to filter
        writer.flush();
        return;
      }

      String[] firstRowFields = parseCsvLine(firstLine);

      // With a header the selector is matched against the header names; without one the
      // selector is resolved against the column count of the first data row.
      List<Integer> columnIndices =
          hasHeader
              ? mapColumnSelectorsToIndices(combinedSelector, firstRowFields)
              : parseNumericColumnSelectors(combinedSelector, firstRowFields.length);

      String lineSeparator = System.lineSeparator();

      // The first row is emitted in both modes: as filtered header or as filtered data row
      writeFilteredRow(writer, firstRowFields, columnIndices);
      writer.write(lineSeparator);

      // Process remaining data rows
      String line;
      while ((line = reader.readLine()) != null) {
        writeFilteredRow(writer, parseCsvLine(line), columnIndices);
        writer.write(lineSeparator);
      }

      writer.flush();
    }
  }

  /**
   * Map column selectors to column indices by delegating the lookup to the CSVPath itself (Tell,
   * Don't Ask).
   *
   * @param csvPath the CSV path containing the selectors
   * @param headers array of header names
   * @return list of column indices
   * @throws IllegalArgumentException if no columns found
   */
  private List<Integer> mapColumnSelectorsToIndices(CSVPath csvPath, String[] headers) {
    List<Integer> indices = csvPath.findMatchingIndices(headers);
    if (indices.isEmpty()) {
      throw new IllegalArgumentException("No columns found for selector: " + csvPath.toString());
    }
    return indices;
  }

  /**
   * Resolve column selectors when no header exists by delegating the lookup to the CSVPath itself
   * (Tell, Don't Ask).
   *
   * @param csvPath the CSV path containing column selectors
   * @param totalColumns total number of columns available
   * @return list of column indices
   * @throws IllegalArgumentException if no valid indices found
   */
  private List<Integer> parseNumericColumnSelectors(CSVPath csvPath, int totalColumns) {
    List<Integer> indices = csvPath.findMatchingIndices(totalColumns);
    if (indices.isEmpty()) {
      throw new IllegalArgumentException(
          "Column selector must be numeric when no header: " + csvPath.toString());
    }
    return indices;
  }

  /**
   * Parse a single CSV line into fields (simple implementation).
   *
   * <p>Commas inside double-quoted sections do not split fields. Inside a quoted section a doubled
   * quote ({@code ""}) is read as one literal quote character; any other quote only switches the
   * quoted state and is not part of the value. Every field is trimmed of leading and trailing
   * whitespace.
   *
   * @param line CSV line
   * @return array of field values; an empty array for a null or empty line
   */
  private String[] parseCsvLine(String line) {
    // Simple CSV parsing - for production, consider using a proper CSV library
    if (line == null || line.isEmpty()) {
      return new String[0];
    }

    List<String> fields = new ArrayList<>();
    StringBuilder current = new StringBuilder();
    boolean inQuotes = false;
    int length = line.length();

    for (int i = 0; i < length; i++) {
      char c = line.charAt(i);

      if (c == '"') {
        if (inQuotes && i + 1 < length && line.charAt(i + 1) == '"') {
          // Escaped quote inside a quoted field: keep one literal quote and skip its twin,
          // otherwise the value would silently lose the character.
          current.append('"');
          i++;
        } else {
          inQuotes = !inQuotes;
        }
      } else if (c == ',' && !inQuotes) {
        fields.add(current.toString().trim());
        current.setLength(0);
      } else {
        current.append(c);
      }
    }

    // The last field has no trailing comma, so it is added after the loop
    fields.add(current.toString().trim());
    return fields.toArray(new String[0]);
  }

  /**
   * Write filtered row with only selected columns, separated by commas and without a line
   * terminator.
   *
   * @param writer output writer
   * @param fields all field values
   * @param columnIndices indices of columns to include
   * @throws IOException if writing fails
   */
  private void writeFilteredRow(Writer writer, String[] fields, List<Integer> columnIndices)
      throws IOException {
    for (int i = 0; i < columnIndices.size(); i++) {
      if (i > 0) {
        writer.write(",");
      }

      int columnIndex = columnIndices.get(i);
      if (columnIndex >= fields.length) {
        // Column doesn't exist in this row - leave the field empty
        continue;
      }

      String field = fields[columnIndex];
      // Quote field if it contains comma or quotes, doubling embedded quotes
      if (field.contains(",") || field.contains("\"")) {
        writer.write("\"" + field.replace("\"", "\"\"") + "\"");
      } else {
        writer.write(field);
      }
    }
  }
}