001package com.streamconverter.command.impl.csv;
002
003import com.opencsv.CSVReader;
004import com.opencsv.CSVWriter;
005import com.opencsv.exceptions.CsvValidationException;
006import com.streamconverter.command.AbstractStreamCommand;
007import com.streamconverter.path.CSVPath;
008import java.io.IOException;
009import java.io.InputStream;
010import java.io.InputStreamReader;
011import java.io.OutputStream;
012import java.io.OutputStreamWriter;
013import java.nio.charset.StandardCharsets;
014import java.util.List;
015
016/**
017 * CSV Filter Command Class
018 *
019 * <p>This class implements pure data extraction from CSV using column selectors. Unlike
020 * CsvNavigateCommand which applies transformations, CsvFilterCommand only extracts/filters columns
021 * based on specified column names or indices without any modifications.
022 *
023 * <p>Features: - Extract specific columns using column names or indices - Preserve exact data
024 * values from extracted columns - Memory-efficient streaming processing - Support for multiple
025 * column selection
026 */
027public class CsvFilterCommand extends AbstractStreamCommand {
028
029  private final CSVPath combinedSelector;
030  private final boolean hasHeader;
031
032  /**
033   * Constructor for CSV filtering with single typed column selector.
034   *
035   * @param columnSelector the typed CSVPath to extract
036   * @param hasHeader whether the CSV has a header row
037   * @throws IllegalArgumentException if columnSelector is null
038   */
039  private CsvFilterCommand(CSVPath columnSelector, boolean hasHeader) {
040    this.combinedSelector = columnSelector;
041    this.hasHeader = hasHeader;
042  }
043
044  /**
045   * Factory method for CSV filtering with single typed column selector (assumes header exists).
046   *
047   * @param columnSelector the typed CSVPath to extract
048   * @return a CsvFilterCommand instance
049   * @throws IllegalArgumentException if columnSelector is null
050   */
051  public static CsvFilterCommand create(CSVPath columnSelector) {
052    return create(columnSelector, true);
053  }
054
055  /**
056   * Factory method for CSV filtering with single typed column selector.
057   *
058   * @param columnSelector the typed CSVPath to extract
059   * @param hasHeader whether the CSV has a header row
060   * @return a CsvFilterCommand instance
061   * @throws IllegalArgumentException if columnSelector is null
062   */
063  public static CsvFilterCommand create(CSVPath columnSelector, boolean hasHeader) {
064    if (columnSelector == null) {
065      throw new IllegalArgumentException("Column selector cannot be null");
066    }
067    return new CsvFilterCommand(columnSelector, hasHeader);
068  }
069
070  @Override
071  public void execute(InputStream inputStream, OutputStream outputStream) throws IOException {
072    try (CSVReader csvReader =
073            new CSVReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
074        CSVWriter csvWriter =
075            new CSVWriter(
076                new OutputStreamWriter(outputStream, StandardCharsets.UTF_8),
077                CSVWriter.DEFAULT_SEPARATOR,
078                CSVWriter.DEFAULT_QUOTE_CHARACTER,
079                // RFC 4180 §5: only doubled-quote escaping, no backslash escape
080                CSVWriter.NO_ESCAPE_CHARACTER,
081                // RFC4180_LINE_END (\r\n) per RFC 4180 §2.
082                CSVWriter.RFC4180_LINE_END)) {
083
084      List<Integer> columnIndices;
085
086      String[] firstRow = csvReader.readNext();
087      if (firstRow == null) {
088        csvWriter.flush();
089        return;
090      }
091
092      if (hasHeader) {
093        columnIndices = mapColumnSelectorsToIndices(combinedSelector, firstRow);
094        // Write filtered header
095        writeFilteredRow(csvWriter, firstRow, columnIndices);
096      } else {
097        // No header — column selectors must be numeric indices
098        columnIndices = parseNumericColumnSelectors(combinedSelector, firstRow.length);
099        // Write filtered first data row
100        writeFilteredRow(csvWriter, firstRow, columnIndices);
101      }
102
103      // Process remaining rows
104      String[] row;
105      while ((row = csvReader.readNext()) != null) {
106        writeFilteredRow(csvWriter, row, columnIndices);
107      }
108
109      csvWriter.flush();
110    } catch (CsvValidationException e) {
111      throw new IOException("Failed to parse CSV: " + e.getMessage(), e);
112    }
113  }
114
115  /**
116   * Map column selectors to column indices using Don't Ask Tell pattern
117   *
118   * @param csvPath the CSV path containing multiple selectors
119   * @param headers array of header names
120   * @return list of column indices
121   * @throws IllegalArgumentException if no columns found
122   */
123  private List<Integer> mapColumnSelectorsToIndices(CSVPath csvPath, String[] headers) {
124    List<Integer> indices = csvPath.findMatchingIndices(headers);
125    if (indices.isEmpty()) {
126      throw new IllegalArgumentException("No columns found for selector: " + csvPath.toString());
127    }
128    return indices;
129  }
130
131  /**
132   * Parse numeric column selectors when no header exists using Don't Ask Tell pattern
133   *
134   * @param csvPath the CSV path containing column selectors
135   * @param totalColumns total number of columns available
136   * @return list of column indices
137   * @throws IllegalArgumentException if no valid indices found
138   */
139  private List<Integer> parseNumericColumnSelectors(CSVPath csvPath, int totalColumns) {
140    List<Integer> indices = csvPath.findMatchingIndices(totalColumns);
141    if (indices.isEmpty()) {
142      throw new IllegalArgumentException(
143          "Column selector must be numeric when no header: " + csvPath.toString());
144    }
145    return indices;
146  }
147
148  /**
149   * Write filtered row with only selected columns. Quotes are applied only when required by RFC
150   * 4180 (fields containing commas, quotes, or newlines).
151   *
152   * @param csvWriter output writer
153   * @param fields all field values
154   * @param columnIndices indices of columns to include
155   */
156  private void writeFilteredRow(CSVWriter csvWriter, String[] fields, List<Integer> columnIndices) {
157    String[] filteredRow = new String[columnIndices.size()];
158    for (int i = 0; i < columnIndices.size(); i++) {
159      int columnIndex = columnIndices.get(i);
160      filteredRow[i] = columnIndex < fields.length ? fields[columnIndex] : "";
161    }
162    // applyQuotesToAll=false: only quote fields that contain delimiters or quotes
163    csvWriter.writeNext(filteredRow, false);
164  }
165}