001package com.streamconverter.command.impl.csv;
002
003import com.streamconverter.command.AbstractStreamCommand;
004import com.streamconverter.path.CSVPath;
005import java.io.BufferedReader;
006import java.io.IOException;
007import java.io.InputStream;
008import java.io.InputStreamReader;
009import java.io.OutputStream;
010import java.io.OutputStreamWriter;
011import java.io.Writer;
012import java.nio.charset.StandardCharsets;
013import java.util.ArrayList;
014import java.util.Arrays;
015import java.util.List;
016
017/**
018 * CSV Filter Command Class
019 *
020 * <p>This class implements pure data extraction from CSV using column selectors. Unlike
021 * CsvNavigateCommand which applies transformations, CsvFilterCommand only extracts/filters columns
022 * based on specified column names or indices without any modifications.
023 *
024 * <p>Features: - Extract specific columns using column names or indices - Preserve exact data
025 * values from extracted columns - Memory-efficient streaming processing - Support for multiple
026 * column selection
027 */
028public class CsvFilterCommand extends AbstractStreamCommand {
029
030  private final CSVPath combinedSelector;
031  private final boolean hasHeader;
032
033  /**
034   * Constructor for CSV filtering with single typed column selector.
035   *
036   * @param columnSelector the typed CSVPath to extract
037   * @param hasHeader whether the CSV has a header row
038   * @throws IllegalArgumentException if columnSelector is null
039   */
040  public CsvFilterCommand(CSVPath columnSelector, boolean hasHeader) {
041    if (columnSelector == null) {
042      throw new IllegalArgumentException("Column selector cannot be null");
043    }
044    this.combinedSelector = columnSelector;
045    Arrays.asList(columnSelector);
046    Arrays.asList(columnSelector.toString());
047    this.hasHeader = hasHeader;
048  }
049
050  /**
051   * Factory method for CSV filtering with single typed column selector (assumes header exists).
052   *
053   * @param columnSelector the typed CSVPath to extract
054   * @return a CsvFilterCommand instance
055   */
056  public static CsvFilterCommand create(CSVPath columnSelector) {
057    return create(columnSelector, true);
058  }
059
060  /**
061   * Factory method for CSV filtering with single typed column selector.
062   *
063   * @param columnSelector the typed CSVPath to extract
064   * @param hasHeader whether the CSV has a header row
065   * @return a CsvFilterCommand instance
066   */
067  public static CsvFilterCommand create(CSVPath columnSelector, boolean hasHeader) {
068    return new CsvFilterCommand(columnSelector, hasHeader);
069  }
070
071  @Override
072  protected String getCommandDetails() {
073    return String.format(
074        "CsvFilterCommand(columns=%s, hasHeader=%s)", combinedSelector.toString(), hasHeader);
075  }
076
077  @Override
078  protected void executeInternal(InputStream inputStream, OutputStream outputStream)
079      throws IOException {
080    try (BufferedReader reader =
081            new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8));
082        Writer writer = new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)) {
083
084      List<Integer> columnIndices = new ArrayList<>();
085      String[] headers = null;
086
087      // Read first line
088      String firstLine = reader.readLine();
089      if (firstLine == null) {
090        // Empty CSV
091        writer.flush();
092        return;
093      }
094
095      String[] firstRowFields = parseCsvLine(firstLine);
096
097      if (hasHeader) {
098        headers = firstRowFields;
099        // Map column selectors to indices
100        columnIndices = mapColumnSelectorsToIndices(combinedSelector, headers);
101
102        // Write filtered header
103        writeFilteredRow(writer, firstRowFields, columnIndices);
104        writer.write(System.lineSeparator());
105      } else {
106        // No header - column selectors must be numeric indices
107        columnIndices = parseNumericColumnSelectors(combinedSelector, firstRowFields.length);
108
109        // Write filtered first data row
110        writeFilteredRow(writer, firstRowFields, columnIndices);
111        writer.write(System.lineSeparator());
112      }
113
114      // Process remaining data rows
115      String line;
116      while ((line = reader.readLine()) != null) {
117        String[] fields = parseCsvLine(line);
118        writeFilteredRow(writer, fields, columnIndices);
119        writer.write(System.lineSeparator());
120      }
121
122      writer.flush();
123    }
124  }
125
126  /**
127   * Map column selectors to column indices using Don't Ask Tell pattern
128   *
129   * @param csvPath the CSV path containing multiple selectors
130   * @param headers array of header names
131   * @return list of column indices
132   * @throws IllegalArgumentException if no columns found
133   */
134  private List<Integer> mapColumnSelectorsToIndices(CSVPath csvPath, String[] headers) {
135    List<Integer> indices = csvPath.findMatchingIndices(headers);
136    if (indices.isEmpty()) {
137      throw new IllegalArgumentException("No columns found for selector: " + csvPath.toString());
138    }
139    return indices;
140  }
141
142  /**
143   * Parse numeric column selectors when no header exists using Don't Ask Tell pattern
144   *
145   * @param csvPath the CSV path containing column selectors
146   * @param totalColumns total number of columns available
147   * @return list of column indices
148   * @throws IllegalArgumentException if no valid indices found
149   */
150  private List<Integer> parseNumericColumnSelectors(CSVPath csvPath, int totalColumns) {
151    List<Integer> indices = csvPath.findMatchingIndices(totalColumns);
152    if (indices.isEmpty()) {
153      throw new IllegalArgumentException(
154          "Column selector must be numeric when no header: " + csvPath.toString());
155    }
156    return indices;
157  }
158
159  /**
160   * Parse a CSV line into fields (simple implementation)
161   *
162   * @param line CSV line
163   * @return array of field values
164   */
165  private String[] parseCsvLine(String line) {
166    // Simple CSV parsing - handles basic comma separation
167    // For production, consider using a proper CSV library
168    if (line == null || line.isEmpty()) {
169      return new String[0];
170    }
171
172    List<String> fields = new ArrayList<>();
173    StringBuilder current = new StringBuilder();
174    boolean inQuotes = false;
175
176    for (int i = 0; i < line.length(); i++) {
177      char c = line.charAt(i);
178
179      if (c == '"') {
180        inQuotes = !inQuotes;
181      } else if (c == ',' && !inQuotes) {
182        fields.add(current.toString().trim());
183        current.setLength(0);
184      } else {
185        current.append(c);
186      }
187    }
188
189    fields.add(current.toString().trim());
190    return fields.toArray(new String[0]);
191  }
192
193  /**
194   * Write filtered row with only selected columns
195   *
196   * @param writer output writer
197   * @param fields all field values
198   * @param columnIndices indices of columns to include
199   * @throws IOException if writing fails
200   */
201  private void writeFilteredRow(Writer writer, String[] fields, List<Integer> columnIndices)
202      throws IOException {
203    for (int i = 0; i < columnIndices.size(); i++) {
204      if (i > 0) {
205        writer.write(",");
206      }
207
208      int columnIndex = columnIndices.get(i);
209      if (columnIndex < fields.length) {
210        String field = fields[columnIndex];
211        // Quote field if it contains comma or quotes
212        if (field.contains(",") || field.contains("\"")) {
213          writer.write("\"" + field.replace("\"", "\"\"") + "\"");
214        } else {
215          writer.write(field);
216        }
217      } else {
218        // Column doesn't exist in this row - write empty field
219        writer.write("");
220      }
221    }
222  }
223}