001package com.streamconverter.command.impl.xml;
002
003import com.streamconverter.command.AbstractStreamCommand;
004import com.streamconverter.path.IPath;
005import com.streamconverter.security.SecureXmlConfiguration;
006import java.io.IOException;
007import java.io.InputStream;
008import java.io.OutputStream;
009import java.io.OutputStreamWriter;
010import java.io.StringWriter;
011import java.io.Writer;
012import java.nio.charset.StandardCharsets;
013import java.util.ArrayList;
014import java.util.List;
015import javax.xml.stream.XMLEventReader;
016import javax.xml.stream.XMLEventWriter;
017import javax.xml.stream.XMLInputFactory;
018import javax.xml.stream.XMLOutputFactory;
019import javax.xml.stream.XMLStreamException;
020import javax.xml.stream.events.XMLEvent;
021import org.slf4j.Logger;
022import org.slf4j.LoggerFactory;
023
/**
 * XML Filter Command Class
 *
 * <p>This class implements pure data extraction from XML using TreePath expressions. Unlike
 * XmlNavigateCommand, which applies transformations, XmlFilterCommand only extracts/filters
 * elements based on the specified path without any modifications.
 *
 * <p>Features:
 *
 * <ul>
 *   <li>Extract specific elements using TreePath expressions
 *   <li>Preserve exact XML structure of extracted elements
 *   <li>Memory-efficient streaming processing
 *   <li>Support for complex path expressions including nested elements and attributes
 * </ul>
 */
035public class XmlFilterCommand extends AbstractStreamCommand {
036  private static final Logger LOGGER = LoggerFactory.getLogger(XmlFilterCommand.class);
037
038  private final IPath<List<String>> xpath;
039
040  /**
041   * Constructor for XML filtering with typed TreePath selector.
042   *
043   * @param xpath the typed TreePath to extract elements
044   * @throws IllegalArgumentException if xpath is null
045   */
046  private XmlFilterCommand(IPath<List<String>> xpath) {
047    this.xpath = xpath;
048  }
049
050  /**
051   * Factory method for XML filtering with typed path selector.
052   *
053   * @param xpath the typed path to extract elements
054   * @return an XmlFilterCommand instance
055   * @throws IllegalArgumentException if xpath is null
056   */
057  public static XmlFilterCommand create(IPath<List<String>> xpath) {
058    if (xpath == null) {
059      throw new IllegalArgumentException("TreePath cannot be null");
060    }
061    return new XmlFilterCommand(xpath);
062  }
063
064  @Override
065  public void execute(InputStream inputStream, OutputStream outputStream) throws IOException {
066    try (Writer writer = new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)) {
067
068      XMLInputFactory inputFactory = SecureXmlConfiguration.createSecureXMLInputFactory();
069
070      XMLEventReader reader;
071      try {
072        reader = inputFactory.createXMLEventReader(inputStream);
073      } catch (XMLStreamException e) {
074        throw new IOException("Error creating XML reader: " + e.getMessage(), e);
075      }
076
077      try {
078        List<String> currentPath = new ArrayList<>();
079        boolean isCapturing = false;
080        int captureDepth = 0;
081        int currentDepth = 0;
082        String firstExtractedElement = null;
083        boolean isWrappedOutput = false;
084
085        XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
086        StringWriter elementWriter = new StringWriter();
087        XMLEventWriter eventWriter = null;
088
089        while (reader.hasNext()) {
090          XMLEvent event = reader.nextEvent();
091
092          if (event.isStartElement()) {
093            currentDepth++;
094            String elementName = event.asStartElement().getName().getLocalPart();
095            currentPath.add(elementName);
096
097            // Check if this element matches our target path
098            if (xpath.matches(currentPath) && !isCapturing) {
099              isCapturing = true;
100              captureDepth = currentDepth;
101              elementWriter = new StringWriter();
102              try {
103                eventWriter = outputFactory.createXMLEventWriter(elementWriter);
104                eventWriter.add(event);
105              } catch (XMLStreamException e) {
106                // Reset capturing state to avoid leaving isCapturing=true with eventWriter=null
107                isCapturing = false;
108                captureDepth = 0;
109                LOGGER.warn("Error writing start element", e);
110              }
111            } else if (isCapturing && currentDepth > captureDepth) {
112              // We're inside a matching element, continue capturing with the same writer
113              try {
114                if (eventWriter != null) {
115                  eventWriter.add(event);
116                }
117              } catch (XMLStreamException e) {
118                // Abort capture to avoid writing corrupt partial state
119                isCapturing = false;
120                captureDepth = 0;
121                eventWriter = null;
122                LOGGER.warn("Error writing nested start element; aborting capture", e);
123              }
124            }
125
126          } else if (event.isEndElement()) {
127            if (isCapturing) {
128              try {
129                if (eventWriter != null) {
130                  eventWriter.add(event);
131                }
132              } catch (XMLStreamException e) {
133                // Abort capture to avoid corrupt state propagation
134                isCapturing = false;
135                captureDepth = 0;
136                eventWriter = null;
137                LOGGER.warn("Error writing end element; aborting capture", e);
138              }
139
140              // If we're closing the captured element (re-check isCapturing in case catch reset it)
141              if (isCapturing && currentDepth == captureDepth) {
142                try {
143                  if (eventWriter != null) {
144                    eventWriter.close();
145                    eventWriter = null;
146                  }
147                } catch (XMLStreamException e) {
148                  eventWriter = null;
149                  LOGGER.warn("Error closing event writer: {}", e.getMessage(), e);
150                }
151                String extractedElement = elementWriter.toString();
152                if (isWrappedOutput) {
153                  writer.write(extractedElement);
154                  writer.flush();
155                } else if (firstExtractedElement == null) {
156                  firstExtractedElement = extractedElement;
157                } else {
158                  writeWrappedOutputStart(writer, firstExtractedElement);
159                  isWrappedOutput = true;
160                  firstExtractedElement = null;
161                  writer.write(extractedElement);
162                  writer.flush();
163                }
164                isCapturing = false;
165                captureDepth = 0;
166              }
167            }
168
169            currentPath.remove(currentPath.size() - 1);
170            currentDepth--;
171
172          } else if (isCapturing) {
173            // Characters, comments, etc. inside captured element
174            try {
175              if (eventWriter != null) {
176                eventWriter.add(event);
177              }
178            } catch (XMLStreamException e) {
179              // Abort capture to avoid corrupt state propagation
180              isCapturing = false;
181              captureDepth = 0;
182              eventWriter = null;
183              LOGGER.warn("Error writing content; aborting capture", e);
184            }
185          }
186        }
187
188        // Ensure writer is closed if capture was interrupted
189        if (eventWriter != null) {
190          try {
191            eventWriter.close();
192          } catch (XMLStreamException e) {
193            LOGGER.warn("Error closing event writer: {}", e.getMessage(), e);
194          }
195        }
196
197        writeRemainingOutput(writer, firstExtractedElement, isWrappedOutput);
198
199      } catch (XMLStreamException e) {
200        throw new IOException("Error processing XML: " + e.getMessage(), e);
201      } finally {
202        try {
203          reader.close();
204        } catch (XMLStreamException e) {
205          LOGGER.warn("Error closing XML reader: {}", e.getMessage(), e);
206        }
207      }
208    }
209  }
210
211  /**
212   * Write extracted XML elements to the output writer
213   *
214   * @param writer the output writer
215   * @param elements list of extracted XML elements
216   * @throws IOException if writing fails
217   */
218  private void writeWrappedOutputStart(Writer writer, String firstElement) throws IOException {
219    writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
220    writer.write("<filtered-results>");
221    writer.write(firstElement);
222  }
223
224  private void writeRemainingOutput(
225      Writer writer, String firstExtractedElement, boolean isWrappedOutput) throws IOException {
226    if (firstExtractedElement != null) {
227      writer.write(firstExtractedElement);
228    }
229    if (isWrappedOutput) {
230      writer.write("</filtered-results>");
231    }
232    writer.flush();
233  }
234}