package com.streamconverter.command.impl.xml;

import com.streamconverter.command.AbstractStreamCommand;
import com.streamconverter.path.IPath;
import com.streamconverter.security.SecureXmlConfiguration;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * XML Filter Command Class
 *
 * <p>This class implements pure data extraction from XML using TreePath expressions. Unlike
 * XmlNavigateCommand which applies transformations, XmlFilterCommand only extracts/filters elements
 * based on specified paths without any modifications.
 *
 * <p>Features:
 *
 * <ul>
 *   <li>Extract specific elements using TreePath expressions
 *   <li>Preserve the XML structure of extracted elements
 *   <li>Streaming processing: the input is read event by event and only the element currently
 *       being captured (plus the first match, see below) is buffered
 * </ul>
 *
 * <p>Output shape:
 *
 * <ul>
 *   <li>no match: nothing is written
 *   <li>exactly one match: the element is written as-is, without an XML declaration
 *   <li>two or more matches: an XML declaration followed by the matches wrapped in a {@code
 *       <filtered-results>} root element
 * </ul>
 *
 * <p>Paths are matched against element <em>local</em> names only (namespace prefixes/URIs are not
 * part of the path). A matching element nested inside an element that is already being captured is
 * emitted as part of its ancestor, not as a separate result.
 */
public class XmlFilterCommand extends AbstractStreamCommand {
  private static final Logger LOGGER = LoggerFactory.getLogger(XmlFilterCommand.class);

  private final IPath<List<String>> xpath;

  /**
   * Constructor for XML filtering with typed TreePath selector.
   *
   * <p>Performs no validation; use {@link #create(IPath)}, which rejects {@code null}.
   *
   * @param xpath the typed TreePath to extract elements
   */
  private XmlFilterCommand(IPath<List<String>> xpath) {
    this.xpath = xpath;
  }

  /**
   * Factory method for XML filtering with typed path selector.
   *
   * @param xpath the typed path to extract elements
   * @return an XmlFilterCommand instance
   * @throws IllegalArgumentException if xpath is null
   */
  public static XmlFilterCommand create(IPath<List<String>> xpath) {
    if (xpath == null) {
      throw new IllegalArgumentException("TreePath cannot be null");
    }
    return new XmlFilterCommand(xpath);
  }

  /**
   * Streams the XML from {@code inputStream} and writes every element whose path matches the
   * configured selector to {@code outputStream} as UTF-8.
   *
   * <p>The output stream is wrapped in a writer that is closed when this method returns, which
   * also closes {@code outputStream}.
   *
   * <p>Errors while <em>writing</em> a captured element are logged and that capture is abandoned
   * (best effort); errors while <em>reading</em> the input are rethrown as {@link IOException}.
   *
   * @param inputStream the XML input
   * @param outputStream the destination for the extracted elements
   * @throws IOException if the XML reader cannot be created, the input cannot be parsed, or writing
   *     to the output fails
   */
  @Override
  public void execute(InputStream inputStream, OutputStream outputStream) throws IOException {
    try (Writer writer = new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)) {

      XMLInputFactory inputFactory = SecureXmlConfiguration.createSecureXMLInputFactory();

      XMLEventReader reader;
      try {
        reader = inputFactory.createXMLEventReader(inputStream);
      } catch (XMLStreamException e) {
        throw new IOException("Error creating XML reader: " + e.getMessage(), e);
      }

      try {
        List<String> currentPath = new ArrayList<>();
        boolean isCapturing = false;
        int captureDepth = 0;
        int currentDepth = 0;
        // The first match is held back until we know whether a second one follows: a single
        // result is emitted bare, several results are emitted inside a wrapper element.
        String firstExtractedElement = null;
        boolean isWrappedOutput = false;

        XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
        StringWriter elementWriter = new StringWriter();
        // Invariant: eventWriter is non-null only while isCapturing is true.
        XMLEventWriter eventWriter = null;

        while (reader.hasNext()) {
          XMLEvent event = reader.nextEvent();

          if (event.isStartElement()) {
            currentDepth++;
            String elementName = event.asStartElement().getName().getLocalPart();
            currentPath.add(elementName);

            // Check if this element matches our target path
            if (xpath.matches(currentPath) && !isCapturing) {
              isCapturing = true;
              captureDepth = currentDepth;
              elementWriter = new StringWriter();
              try {
                eventWriter = outputFactory.createXMLEventWriter(elementWriter);
                eventWriter.add(event);
              } catch (XMLStreamException e) {
                // Reset the whole capture state. The writer may have been created before add()
                // failed, so release it instead of leaving a stale reference behind.
                isCapturing = false;
                captureDepth = 0;
                LOGGER.warn("Error writing start element", e);
                closeEventWriter(eventWriter);
                eventWriter = null;
              }
            } else if (isCapturing && currentDepth > captureDepth) {
              // We're inside a matching element, continue capturing with the same writer
              try {
                if (eventWriter != null) {
                  eventWriter.add(event);
                }
              } catch (XMLStreamException e) {
                // Abort capture to avoid writing corrupt partial state
                isCapturing = false;
                captureDepth = 0;
                LOGGER.warn("Error writing nested start element; aborting capture", e);
                closeEventWriter(eventWriter);
                eventWriter = null;
              }
            }

          } else if (event.isEndElement()) {
            if (isCapturing) {
              try {
                if (eventWriter != null) {
                  eventWriter.add(event);
                }
              } catch (XMLStreamException e) {
                // Abort capture to avoid corrupt state propagation
                isCapturing = false;
                captureDepth = 0;
                LOGGER.warn("Error writing end element; aborting capture", e);
                closeEventWriter(eventWriter);
                eventWriter = null;
              }

              // If we're closing the captured element (re-check isCapturing in case catch reset it)
              if (isCapturing && currentDepth == captureDepth) {
                closeEventWriter(eventWriter);
                eventWriter = null;

                String extractedElement = elementWriter.toString();
                if (isWrappedOutput) {
                  // Third and later matches: wrapper already open, stream straight through.
                  writer.write(extractedElement);
                  writer.flush();
                } else if (firstExtractedElement == null) {
                  // First match: hold it back until we know whether a wrapper is needed.
                  firstExtractedElement = extractedElement;
                } else {
                  // Second match: open the wrapper, emit the held-back first match, then this one.
                  writeWrappedOutputStart(writer, firstExtractedElement);
                  isWrappedOutput = true;
                  firstExtractedElement = null;
                  writer.write(extractedElement);
                  writer.flush();
                }
                isCapturing = false;
                captureDepth = 0;
              }
            }

            currentPath.remove(currentPath.size() - 1);
            currentDepth--;

          } else if (isCapturing) {
            // Characters, comments, etc. inside captured element
            try {
              if (eventWriter != null) {
                eventWriter.add(event);
              }
            } catch (XMLStreamException e) {
              // Abort capture to avoid corrupt state propagation
              isCapturing = false;
              captureDepth = 0;
              LOGGER.warn("Error writing content; aborting capture", e);
              closeEventWriter(eventWriter);
              eventWriter = null;
            }
          }
        }

        // Ensure writer is closed if the input ended while a capture was still open
        closeEventWriter(eventWriter);

        writeRemainingOutput(writer, firstExtractedElement, isWrappedOutput);

      } catch (XMLStreamException e) {
        throw new IOException("Error processing XML: " + e.getMessage(), e);
      } finally {
        try {
          reader.close();
        } catch (XMLStreamException e) {
          LOGGER.warn("Error closing XML reader: {}", e.getMessage(), e);
        }
      }
    }
  }

  /**
   * Closes the given event writer, logging (not propagating) any failure.
   *
   * @param eventWriter the writer to close; may be {@code null}, in which case nothing happens
   */
  private static void closeEventWriter(XMLEventWriter eventWriter) {
    if (eventWriter == null) {
      return;
    }
    try {
      eventWriter.close();
    } catch (XMLStreamException e) {
      LOGGER.warn("Error closing event writer: {}", e.getMessage(), e);
    }
  }

  /**
   * Opens the multi-result wrapper: writes the XML declaration, the opening {@code
   * <filtered-results>} tag and the held-back first extracted element.
   *
   * @param writer the output writer
   * @param firstElement the serialized first extracted element
   * @throws IOException if writing fails
   */
  private void writeWrappedOutputStart(Writer writer, String firstElement) throws IOException {
    writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    writer.write("<filtered-results>");
    writer.write(firstElement);
  }

  /**
   * Finishes the output once the input has been fully read, then flushes the writer.
   *
   * <p>If exactly one element was extracted it is still held back and is written bare here; if the
   * wrapper was opened, its closing {@code </filtered-results>} tag is written.
   *
   * @param writer the output writer
   * @param firstExtractedElement the held-back single result, or {@code null} if there is none
   * @param isWrappedOutput whether the {@code <filtered-results>} wrapper has been opened
   * @throws IOException if writing fails
   */
  private void writeRemainingOutput(
      Writer writer, String firstExtractedElement, boolean isWrappedOutput) throws IOException {
    if (firstExtractedElement != null) {
      writer.write(firstExtractedElement);
    }
    if (isWrappedOutput) {
      writer.write("</filtered-results>");
    }
    writer.flush();
  }
}