package com.streamconverter.command.impl.xml;

import com.streamconverter.command.AbstractStreamCommand;
import com.streamconverter.path.IPath;
import com.streamconverter.security.SecureXmlConfiguration;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLEventWriter;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLOutputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.XMLEvent;

/**
 * XML Filter Command Class
 *
 * <p>This class implements pure data extraction from XML using TreePath expressions. Unlike
 * XmlNavigateCommand which applies transformations, XmlFilterCommand only extracts/filters
 * elements based on specified paths without any modifications.
 *
 * <p>Behaviour:
 *
 * <ul>
 *   <li>The input is read event by event with StAX; the path of local element names from the
 *       root to the current element is tested against the configured path on every start tag.
 *   <li>A matching element is serialised together with everything nested inside it. Matches
 *       nested inside an element that is already being captured are not extracted separately.
 *   <li>Extracted fragments are buffered in memory until the end of the input, then written as
 *       UTF-8: nothing for zero matches, the bare fragment for one match, and an XML declaration
 *       plus a {@code <filtered-results>} wrapper for several matches.
 * </ul>
 */
public class XmlFilterCommand extends AbstractStreamCommand {
  private static final Logger LOGGER = Logger.getLogger(XmlFilterCommand.class.getName());

  private final IPath<List<String>> xpath;

  /**
   * Constructor for XML filtering with typed TreePath selector.
   *
   * @param xpath the typed TreePath to extract elements
   * @throws IllegalArgumentException if xpath is null
   */
  public XmlFilterCommand(IPath<List<String>> xpath) {
    if (xpath == null) {
      throw new IllegalArgumentException("TreePath cannot be null");
    }
    this.xpath = xpath;
  }

  @Override
  protected String getCommandDetails() {
    return String.format("XmlFilterCommand(xpath='%s')", xpath.toString());
  }

  /**
   * Parses the input as XML, extracts every element matching the configured path and writes the
   * result to the output as UTF-8.
   *
   * <p>The output stream is closed when this method returns, because the wrapping writer is
   * managed by try-with-resources.
   *
   * @param inputStream the XML document to filter
   * @param outputStream the destination for the extracted fragment(s)
   * @throws IOException if reading, parsing, serialising or writing fails
   */
  @Override
  protected void executeInternal(InputStream inputStream, OutputStream outputStream)
      throws IOException {
    try (Writer writer = new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)) {
      XMLInputFactory inputFactory = SecureXmlConfiguration.createSecureXMLInputFactory();
      XMLEventReader reader = inputFactory.createXMLEventReader(inputStream);
      try {
        writeExtractedElements(writer, extractMatchingElements(reader));
      } finally {
        // XMLEventReader is not AutoCloseable, so release it explicitly on every path.
        closeReader(reader);
      }
    } catch (XMLStreamException e) {
      throw new IOException("Error processing XML: " + e.getMessage(), e);
    }
  }

  /**
   * Walks the event stream and serialises every element whose path matches {@link #xpath}.
   *
   * <p>One {@link XMLEventWriter} is used per captured element, from its start tag to its end
   * tag. A writer created per event would have no record of the elements opened by earlier
   * events, so it could not reliably serialise the corresponding end tags.
   *
   * @param reader the event source positioned at the beginning of the document
   * @return the serialised fragments in document order; empty if nothing matched
   * @throws XMLStreamException if the input cannot be parsed or a fragment cannot be serialised
   */
  private List<String> extractMatchingElements(XMLEventReader reader) throws XMLStreamException {
    XMLOutputFactory outputFactory = XMLOutputFactory.newInstance();
    List<String> extractedElements = new ArrayList<>();
    List<String> currentPath = new ArrayList<>();

    // Non-null only while an element is being captured.
    StringWriter captureBuffer = null;
    XMLEventWriter captureWriter = null;
    // Depth (size of currentPath) of the element that started the current capture.
    int captureDepth = 0;

    while (reader.hasNext()) {
      XMLEvent event = reader.nextEvent();

      if (event.isStartElement()) {
        currentPath.add(event.asStartElement().getName().getLocalPart());

        if (xpath.matches(currentPath) && captureWriter == null) {
          captureBuffer = new StringWriter();
          captureWriter = outputFactory.createXMLEventWriter(captureBuffer);
          captureDepth = currentPath.size();
        }
        if (captureWriter != null) {
          captureWriter.add(event);
        }

      } else if (event.isEndElement()) {
        if (captureWriter != null) {
          captureWriter.add(event);

          // The end tag of the element that started the capture completes the fragment.
          if (currentPath.size() == captureDepth) {
            captureWriter.flush();
            captureWriter.close();
            extractedElements.add(captureBuffer.toString());
            captureWriter = null;
            captureBuffer = null;
            captureDepth = 0;
          }
        }
        currentPath.remove(currentPath.size() - 1);

      } else if (captureWriter != null) {
        // Characters, comments, etc. inside captured element
        captureWriter.add(event);
      }
    }

    return extractedElements;
  }

  /**
   * Closes the reader, logging instead of throwing so that a failure during cleanup cannot mask
   * an exception raised while processing.
   *
   * @param reader the reader to close
   */
  private void closeReader(XMLEventReader reader) {
    try {
      reader.close();
    } catch (XMLStreamException e) {
      LOGGER.warning("Error closing XML reader: " + e.getMessage());
    }
  }

  /**
   * Write extracted XML elements to the output writer
   *
   * <p>Nothing is written for an empty list, a single element is written as-is, and several
   * elements are wrapped in a {@code <filtered-results>} root preceded by an XML declaration. The
   * writer is flushed in every case.
   *
   * @param writer the output writer
   * @param elements list of extracted XML elements
   * @throws IOException if writing fails
   */
  private void writeExtractedElements(Writer writer, List<String> elements) throws IOException {
    if (elements.size() == 1) {
      // Single element - write directly
      writer.write(elements.get(0));
    } else if (!elements.isEmpty()) {
      // Multiple elements - wrap in a root element
      writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
      writer.write("<filtered-results>");
      for (String element : elements) {
        writer.write(element);
      }
      writer.write("</filtered-results>");
    }
    // No matching elements: nothing to write, but still flush.

    writer.flush();
  }
}