001package com.streamconverter.command.impl.xml; 002 003import com.streamconverter.command.AbstractStreamCommand; 004import com.streamconverter.path.IPath; 005import java.io.IOException; 006import java.io.InputStream; 007import java.io.OutputStream; 008import java.io.OutputStreamWriter; 009import java.io.StringWriter; 010import java.io.Writer; 011import java.nio.charset.StandardCharsets; 012import java.util.ArrayList; 013import java.util.List; 014import java.util.logging.Logger; 015import javax.xml.stream.XMLEventReader; 016import javax.xml.stream.XMLEventWriter; 017import javax.xml.stream.XMLInputFactory; 018import javax.xml.stream.XMLOutputFactory; 019import javax.xml.stream.XMLStreamException; 020import javax.xml.stream.events.XMLEvent; 021 022/** 023 * XML Filter Command Class 024 * 025 * <p>This class implements pure data extraction from XML using TreePath expressions. Unlike 026 * XmlNavigateCommand which applies transformations, XmlFilterCommand only extracts/filters elements 027 * based on specified paths without any modifications. 028 * 029 * <p>Features: - Extract specific elements using TreePath expressions - Preserve exact XML 030 * structure of extracted elements - Memory-efficient streaming processing - Support for complex 031 * path expressions including nested elements and attributes 032 */ 033public class XmlFilterCommand extends AbstractStreamCommand { 034 private static final Logger LOGGER = Logger.getLogger(XmlFilterCommand.class.getName()); 035 036 private final IPath<List<String>> xpath; 037 038 /** 039 * Constructor for XML filtering with typed TreePath selector. 040 * 041 * @param xpath the typed TreePath to extract elements 042 * @throws IllegalArgumentException if xpath is null 043 */ 044 public XmlFilterCommand(IPath<List<String>> xpath) { 045 if (xpath == null) { 046 throw new IllegalArgumentException("TreePath cannot be null"); 047 } 048 this.xpath = xpath; 049 xpath.toString(); 050 } 051 052 @Override 053 protected String getCommandDetails() { 054 return String.format("XmlFilterCommand(xpath='%s')", xpath.toString()); 055 } 056 057 @Override 058 protected void executeInternal(InputStream inputStream, OutputStream outputStream) 059 throws IOException { 060 try (Writer writer = new OutputStreamWriter(outputStream, StandardCharsets.UTF_8)) { 061 062 XMLInputFactory inputFactory = XMLInputFactory.newInstance(); 063 inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false); 064 inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false); 065 066 List<String> extractedElements = new ArrayList<>(); 067 068 XMLEventReader reader = inputFactory.createXMLEventReader(inputStream); 069 List<String> currentPath = new ArrayList<>(); 070 boolean isCapturing = false; 071 int captureDepth = 0; 072 int currentDepth = 0; 073 074 XMLOutputFactory outputFactory = XMLOutputFactory.newInstance(); 075 StringWriter elementWriter = new StringWriter(); 076 077 while (reader.hasNext()) { 078 XMLEvent event = reader.nextEvent(); 079 080 if (event.isStartElement()) { 081 currentDepth++; 082 String elementName = event.asStartElement().getName().getLocalPart(); 083 currentPath.add(elementName); 084 085 // Check if this element matches our target path 086 if (xpath.matches(currentPath) && !isCapturing) { 087 isCapturing = true; 088 captureDepth = currentDepth; 089 elementWriter = new StringWriter(); 090 091 try { 092 XMLEventWriter eventWriter = outputFactory.createXMLEventWriter(elementWriter); 093 eventWriter.add(event); 094 eventWriter.close(); 095 } catch (XMLStreamException e) { 096 LOGGER.warning("Error writing start element: " + e.getMessage()); 097 } 098 } else if (isCapturing && currentDepth > captureDepth) { 099 // We're inside a matching element, continue capturing 100 try { 101 XMLEventWriter eventWriter = outputFactory.createXMLEventWriter(elementWriter); 102 eventWriter.add(event); 103 eventWriter.close(); 104 } catch (XMLStreamException e) { 105 LOGGER.warning("Error writing nested start element: " + e.getMessage()); 106 } 107 } 108 109 } else if (event.isEndElement()) { 110 if (isCapturing) { 111 try { 112 XMLEventWriter eventWriter = outputFactory.createXMLEventWriter(elementWriter); 113 eventWriter.add(event); 114 eventWriter.close(); 115 } catch (XMLStreamException e) { 116 LOGGER.warning("Error writing end element: " + e.getMessage()); 117 } 118 119 // If we're closing the captured element 120 if (currentDepth == captureDepth) { 121 extractedElements.add(elementWriter.toString()); 122 isCapturing = false; 123 captureDepth = 0; 124 } 125 } 126 127 currentPath.remove(currentPath.size() - 1); 128 currentDepth--; 129 130 } else if (isCapturing) { 131 // Characters, comments, etc. inside captured element 132 try { 133 XMLEventWriter eventWriter = outputFactory.createXMLEventWriter(elementWriter); 134 eventWriter.add(event); 135 eventWriter.close(); 136 } catch (XMLStreamException e) { 137 LOGGER.warning("Error writing content: " + e.getMessage()); 138 } 139 } 140 } 141 142 // Write extracted elements to output 143 writeExtractedElements(writer, extractedElements); 144 145 reader.close(); 146 } catch (XMLStreamException e) { 147 throw new IOException("Error processing XML: " + e.getMessage(), e); 148 } 149 } 150 151 /** 152 * Write extracted XML elements to the output writer 153 * 154 * @param writer the output writer 155 * @param elements list of extracted XML elements 156 * @throws IOException if writing fails 157 */ 158 private void writeExtractedElements(Writer writer, List<String> elements) throws IOException { 159 if (elements.isEmpty()) { 160 // No matching elements found - output empty XML fragment 161 writer.write(""); 162 writer.flush(); 163 return; 164 } 165 166 if (elements.size() == 1) { 167 // Single element - write directly 168 writer.write(elements.get(0)); 169 } else { 170 // Multiple elements - wrap in a root element 171 writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); 172 writer.write("<filtered-results>"); 173 for (String element : elements) { 174 writer.write(element); 175 } 176 writer.write("</filtered-results>"); 177 } 178 179 writer.flush(); 180 } 181}