package com.streamconverter.command.rule.impl.casing;

import com.streamconverter.command.rule.IRule;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * Transforms camelCase strings to snake_case format.
 *
 * <p>This rule converts strings from camelCase notation to snake_case notation by inserting
 * underscores before uppercase letters and converting all letters to lowercase. Lowercasing is
 * performed with {@link Locale#ROOT}, so the result does not depend on the JVM default locale.
 *
 * <p>Examples (default configuration):
 *
 * <ul>
 *   <li>{@code userName} → {@code user_name}
 *   <li>{@code firstName} → {@code first_name}
 *   <li>{@code userAccountID} → {@code user_account_id}
 *   <li>{@code XMLHttpRequest} → {@code xml_http_request}
 * </ul>
 *
 * <p>Usage:
 *
 * <pre>{@code
 * IRule rule = CamelToSnakeCaseRule.builder().build();
 * IStreamCommand command = JsonNavigateCommand.create("$.userName", rule);
 * }</pre>
 *
 * <p>Instances hold only two final boolean flags and are therefore immutable.
 *
 * @since 1.0
 */
public class CamelToSnakeCaseRule implements IRule {

  /** Common acronyms that should be split when found consecutively. */
  private static final String[] COMMON_ACRONYMS = {
    "JSON", "XML", "API", "URL", "HTTP", "HTTPS", "FTP", "SQL", "HTML", "CSS", "JS", "REST", "SOAP"
  };

  /**
   * One pre-compiled pattern per entry of {@link #COMMON_ACRONYMS} (same index): the acronym
   * immediately followed by a run of at least three uppercase ASCII letters.
   */
  private static final Pattern[] ACRONYM_SPLITTERS = compileAcronymSplitters();

  /** Lowercase letter or digit directly followed by an uppercase letter. */
  private static final Pattern LOWER_TO_UPPER = Pattern.compile("([a-z0-9])([A-Z])");

  /** Same as {@link #LOWER_TO_UPPER} but an underscore also counts as the left-hand character. */
  private static final Pattern LOWER_OR_UNDERSCORE_TO_UPPER =
      Pattern.compile("([a-z0-9_])([A-Z])");

  /** Uppercase letter followed by an uppercase+lowercase pair, i.e. the end of an acronym. */
  private static final Pattern ACRONYM_END = Pattern.compile("([A-Z])([A-Z][a-z])");

  /** Two or more consecutive underscores. */
  private static final Pattern REPEATED_UNDERSCORES = Pattern.compile("_{2,}");

  /** Underscores at the very start or very end of the string. */
  private static final Pattern EDGE_UNDERSCORES = Pattern.compile("^_+|_+$");

  /**
   * When {@code false}, runs of underscores in the result are collapsed to one and
   * leading/trailing underscores are removed. When {@code true}, no clean-up is performed and, in
   * acronym mode, an underscore is also inserted between an existing underscore and a following
   * uppercase letter.
   */
  private final boolean preserveUnderscores;

  /** Whether to handle acronyms specially. */
  private final boolean handleAcronyms;

  /** Private constructor for builder pattern. */
  private CamelToSnakeCaseRule(boolean preserveUnderscores, boolean handleAcronyms) {
    this.preserveUnderscores = preserveUnderscores;
    this.handleAcronyms = handleAcronyms;
  }

  /**
   * Converts the given camelCase string to snake_case.
   *
   * @param input the string to convert; may be {@code null} or empty
   * @return the converted string, or {@code input} itself when it is {@code null} or empty
   */
  @Override
  public String apply(String input) {
    if (input == null || input.isEmpty()) {
      return input;
    }

    String separated =
        handleAcronyms ? processWithAcronyms(input) : processWithoutAcronyms(input);

    // Locale.ROOT keeps the mapping stable everywhere; with the default locale a Turkish JVM
    // would turn "ID" into "ıd" (dotless i).
    String result = separated.toLowerCase(Locale.ROOT);

    if (!preserveUnderscores) {
      // Collapse runs first, then trim whatever is left at the edges.
      result = REPEATED_UNDERSCORES.matcher(result).replaceAll("_");
      result = EDGE_UNDERSCORES.matcher(result).replaceAll("");
    }

    return result;
  }

  /**
   * Inserts underscores at word boundaries while keeping acronyms together, using three regex
   * passes.
   *
   * @param input the input string to process
   * @return processed string with underscores inserted at proper boundaries (case unchanged)
   */
  private String processWithAcronyms(String input) {
    // Pass 1: split known back-to-back acronyms (JSONAPI -> JSON_API). This has to run first,
    // while the uppercase runs are still intact.
    String result = splitConsecutiveAcronyms(input);

    // Pass 2: lowercase/digit (and underscore, when preserving) followed by uppercase.
    Pattern boundary = preserveUnderscores ? LOWER_OR_UNDERSCORE_TO_UPPER : LOWER_TO_UPPER;
    result = boundary.matcher(result).replaceAll("$1_$2");

    // Pass 3: the last capital of an acronym that starts the next word (XMLHttp -> XML_Http).
    result = ACRONYM_END.matcher(result).replaceAll("$1_$2");

    return result;
  }

  /**
   * Processes input without acronym handling - separates EVERY uppercase letter that follows a
   * lowercase letter, a digit, or another uppercase letter.
   *
   * @param input the input string to process
   * @return processed string with underscores between every case transition (case unchanged)
   */
  private String processWithoutAcronyms(String input) {
    StringBuilder result = new StringBuilder(input.length() + 8);

    for (int i = 0; i < input.length(); i++) {
      char current = input.charAt(i);

      if (i > 0 && Character.isUpperCase(current)) {
        char previous = input.charAt(i - 1);
        // Any letter or digit before an uppercase letter marks a boundary; other characters
        // (for example an existing underscore) do not.
        if (Character.isLowerCase(previous)
            || Character.isDigit(previous)
            || Character.isUpperCase(previous)) {
          result.append('_');
        }
      }

      result.append(current);
    }

    return result.toString();
  }

  /**
   * Splits consecutive acronyms using a dictionary approach, e.g. {@code JSONAPI -> JSON_API} and
   * {@code XMLHTTPS -> XML_HTTPS}. An acronym is only split off when it is immediately followed
   * by at least three uppercase ASCII letters.
   *
   * <p>NOTE(review): acronyms are tried in declaration order, so {@code HTTP} is applied before
   * {@code HTTPS}; {@code HTTPSAPI} therefore becomes {@code HTTP_SAPI}. Confirm whether that is
   * the intended resolution of the ambiguity.
   *
   * @param input the input string to process
   * @return the input with underscores inserted after matching acronyms
   */
  private String splitConsecutiveAcronyms(String input) {
    String result = input;

    for (int i = 0; i < COMMON_ACRONYMS.length; i++) {
      result = ACRONYM_SPLITTERS[i].matcher(result).replaceAll(COMMON_ACRONYMS[i] + "_$1");
    }

    return result;
  }

  /** Compiles the splitter pattern for every entry of {@link #COMMON_ACRONYMS}, index-aligned. */
  private static Pattern[] compileAcronymSplitters() {
    Pattern[] patterns = new Pattern[COMMON_ACRONYMS.length];
    for (int i = 0; i < COMMON_ACRONYMS.length; i++) {
      patterns[i] = Pattern.compile(COMMON_ACRONYMS[i] + "([A-Z]{3,})");
    }
    return patterns;
  }

  /**
   * Creates a builder for configuring the CamelToSnakeCaseRule.
   *
   * @return new builder instance
   */
  public static Builder builder() {
    return new Builder();
  }

  /**
   * Creates a CamelToSnakeCaseRule with default settings.
   *
   * @return new rule instance with default configuration
   */
  public static CamelToSnakeCaseRule create() {
    return new Builder().build();
  }

  /**
   * Builder class for CamelToSnakeCaseRule configuration. Defaults: underscores are not
   * preserved, acronyms are handled.
   */
  public static class Builder {
    private boolean preserveUnderscores = false;
    private boolean handleAcronyms = true;

    /**
     * Sets whether to preserve existing underscores in the input.
     *
     * @param preserve true to preserve underscores, false to clean them up
     * @return this builder
     */
    public Builder preserveUnderscores(boolean preserve) {
      this.preserveUnderscores = preserve;
      return this;
    }

    /**
     * Sets whether to handle acronyms specially (e.g., XMLHttpRequest → xml_http_request).
     *
     * @param handle true to handle acronyms, false to treat them as regular uppercase
     * @return this builder
     */
    public Builder handleAcronyms(boolean handle) {
      this.handleAcronyms = handle;
      return this;
    }

    /**
     * Builds the CamelToSnakeCaseRule with current configuration.
     *
     * @return configured rule instance
     */
    public CamelToSnakeCaseRule build() {
      return new CamelToSnakeCaseRule(preserveUnderscores, handleAcronyms);
    }
  }

  @Override
  public String toString() {
    return String.format(
        "CamelToSnakeCaseRule{preserveUnderscores=%s, handleAcronyms=%s}",
        preserveUnderscores, handleAcronyms);
  }
}