001package com.streamconverter.command.rule.impl.casing;
002
import com.streamconverter.command.rule.IRule;
import java.util.Locale;
import java.util.regex.Pattern;
004
005/**
006 * Transforms camelCase strings to snake_case format.
007 *
008 * <p>This rule converts strings from camelCase notation to snake_case notation by inserting
009 * underscores before uppercase letters and converting all letters to lowercase.
010 *
011 * <p>Examples:
012 *
013 * <ul>
014 *   <li>{@code userName} → {@code user_name}
015 *   <li>{@code firstName} → {@code first_name}
016 *   <li>{@code userAccountID} → {@code user_account_id}
017 *   <li>{@code XMLHttpRequest} → {@code xml_http_request}
018 * </ul>
019 *
020 * <p>Usage:
021 *
022 * <pre>{@code
023 * IRule rule = CamelToSnakeCaseRule.builder().build();
024 * IStreamCommand command = JsonNavigateCommand.create("$.userName", rule);
025 * }</pre>
026 *
027 * @since 1.0
028 */
029public class CamelToSnakeCaseRule implements IRule {
030
031  /** Whether to preserve leading/trailing underscores */
032  private final boolean preserveUnderscores;
033
034  /** Whether to handle acronyms specially */
035  private final boolean handleAcronyms;
036
037  /** Private constructor for builder pattern */
038  private CamelToSnakeCaseRule(boolean preserveUnderscores, boolean handleAcronyms) {
039    this.preserveUnderscores = preserveUnderscores;
040    this.handleAcronyms = handleAcronyms;
041  }
042
043  @Override
044  public String apply(String input) {
045    if (input == null || input.isEmpty()) {
046      return input;
047    }
048
049    String result;
050
051    if (handleAcronyms) {
052      result = processWithAcronyms(input);
053    } else {
054      // Simple camelCase conversion - separate ALL uppercase letters
055      result = processWithoutAcronyms(input);
056    }
057
058    // Convert to lowercase
059    result = result.toLowerCase();
060
061    // Clean up multiple underscores if not preserving
062    if (!preserveUnderscores) {
063      result = result.replaceAll("_{2,}", "_");
064      // Remove leading/trailing underscores
065      result = result.replaceAll("^_+|_+$", "");
066    }
067
068    return result;
069  }
070
071  /**
072   * Processes input with proper acronym handling using character-by-character analysis.
073   *
074   * @param input the input string to process
075   * @return processed string with underscores inserted at proper boundaries
076   */
077  private String processWithAcronyms(String input) {
078    // Use a three-pass approach for better acronym handling
079    String result = input;
080
081    // Pass 1: Handle consecutive acronyms like JSONAPI -> JSON_API FIRST
082    // This must happen before other transformations
083    result = splitConsecutiveAcronyms(result);
084
085    // Pass 2: Insert underscores between lowercase/digit/underscore and uppercase
086    // Include underscore in the pattern if preserving underscores
087    String lowercasePattern = preserveUnderscores ? "([a-z0-9_])([A-Z])" : "([a-z0-9])([A-Z])";
088    result = result.replaceAll(lowercasePattern, "$1_$2");
089
090    // Pass 3: Insert underscores before uppercase that's followed by lowercase (end of acronym)
091    result = result.replaceAll("([A-Z])([A-Z][a-z])", "$1_$2");
092
093    return result;
094  }
095
096  /**
097   * Processes input without acronym handling - separates EVERY uppercase letter.
098   *
099   * @param input the input string to process
100   * @return processed string with underscores between every case transition
101   */
102  private String processWithoutAcronyms(String input) {
103    if (input.length() <= 1) {
104      return input;
105    }
106
107    StringBuilder result = new StringBuilder();
108    char[] chars = input.toCharArray();
109
110    for (int i = 0; i < chars.length; i++) {
111      char current = chars[i];
112      char previous = i > 0 ? chars[i - 1] : '\0';
113
114      // Add underscore before current character if:
115      // 1. Previous is lowercase/digit and current is uppercase
116      // 2. Previous is uppercase and current is uppercase (separate all uppercase)
117      if (i > 0
118          && Character.isUpperCase(current)
119          && (Character.isLowerCase(previous)
120              || Character.isDigit(previous)
121              || Character.isUpperCase(previous))) {
122        result.append('_');
123      }
124
125      result.append(current);
126    }
127
128    return result.toString();
129  }
130
131  /** Common acronyms that should be split when found consecutively */
132  private static final String[] COMMON_ACRONYMS = {
133    "JSON", "XML", "API", "URL", "HTTP", "HTTPS", "FTP", "SQL", "HTML", "CSS", "JS", "REST", "SOAP"
134  };
135
136  /**
137   * Splits consecutive acronyms based on configurable patterns. This method uses a dictionary
138   * approach to identify and split consecutive acronyms like JSONAPI -> JSON_API
139   */
140  private String splitConsecutiveAcronyms(String input) {
141    String result = input;
142
143    // For each acronym, split if it is immediately followed by another uppercase sequence (length
144    // >= 3)
145    for (String acronym : COMMON_ACRONYMS) {
146      // Use regex to match the acronym followed by another uppercase sequence of length >= 3
147      // e.g., JSONAPI -> JSON_API, XMLHTTPS -> XML_HTTPS
148      result = result.replaceAll(acronym + "([A-Z]{3,})", acronym + "_$1");
149    }
150
151    return result;
152  }
153
154  /**
155   * Creates a builder for configuring the CamelToSnakeCaseRule.
156   *
157   * @return new builder instance
158   */
159  public static Builder builder() {
160    return new Builder();
161  }
162
163  /**
164   * Creates a CamelToSnakeCaseRule with default settings.
165   *
166   * @return new rule instance with default configuration
167   */
168  public static CamelToSnakeCaseRule create() {
169    return new Builder().build();
170  }
171
172  /** Builder class for CamelToSnakeCaseRule configuration. */
173  public static class Builder {
174    private boolean preserveUnderscores = false;
175    private boolean handleAcronyms = true;
176
177    /**
178     * Sets whether to preserve existing underscores in the input.
179     *
180     * @param preserve true to preserve underscores, false to clean them up
181     * @return this builder
182     */
183    public Builder preserveUnderscores(boolean preserve) {
184      this.preserveUnderscores = preserve;
185      return this;
186    }
187
188    /**
189     * Sets whether to handle acronyms specially (e.g., XMLHttpRequest → xml_http_request).
190     *
191     * @param handle true to handle acronyms, false to treat them as regular uppercase
192     * @return this builder
193     */
194    public Builder handleAcronyms(boolean handle) {
195      this.handleAcronyms = handle;
196      return this;
197    }
198
199    /**
200     * Builds the CamelToSnakeCaseRule with current configuration.
201     *
202     * @return configured rule instance
203     */
204    public CamelToSnakeCaseRule build() {
205      return new CamelToSnakeCaseRule(preserveUnderscores, handleAcronyms);
206    }
207  }
208
209  @Override
210  public String toString() {
211    return String.format(
212        "CamelToSnakeCaseRule{preserveUnderscores=%s, handleAcronyms=%s}",
213        preserveUnderscores, handleAcronyms);
214  }
215}