Source code

001package com.streamconverter.benchmark;
002
003import java.io.IOException;
004import java.io.InputStream;
005import java.io.Writer;
006import java.nio.charset.StandardCharsets;
007import java.nio.file.Files;
008import java.nio.file.Path;
009import java.time.Instant;
010import java.util.Random;
011import org.slf4j.Logger;
012import org.slf4j.LoggerFactory;
013
014/**
015 * 大容量テストデータを生成するユーティリティクラス
016 *
017 * <p>XML、JSON、CSVフォーマットで大容量データファイルを生成し、 5GBデータ/50MBメモリ目標のベンチマークテストをサポートします。
018 */
019public class LargeDataGenerator {
020
021  private static final Logger LOG = LoggerFactory.getLogger(LargeDataGenerator.class);
022
023  private static final String[] SAMPLE_NAMES = {
024    "田中太郎", "佐藤花子", "鈴木一郎", "高橋美咲", "渡辺健太",
025    "伊藤由美", "山田隆", "中村恵子", "小林誠", "加藤真理子"
026  };
027
028  private static final String[] SAMPLE_CITIES = {
029    "東京", "大阪", "名古屋", "札幌", "福岡", "仙台", "広島", "京都", "神戸", "横浜"
030  };
031
032  private static final String[] SAMPLE_PRODUCTS = {
033    "スマートフォン", "ノートパソコン", "タブレット", "イヤホン", "キーボード",
034    "マウス", "モニター", "プリンター", "カメラ", "充電器"
035  };
036
037  /** Prevent instantiation. */
038  private LargeDataGenerator() {}
039
040  /**
041   * 大容量XMLファイルを生成します
042   *
043   * @param targetSizeBytes 目標ファイルサイズ（バイト）
044   * @return 生成されたファイルのPath
045   * @throws IOException ファイル作成に失敗した場合
046   */
047  public static Path generateLargeXmlFile(long targetSizeBytes) throws IOException {
048    Path tempFile = Files.createTempFile("large-test", ".xml");
049    Random random = new Random(42); // 再現可能な結果のため固定シード
050
051    try (Writer writer = Files.newBufferedWriter(tempFile)) {
052      writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
053      writer.write("<orders>\n");
054
055      long currentSize = 100; // ヘッダー分
056      int recordCount = 0;
057
058      while (currentSize < targetSizeBytes) {
059        String record = generateXmlRecord(recordCount++, random);
060        writer.write(record);
061        currentSize += record.getBytes("UTF-8").length;
062
063        // 進捗表示（大きなファイルの場合）
064        if (recordCount % 100000 == 0 && targetSizeBytes > 100 * 1024 * 1024) {
065          if (LOG.isInfoEnabled()) {
066            LOG.info("Generated {} records, {:.2f} MB", recordCount, currentSize / 1024.0 / 1024.0);
067          }
068        }
069      }
070
071      writer.write("</orders>\n");
072    }
073
074    return tempFile;
075  }
076
077  /**
078   * 大容量JSONファイルを生成します
079   *
080   * @param targetSizeBytes 目標ファイルサイズ（バイト）
081   * @return 生成されたファイルのPath
082   * @throws IOException ファイル作成に失敗した場合
083   */
084  public static Path generateLargeJsonFile(long targetSizeBytes) throws IOException {
085    Path tempFile = Files.createTempFile("large-test", ".json");
086    Random random = new Random(42);
087
088    try (Writer writer = Files.newBufferedWriter(tempFile)) {
089      writer.write("{\n  \"orders\": [\n");
090
091      long currentSize = 20; // ヘッダー分
092      int recordCount = 0;
093
094      while (currentSize < targetSizeBytes - 100) { // 終了分を考慮
095        if (recordCount > 0) {
096          writer.write(",\n");
097          currentSize += 2;
098        }
099
100        String record = generateJsonRecord(recordCount++, random);
101        writer.write(record);
102        currentSize += record.getBytes("UTF-8").length;
103
104        if (recordCount % 100000 == 0 && targetSizeBytes > 100 * 1024 * 1024) {
105          if (LOG.isInfoEnabled()) {
106            LOG.info("Generated {} records, {:.2f} MB", recordCount, currentSize / 1024.0 / 1024.0);
107          }
108        }
109      }
110
111      writer.write("\n  ]\n}\n");
112    }
113
114    return tempFile;
115  }
116
117  /**
118   * 大容量CSVファイルを生成します
119   *
120   * @param targetSizeBytes 目標ファイルサイズ（バイト）
121   * @return 生成されたファイルのPath
122   * @throws IOException ファイル作成に失敗した場合
123   */
124  public static Path generateLargeCsvFile(long targetSizeBytes) throws IOException {
125    Path tempFile = Files.createTempFile("large-test", ".csv");
126    Random random = new Random(42);
127
128    try (Writer writer = Files.newBufferedWriter(tempFile)) {
129      // CSVヘッダー
130      String header = "id,name,city,product,quantity,price,timestamp\n";
131      writer.write(header);
132      long currentSize = header.getBytes("UTF-8").length;
133
134      int recordCount = 0;
135
136      while (currentSize < targetSizeBytes) {
137        String record = generateCsvRecord(recordCount++, random);
138        writer.write(record);
139        currentSize += record.getBytes("UTF-8").length;
140
141        if (recordCount % 100000 == 0 && targetSizeBytes > 100 * 1024 * 1024) {
142          if (LOG.isInfoEnabled()) {
143            LOG.info("Generated {} records, {:.2f} MB", recordCount, currentSize / 1024.0 / 1024.0);
144          }
145        }
146      }
147    }
148
149    return tempFile;
150  }
151
152  /** XMLレコードを生成 */
153  private static String generateXmlRecord(int id, Random random) {
154    String name = SAMPLE_NAMES[random.nextInt(SAMPLE_NAMES.length)];
155    String city = SAMPLE_CITIES[random.nextInt(SAMPLE_CITIES.length)];
156    String product = SAMPLE_PRODUCTS[random.nextInt(SAMPLE_PRODUCTS.length)];
157    int quantity = random.nextInt(10) + 1;
158    double price = Math.round((random.nextDouble() * 10000 + 1000) * 100) / 100.0;
159
160    return String.format(
161        "  <order id=\"%d\">%n"
162            + "    <customer>%n"
163            + "      <name>%s</name>%n"
164            + "      <city>%s</city>%n"
165            + "    </customer>%n"
166            + "    <product>%s</product>%n"
167            + "    <quantity>%d</quantity>%n"
168            + "    <price>%.2f</price>%n"
169            + "    <timestamp>%s</timestamp>%n"
170            + "    <description>%s</description>%n"
171            + "  </order>%n",
172        id,
173        escapeXml(name),
174        escapeXml(city),
175        escapeXml(product),
176        quantity,
177        price,
178        Instant.now(),
179        generateDescription(product, random));
180  }
181
182  /** JSONレコードを生成 */
183  private static String generateJsonRecord(int id, Random random) {
184    String name = SAMPLE_NAMES[random.nextInt(SAMPLE_NAMES.length)];
185    String city = SAMPLE_CITIES[random.nextInt(SAMPLE_CITIES.length)];
186    String product = SAMPLE_PRODUCTS[random.nextInt(SAMPLE_PRODUCTS.length)];
187    int quantity = random.nextInt(10) + 1;
188    double price = Math.round((random.nextDouble() * 10000 + 1000) * 100) / 100.0;
189
190    return String.format(
191        "    {%n"
192            + "      \"id\": %d,%n"
193            + "      \"customer\": {%n"
194            + "        \"name\": \"%s\",%n"
195            + "        \"city\": \"%s\"%n"
196            + "      },%n"
197            + "      \"product\": \"%s\",%n"
198            + "      \"quantity\": %d,%n"
199            + "      \"price\": %.2f,%n"
200            + "      \"timestamp\": \"%s\",%n"
201            + "      \"description\": \"%s\"%n"
202            + "    }",
203        id,
204        name,
205        city,
206        product,
207        quantity,
208        price,
209        Instant.now(),
210        generateDescription(product, random));
211  }
212
213  /** CSVレコードを生成 */
214  private static String generateCsvRecord(int id, Random random) {
215    String name = SAMPLE_NAMES[random.nextInt(SAMPLE_NAMES.length)];
216    String city = SAMPLE_CITIES[random.nextInt(SAMPLE_CITIES.length)];
217    String product = SAMPLE_PRODUCTS[random.nextInt(SAMPLE_PRODUCTS.length)];
218    int quantity = random.nextInt(10) + 1;
219    double price = Math.round((random.nextDouble() * 10000 + 1000) * 100) / 100.0;
220
221    return String.format(
222        "%d,\"%s\",\"%s\",\"%s\",%d,%.2f,\"%s\"%n",
223        id, name, city, product, quantity, price, Instant.now());
224  }
225
226  /** 商品の説明文を生成（データボリューム増加のため） */
227  private static String generateDescription(String product, Random random) {
228    String[] adjectives = {"高品質な", "人気の", "最新の", "おすすめの", "限定の"};
229    String[] features = {"機能性", "デザイン", "性能", "品質", "価格"};
230
231    String adj = adjectives[random.nextInt(adjectives.length)];
232    String feature = features[random.nextInt(features.length)];
233
234    return escapeXml(adj + product + "で、" + feature + "に優れた商品です。多くのお客様にご愛用いただいております。");
235  }
236
237  /** XML用のエスケープ処理 */
238  private static String escapeXml(String text) {
239    return text.replace("&", "&amp;")
240        .replace("<", "&lt;")
241        .replace(">", "&gt;")
242        .replace("\"", "&quot;")
243        .replace("'", "&apos;");
244  }
245
246  /**
247   * メモリ効率的な大容量データInputStreamを作成
248   *
249   * @param format データフォーマット（"XML", "JSON", "CSV"）
250   * @param targetSizeBytes 目標サイズ
251   * @return 大容量データのInputStream
252   */
253  public static InputStream createLargeDataStream(String format, long targetSizeBytes) {
254    return new LargeDataInputStream(format, targetSizeBytes);
255  }
256
257  /** メモリ効率的な大容量データInputStream実装 */
258  private static class LargeDataInputStream extends InputStream {
259    private final String format;
260    private final long totalSize;
261    private long bytesGenerated = 0;
262    private byte[] buffer = new byte[0];
263    private int bufferPosition = 0;
264    private int recordCount = 0;
265    private boolean headerWritten = false;
266    private boolean footerWritten = false;
267    private boolean isDocumentComplete = false;
268    private final Random random = new Random(42);
269
270    public LargeDataInputStream(String format, long totalSize) {
271      this.format = format.toUpperCase();
272      this.totalSize = totalSize;
273    }
274
275    @Override
276    public int read() throws IOException {
277      if (bufferPosition >= buffer.length) {
278        generateNextChunk();
279        if (buffer.length == 0) {
280          return -1; // EOF
281        }
282        bufferPosition = 0;
283      }
284
285      return buffer[bufferPosition++] & 0xFF;
286    }
287
288    @Override
289    public int read(byte[] b, int off, int len) throws IOException {
290      if (isDocumentComplete && bufferPosition >= buffer.length) {
291        return -1; // EOF
292      }
293
294      int totalRead = 0;
295      while (totalRead < len && (!isDocumentComplete || bufferPosition < buffer.length)) {
296        if (bufferPosition >= buffer.length) {
297          generateNextChunk();
298          if (buffer.length == 0) break;
299          bufferPosition = 0;
300        }
301
302        int available = buffer.length - bufferPosition;
303        int toRead = Math.min(len - totalRead, available);
304
305        System.arraycopy(buffer, bufferPosition, b, off + totalRead, toRead);
306        bufferPosition += toRead;
307        totalRead += toRead;
308      }
309
310      return totalRead > 0 ? totalRead : -1;
311    }
312
313    private void generateNextChunk() {
314      if (isDocumentComplete) {
315        buffer = new byte[0];
316        return;
317      }
318
319      StringBuilder chunk = new StringBuilder();
320
321      // フッターの必要なサイズを計算（マージンを含む）
322      int footerSize = calculateFooterSize();
323      long remainingBytes = totalSize - bytesGenerated;
324
325      // ヘッダー生成
326      if (!headerWritten) {
327        switch (format) {
328          case "XML":
329            chunk.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<orders>\n");
330            break;
331          case "JSON":
332            chunk.append("{\n  \"orders\": [\n");
333            break;
334          case "CSV":
335            chunk.append("id,name,city,product,quantity,price,timestamp\n");
336            break;
337          default:
338            throw new IllegalArgumentException("Unsupported format: " + format);
339        }
340        headerWritten = true;
341      }
342
343      // フッター生成のタイミングチェック（厳密な制御）
344      boolean shouldGenerateFooter =
345          !footerWritten
346              && (remainingBytes <= footerSize + 100
347                  || // 最小限のマージン
348                  (chunk.length() == 0 && remainingBytes < 500)); // ヘッダーのみで小さすぎる場合
349
350      if (shouldGenerateFooter) {
351        switch (format) {
352          case "XML":
353            chunk.append("</orders>\n");
354            break;
355          case "JSON":
356            chunk.append("\n  ]\n}\n");
357            break;
358          case "CSV":
359            // CSVにはフッターなし
360            break;
361        }
362        footerWritten = true;
363        isDocumentComplete = true;
364      } else {
365        // レコード生成（メモリ効率重視、チャンクサイズ制限）
366        long availableSpace = remainingBytes - footerSize - 50; // 安全マージン
367        final int MAX_CHUNK_SIZE = 64 * 1024; // 64KB制限でメモリ効率向上
368
369        while (availableSpace > 0 && !shouldGenerateFooter && chunk.length() < MAX_CHUNK_SIZE) {
370          // JSON カンマ追加
371          String separator = "";
372          if (format.equals("JSON") && recordCount > 0 && chunk.length() > 50) {
373            separator = ",\n";
374          }
375
376          String record;
377          switch (format) {
378            case "XML":
379              record = generateXmlRecord(recordCount, random);
380              break;
381            case "JSON":
382              record = generateJsonRecord(recordCount, random);
383              break;
384            case "CSV":
385              record = generateCsvRecord(recordCount, random);
386              break;
387            default:
388              record = "Unknown format\n";
389          }
390
391          // サイズチェック（より厳密）
392          int nextAdditionSize = separator.length() + record.length();
393          if (nextAdditionSize > availableSpace
394              || chunk.length() + nextAdditionSize > MAX_CHUNK_SIZE) {
395            break; // これ以上追加できない
396          }
397
398          // レコード追加
399          chunk.append(separator).append(record);
400          recordCount++;
401          availableSpace -= nextAdditionSize;
402
403          // 次回のフッター生成判定を更新
404          shouldGenerateFooter = !footerWritten && availableSpace <= footerSize + 50;
405          if (shouldGenerateFooter) break;
406        }
407      }
408
409      try {
410        byte[] chunkBytes = chunk.toString().getBytes("UTF-8");
411        buffer = chunkBytes;
412        bytesGenerated += chunkBytes.length;
413      } catch (Exception e) {
414        buffer = new byte[0];
415        isDocumentComplete = true;
416      }
417      bufferPosition = 0;
418    }
419
420    private int calculateFooterSize() {
421      switch (format) {
422        case "XML":
423          return "</orders>\n".getBytes(StandardCharsets.UTF_8).length;
424        case "JSON":
425          return "\n  ]\n}\n".getBytes(StandardCharsets.UTF_8).length;
426        case "CSV":
427        default:
428          return 0;
429      }
430    }
431  }
432}