001package com.streamconverter.benchmark; 002 003import java.io.IOException; 004import java.io.InputStream; 005import java.io.Writer; 006import java.nio.charset.StandardCharsets; 007import java.nio.file.Files; 008import java.nio.file.Path; 009import java.time.Instant; 010import java.util.Random; 011import org.slf4j.Logger; 012import org.slf4j.LoggerFactory; 013 014/** 015 * 大容量テストデータを生成するユーティリティクラス 016 * 017 * <p>XML、JSON、CSVフォーマットで大容量データファイルを生成し、 5GBデータ/50MBメモリ目標のベンチマークテストをサポートします。 018 */ 019public class LargeDataGenerator { 020 021 private static final Logger LOG = LoggerFactory.getLogger(LargeDataGenerator.class); 022 023 private static final String[] SAMPLE_NAMES = { 024 "田中太郎", "佐藤花子", "鈴木一郎", "高橋美咲", "渡辺健太", 025 "伊藤由美", "山田隆", "中村恵子", "小林誠", "加藤真理子" 026 }; 027 028 private static final String[] SAMPLE_CITIES = { 029 "東京", "大阪", "名古屋", "札幌", "福岡", "仙台", "広島", "京都", "神戸", "横浜" 030 }; 031 032 private static final String[] SAMPLE_PRODUCTS = { 033 "スマートフォン", "ノートパソコン", "タブレット", "イヤホン", "キーボード", 034 "マウス", "モニター", "プリンター", "カメラ", "充電器" 035 }; 036 037 /** Prevent instantiation. */ 038 private LargeDataGenerator() {} 039 040 /** 041 * 大容量XMLファイルを生成します 042 * 043 * @param targetSizeBytes 目標ファイルサイズ(バイト) 044 * @return 生成されたファイルのPath 045 * @throws IOException ファイル作成に失敗した場合 046 */ 047 public static Path generateLargeXmlFile(long targetSizeBytes) throws IOException { 048 Path tempFile = Files.createTempFile("large-test", ".xml"); 049 Random random = new Random(42); // 再現可能な結果のため固定シード 050 051 try (Writer writer = Files.newBufferedWriter(tempFile)) { 052 writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"); 053 writer.write("<orders>\n"); 054 055 long currentSize = 100; // ヘッダー分 056 int recordCount = 0; 057 058 while (currentSize < targetSizeBytes) { 059 String record = generateXmlRecord(recordCount++, random); 060 writer.write(record); 061 currentSize += record.getBytes("UTF-8").length; 062 063 // 進捗表示(大きなファイルの場合) 064 if (recordCount % 100000 == 0 && targetSizeBytes > 100 * 1024 * 1024) { 065 if (LOG.isInfoEnabled()) { 066 LOG.info("Generated {} records, {:.2f} MB", recordCount, currentSize / 1024.0 / 1024.0); 067 } 068 } 069 } 070 071 writer.write("</orders>\n"); 072 } 073 074 return tempFile; 075 } 076 077 /** 078 * 大容量JSONファイルを生成します 079 * 080 * @param targetSizeBytes 目標ファイルサイズ(バイト) 081 * @return 生成されたファイルのPath 082 * @throws IOException ファイル作成に失敗した場合 083 */ 084 public static Path generateLargeJsonFile(long targetSizeBytes) throws IOException { 085 Path tempFile = Files.createTempFile("large-test", ".json"); 086 Random random = new Random(42); 087 088 try (Writer writer = Files.newBufferedWriter(tempFile)) { 089 writer.write("{\n \"orders\": [\n"); 090 091 long currentSize = 20; // ヘッダー分 092 int recordCount = 0; 093 094 while (currentSize < targetSizeBytes - 100) { // 終了分を考慮 095 if (recordCount > 0) { 096 writer.write(",\n"); 097 currentSize += 2; 098 } 099 100 String record = generateJsonRecord(recordCount++, random); 101 writer.write(record); 102 currentSize += record.getBytes("UTF-8").length; 103 104 if (recordCount % 100000 == 0 && targetSizeBytes > 100 * 1024 * 1024) { 105 if (LOG.isInfoEnabled()) { 106 LOG.info("Generated {} records, {:.2f} MB", recordCount, currentSize / 1024.0 / 1024.0); 107 } 108 } 109 } 110 111 writer.write("\n ]\n}\n"); 112 } 113 114 return tempFile; 115 } 116 117 /** 118 * 大容量CSVファイルを生成します 119 * 120 * @param targetSizeBytes 目標ファイルサイズ(バイト) 121 * @return 生成されたファイルのPath 122 * @throws IOException ファイル作成に失敗した場合 123 */ 124 public static Path generateLargeCsvFile(long targetSizeBytes) throws IOException { 125 Path tempFile = Files.createTempFile("large-test", ".csv"); 126 Random random = new Random(42); 127 128 try (Writer writer = Files.newBufferedWriter(tempFile)) { 129 // CSVヘッダー 130 String header = "id,name,city,product,quantity,price,timestamp\n"; 131 writer.write(header); 132 long currentSize = header.getBytes("UTF-8").length; 133 134 int recordCount = 0; 135 136 while (currentSize < targetSizeBytes) { 137 String record = generateCsvRecord(recordCount++, random); 138 writer.write(record); 139 currentSize += record.getBytes("UTF-8").length; 140 141 if (recordCount % 100000 == 0 && targetSizeBytes > 100 * 1024 * 1024) { 142 if (LOG.isInfoEnabled()) { 143 LOG.info("Generated {} records, {:.2f} MB", recordCount, currentSize / 1024.0 / 1024.0); 144 } 145 } 146 } 147 } 148 149 return tempFile; 150 } 151 152 /** XMLレコードを生成 */ 153 private static String generateXmlRecord(int id, Random random) { 154 String name = SAMPLE_NAMES[random.nextInt(SAMPLE_NAMES.length)]; 155 String city = SAMPLE_CITIES[random.nextInt(SAMPLE_CITIES.length)]; 156 String product = SAMPLE_PRODUCTS[random.nextInt(SAMPLE_PRODUCTS.length)]; 157 int quantity = random.nextInt(10) + 1; 158 double price = Math.round((random.nextDouble() * 10000 + 1000) * 100) / 100.0; 159 160 return String.format( 161 " <order id=\"%d\">%n" 162 + " <customer>%n" 163 + " <name>%s</name>%n" 164 + " <city>%s</city>%n" 165 + " </customer>%n" 166 + " <product>%s</product>%n" 167 + " <quantity>%d</quantity>%n" 168 + " <price>%.2f</price>%n" 169 + " <timestamp>%s</timestamp>%n" 170 + " <description>%s</description>%n" 171 + " </order>%n", 172 id, 173 escapeXml(name), 174 escapeXml(city), 175 escapeXml(product), 176 quantity, 177 price, 178 Instant.now(), 179 generateDescription(product, random)); 180 } 181 182 /** JSONレコードを生成 */ 183 private static String generateJsonRecord(int id, Random random) { 184 String name = SAMPLE_NAMES[random.nextInt(SAMPLE_NAMES.length)]; 185 String city = SAMPLE_CITIES[random.nextInt(SAMPLE_CITIES.length)]; 186 String product = SAMPLE_PRODUCTS[random.nextInt(SAMPLE_PRODUCTS.length)]; 187 int quantity = random.nextInt(10) + 1; 188 double price = Math.round((random.nextDouble() * 10000 + 1000) * 100) / 100.0; 189 190 return String.format( 191 " {%n" 192 + " \"id\": %d,%n" 193 + " \"customer\": {%n" 194 + " \"name\": \"%s\",%n" 195 + " \"city\": \"%s\"%n" 196 + " },%n" 197 + " \"product\": \"%s\",%n" 198 + " \"quantity\": %d,%n" 199 + " \"price\": %.2f,%n" 200 + " \"timestamp\": \"%s\",%n" 201 + " \"description\": \"%s\"%n" 202 + " }", 203 id, 204 name, 205 city, 206 product, 207 quantity, 208 price, 209 Instant.now(), 210 generateDescription(product, random)); 211 } 212 213 /** CSVレコードを生成 */ 214 private static String generateCsvRecord(int id, Random random) { 215 String name = SAMPLE_NAMES[random.nextInt(SAMPLE_NAMES.length)]; 216 String city = SAMPLE_CITIES[random.nextInt(SAMPLE_CITIES.length)]; 217 String product = SAMPLE_PRODUCTS[random.nextInt(SAMPLE_PRODUCTS.length)]; 218 int quantity = random.nextInt(10) + 1; 219 double price = Math.round((random.nextDouble() * 10000 + 1000) * 100) / 100.0; 220 221 return String.format( 222 "%d,\"%s\",\"%s\",\"%s\",%d,%.2f,\"%s\"%n", 223 id, name, city, product, quantity, price, Instant.now()); 224 } 225 226 /** 商品の説明文を生成(データボリューム増加のため) */ 227 private static String generateDescription(String product, Random random) { 228 String[] adjectives = {"高品質な", "人気の", "最新の", "おすすめの", "限定の"}; 229 String[] features = {"機能性", "デザイン", "性能", "品質", "価格"}; 230 231 String adj = adjectives[random.nextInt(adjectives.length)]; 232 String feature = features[random.nextInt(features.length)]; 233 234 return escapeXml(adj + product + "で、" + feature + "に優れた商品です。多くのお客様にご愛用いただいております。"); 235 } 236 237 /** XML用のエスケープ処理 */ 238 private static String escapeXml(String text) { 239 return text.replace("&", "&") 240 .replace("<", "<") 241 .replace(">", ">") 242 .replace("\"", """) 243 .replace("'", "'"); 244 } 245 246 /** 247 * メモリ効率的な大容量データInputStreamを作成 248 * 249 * @param format データフォーマット("XML", "JSON", "CSV") 250 * @param targetSizeBytes 目標サイズ 251 * @return 大容量データのInputStream 252 */ 253 public static InputStream createLargeDataStream(String format, long targetSizeBytes) { 254 return new LargeDataInputStream(format, targetSizeBytes); 255 } 256 257 /** メモリ効率的な大容量データInputStream実装 */ 258 private static class LargeDataInputStream extends InputStream { 259 private final String format; 260 private final long totalSize; 261 private long bytesGenerated = 0; 262 private byte[] buffer = new byte[0]; 263 private int bufferPosition = 0; 264 private int recordCount = 0; 265 private boolean headerWritten = false; 266 private boolean footerWritten = false; 267 private boolean isDocumentComplete = false; 268 private final Random random = new Random(42); 269 270 public LargeDataInputStream(String format, long totalSize) { 271 this.format = format.toUpperCase(); 272 this.totalSize = totalSize; 273 } 274 275 @Override 276 public int read() throws IOException { 277 if (bufferPosition >= buffer.length) { 278 generateNextChunk(); 279 if (buffer.length == 0) { 280 return -1; // EOF 281 } 282 bufferPosition = 0; 283 } 284 285 return buffer[bufferPosition++] & 0xFF; 286 } 287 288 @Override 289 public int read(byte[] b, int off, int len) throws IOException { 290 if (isDocumentComplete && bufferPosition >= buffer.length) { 291 return -1; // EOF 292 } 293 294 int totalRead = 0; 295 while (totalRead < len && (!isDocumentComplete || bufferPosition < buffer.length)) { 296 if (bufferPosition >= buffer.length) { 297 generateNextChunk(); 298 if (buffer.length == 0) break; 299 bufferPosition = 0; 300 } 301 302 int available = buffer.length - bufferPosition; 303 int toRead = Math.min(len - totalRead, available); 304 305 System.arraycopy(buffer, bufferPosition, b, off + totalRead, toRead); 306 bufferPosition += toRead; 307 totalRead += toRead; 308 } 309 310 return totalRead > 0 ? totalRead : -1; 311 } 312 313 private void generateNextChunk() { 314 if (isDocumentComplete) { 315 buffer = new byte[0]; 316 return; 317 } 318 319 StringBuilder chunk = new StringBuilder(); 320 321 // フッターの必要なサイズを計算(マージンを含む) 322 int footerSize = calculateFooterSize(); 323 long remainingBytes = totalSize - bytesGenerated; 324 325 // ヘッダー生成 326 if (!headerWritten) { 327 switch (format) { 328 case "XML": 329 chunk.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<orders>\n"); 330 break; 331 case "JSON": 332 chunk.append("{\n \"orders\": [\n"); 333 break; 334 case "CSV": 335 chunk.append("id,name,city,product,quantity,price,timestamp\n"); 336 break; 337 default: 338 throw new IllegalArgumentException("Unsupported format: " + format); 339 } 340 headerWritten = true; 341 } 342 343 // フッター生成のタイミングチェック(厳密な制御) 344 boolean shouldGenerateFooter = 345 !footerWritten 346 && (remainingBytes <= footerSize + 100 347 || // 最小限のマージン 348 (chunk.length() == 0 && remainingBytes < 500)); // ヘッダーのみで小さすぎる場合 349 350 if (shouldGenerateFooter) { 351 switch (format) { 352 case "XML": 353 chunk.append("</orders>\n"); 354 break; 355 case "JSON": 356 chunk.append("\n ]\n}\n"); 357 break; 358 case "CSV": 359 // CSVにはフッターなし 360 break; 361 } 362 footerWritten = true; 363 isDocumentComplete = true; 364 } else { 365 // レコード生成(メモリ効率重視、チャンクサイズ制限) 366 long availableSpace = remainingBytes - footerSize - 50; // 安全マージン 367 final int MAX_CHUNK_SIZE = 64 * 1024; // 64KB制限でメモリ効率向上 368 369 while (availableSpace > 0 && !shouldGenerateFooter && chunk.length() < MAX_CHUNK_SIZE) { 370 // JSON カンマ追加 371 String separator = ""; 372 if (format.equals("JSON") && recordCount > 0 && chunk.length() > 50) { 373 separator = ",\n"; 374 } 375 376 String record; 377 switch (format) { 378 case "XML": 379 record = generateXmlRecord(recordCount, random); 380 break; 381 case "JSON": 382 record = generateJsonRecord(recordCount, random); 383 break; 384 case "CSV": 385 record = generateCsvRecord(recordCount, random); 386 break; 387 default: 388 record = "Unknown format\n"; 389 } 390 391 // サイズチェック(より厳密) 392 int nextAdditionSize = separator.length() + record.length(); 393 if (nextAdditionSize > availableSpace 394 || chunk.length() + nextAdditionSize > MAX_CHUNK_SIZE) { 395 break; // これ以上追加できない 396 } 397 398 // レコード追加 399 chunk.append(separator).append(record); 400 recordCount++; 401 availableSpace -= nextAdditionSize; 402 403 // 次回のフッター生成判定を更新 404 shouldGenerateFooter = !footerWritten && availableSpace <= footerSize + 50; 405 if (shouldGenerateFooter) break; 406 } 407 } 408 409 try { 410 byte[] chunkBytes = chunk.toString().getBytes("UTF-8"); 411 buffer = chunkBytes; 412 bytesGenerated += chunkBytes.length; 413 } catch (Exception e) { 414 buffer = new byte[0]; 415 isDocumentComplete = true; 416 } 417 bufferPosition = 0; 418 } 419 420 private int calculateFooterSize() { 421 switch (format) { 422 case "XML": 423 return "</orders>\n".getBytes(StandardCharsets.UTF_8).length; 424 case "JSON": 425 return "\n ]\n}\n".getBytes(StandardCharsets.UTF_8).length; 426 case "CSV": 427 default: 428 return 0; 429 } 430 } 431 } 432}