C++ String Processing - Performance Optimization Techniques

String processing is a fundamental skill in C++ programming, especially for competitive programming and system development. This guide covers three efficient approaches for string concatenation and manipulation, each optimized for different scenarios.

๐Ÿš€ Three Approaches to String Building

#include <iostream>
#include <string>
#include <vector>
using namespace std;

// Method 1: reserve + append
string buildWithReserve(const vector<string>& parts) {
    size_t totalLen = 0;
    for (auto &p : parts) totalLen += p.size();
    string ans;
    ans.reserve(totalLen);
    for (auto &p : parts) {
        ans.append(p);
    }
    return ans;
}

When to use:

  • General string concatenation
  • When you know the approximate final size
  • Most common use case

Advantages:

  • Prevents multiple memory reallocations
  • Clean and readable code
  • Good performance for most scenarios

Approach 2: Stringstream (Formatting + Multiple Parts)

#include <sstream>

// Method 2: Using stringstream (formatting + multiple parts)
string buildWithStream(const vector<string>& parts) {
    ostringstream oss;
    for (int i = 0; i < parts.size(); ++i) {
        if (i) oss << ",";  // Add separator
        oss << parts[i];
    }
    return oss.str();
}

When to use:

  • Complex formatting requirements
  • Mixed data types (strings, numbers, etc.)
  • When you need separators or special formatting

Advantages:

  • Flexible formatting
  • Type-safe conversions
  • Easy to add separators and formatting

Approach 3: Manual Char Buffer (High Performance)

#include <cstring>

// Method 3: Manual char buffer (for very large strings, frequent concatenation, performance-critical)
string buildWithBuffer(const vector<string>& parts) {
    size_t total = 0;
    for (auto &p : parts) total += p.size();
    string result;
    result.resize(total);
    char *ptr = &result[0];
    for (auto &p : parts) {
        memcpy(ptr, p.data(), p.size());
        ptr += p.size();
    }
    return result;
}

When to use:

  • Very large strings
  • Frequent concatenation operations
  • Performance-critical applications
  • When you need maximum control over memory

Advantages:

  • Maximum performance
  • Single memory allocation
  • Direct memory manipulation
  • No intermediate allocations

๐Ÿ“Š Performance Comparison

Method Time Complexity Space Complexity Use Case
Reserve + Append O(n) O(n) General purpose
Stringstream O(n) O(n) Formatting
Manual Buffer O(n) O(n) High performance

๐Ÿงช Complete Example

#include <iostream>
#include <string>
#include <vector>
#include <sstream>
#include <cstring>
using namespace std;

// Method 1: reserve + append
string buildWithReserve(const vector<string>& parts) {
    size_t totalLen = 0;
    for (auto &p : parts) totalLen += p.size();
    string ans;
    ans.reserve(totalLen);
    for (auto &p : parts) {
        ans.append(p);
    }
    return ans;
}

// Method 2: Using stringstream (formatting + multiple parts)
string buildWithStream(const vector<string>& parts) {
    ostringstream oss;
    for (int i = 0; i < parts.size(); ++i) {
        if (i) oss << ",";  // Add separator
        oss << parts[i];
    }
    return oss.str();
}

// Method 3: Manual char buffer (for very large strings, frequent concatenation, performance-critical)
string buildWithBuffer(const vector<string>& parts) {
    size_t total = 0;
    for (auto &p : parts) total += p.size();
    string result;
    result.resize(total);
    char *ptr = &result[0];
    for (auto &p : parts) {
        memcpy(ptr, p.data(), p.size());
        ptr += p.size();
    }
    return result;
}

int main() {
    vector<string> ps = {"Hello", "World", "This", "Is", "Test"};
    
    cout << "Method 1 (Reserve + Append): " << buildWithReserve(ps) << endl;
    cout << "Method 2 (Stringstream): " << buildWithStream(ps) << endl;
    cout << "Method 3 (Manual Buffer): " << buildWithBuffer(ps) << endl;
    
    return 0;
}

Output:

Method 1 (Reserve + Append): HelloWorldThisIsTest
Method 2 (Stringstream): Hello,World,This,Is,Test
Method 3 (Manual Buffer): HelloWorldThisIsTest

๐Ÿ” Detailed Analysis

Method 1: Reserve + Append

string buildWithReserve(const vector<string>& parts) {
    size_t totalLen = 0;
    for (auto &p : parts) totalLen += p.size();  // Calculate total length
    string ans;
    ans.reserve(totalLen);                       // Pre-allocate memory
    for (auto &p : parts) {
        ans.append(p);                           // Append without reallocation
    }
    return ans;
}

Key Points:

  • reserve() pre-allocates memory to avoid reallocations
  • append() is more efficient than += for large strings
  • Single memory allocation after calculating total size

Method 2: Stringstream

string buildWithStream(const vector<string>& parts) {
    ostringstream oss;                           // Output string stream
    for (int i = 0; i < parts.size(); ++i) {
        if (i) oss << ",";                       // Add separator (except first)
        oss << parts[i];                         // Stream insertion
    }
    return oss.str();                            // Convert to string
}

Key Points:

  • ostringstream handles memory management automatically
  • Easy to add formatting and separators
  • Type-safe conversions
  • Slightly more overhead than direct string operations

Method 3: Manual Buffer

string buildWithBuffer(const vector<string>& parts) {
    size_t total = 0;
    for (auto &p : parts) total += p.size();     // Calculate total size
    string result;
    result.resize(total);                       // Allocate exact size
    char *ptr = &result[0];                     // Get pointer to buffer
    for (auto &p : parts) {
        memcpy(ptr, p.data(), p.size());       // Direct memory copy
        ptr += p.size();                        // Move pointer
    }
    return result;
}

Key Points:

  • resize() allocates exact memory needed
  • memcpy() is fastest for copying memory blocks
  • Direct pointer manipulation for maximum performance
  • Requires careful memory management

๐ŸŽฏ When to Use Each Method

Use Reserve + Append When:

  • Building strings from multiple parts
  • You know the approximate final size
  • General-purpose string concatenation
  • Code readability is important

Use Stringstream When:

  • Complex formatting is required
  • Mixing different data types
  • Adding separators or special formatting
  • Type safety is important

Use Manual Buffer When:

  • Performance is critical
  • Very large strings (>1MB)
  • Frequent concatenation operations
  • Maximum control over memory is needed

๐Ÿšจ Common Pitfalls

1. Forgetting to Reserve

// BAD: Multiple reallocations
string result;
for (auto &part : parts) {
    result += part;  // May cause reallocation
}

// GOOD: Pre-allocate memory
string result;
result.reserve(totalSize);
for (auto &part : parts) {
    result += part;  // No reallocation
}

2. Inefficient String Concatenation

// BAD: Creates temporary strings
string result = "";
for (auto &part : parts) {
    result = result + part;  // Creates new string each time
}

// GOOD: Use append or +=
string result;
for (auto &part : parts) {
    result += part;  // In-place modification
}

3. Buffer Overflow in Manual Method

// BAD: No bounds checking
char *ptr = &result[0];
for (auto &p : parts) {
    memcpy(ptr, p.data(), p.size());  // Potential overflow
    ptr += p.size();
}

// GOOD: Calculate exact size first
size_t total = 0;
for (auto &p : parts) total += p.size();
result.resize(total);  // Ensure enough space

๐Ÿ“ˆ Performance Tips

1. Pre-calculate Size

// Calculate total size before building
size_t totalSize = 0;
for (auto &part : parts) {
    totalSize += part.size();
}

2. Use Reserve for Dynamic Growth

string result;
result.reserve(totalSize);  // Pre-allocate

3. Choose the Right Method

  • Small strings (<1KB): Any method works
  • Medium strings (1KB-1MB): Reserve + Append
  • Large strings (>1MB): Manual Buffer
  • Complex formatting: Stringstream

๐Ÿงช Benchmark Example

#include <chrono>
#include <iostream>
#include <string>
#include <vector>

void benchmarkStringBuilding() {
    vector<string> parts(1000, "HelloWorld");
    
    auto start = chrono::high_resolution_clock::now();
    string result1 = buildWithReserve(parts);
    auto end = chrono::high_resolution_clock::now();
    auto duration1 = chrono::duration_cast<chrono::microseconds>(end - start);
    
    start = chrono::high_resolution_clock::now();
    string result2 = buildWithStream(parts);
    end = chrono::high_resolution_clock::now();
    auto duration2 = chrono::duration_cast<chrono::microseconds>(end - start);
    
    start = chrono::high_resolution_clock::now();
    string result3 = buildWithBuffer(parts);
    end = chrono::high_resolution_clock::now();
    auto duration3 = chrono::duration_cast<chrono::microseconds>(end - start);
    
    cout << "Reserve + Append: " << duration1.count() << " ฮผs" << endl;
    cout << "Stringstream: " << duration2.count() << " ฮผs" << endl;
    cout << "Manual Buffer: " << duration3.count() << " ฮผs" << endl;
}

๐ŸŽฏ LeetCode Applications

Problem: String Compression

string compressString(string s) {
    if (s.empty()) return s;
    
    string result;
    result.reserve(s.size());  // Pre-allocate
    
    int count = 1;
    for (int i = 1; i < s.size(); i++) {
        if (s[i] == s[i-1]) {
            count++;
        } else {
            result += s[i-1];
            result += to_string(count);
            count = 1;
        }
    }
    result += s.back();
    result += to_string(count);
    
    return result.size() < s.size() ? result : s;
}

Problem: Reverse Words in String

string reverseWords(string s) {
    stringstream ss(s);
    string word, result;
    
    while (ss >> word) {
        if (!result.empty()) result = " " + result;
        result = word + result;
    }
    
    return result;
}

๐Ÿ’ก Key Takeaways

  1. Always reserve memory when you know the approximate size
  2. Use append() instead of + for large strings
  3. Choose the right method based on your requirements
  4. Profile your code to identify bottlenecks
  5. Consider memory allocation patterns in your design