Building a Custom String Class in C++

A custom string class typically wraps a dynamically allocated character array along with size and capacity tracking. The following implementation lives inside a dedicated namespace to avoid collisions with the standard library.

namespace custom
{
    // Skeleton of the string class: a heap buffer plus its length and capacity.
    class string
    {
    private:
        // In-class defaults keep a default-constructed object in a known state
        // even before any constructor has been written.
        char* _data = nullptr;  // heap-allocated character buffer
        size_t _len = 0;        // number of characters, excluding the null terminator
        size_t _cap = 0;        // total capacity of the buffer
    };
}

The private members mirror a typical vector-like layout: a pointer to the heap-allocated buffer, the current length (number of characters excluding the null terminator), and the total capacity.

Construction and Destruction

string(const char* str = "")
{
    _len = std::strlen(str);
    _cap = _len;
    _data = new char[_cap + 1];
    std::memcpy(_data, str, _len + 1);
}

// Frees the owned buffer and leaves the members in an empty state.
~string()
{
    delete[] _data;
    _cap = 0;
    _len = 0;
    _data = nullptr;
}

The constructor allocates one extra byte for the null terminator. The destructor releases the owned memory and resets the members.

Element Access and Size

// Read-only pointer to the internal character buffer.
const char* c_str() const
{
    return _data;
}

// Number of characters stored, not counting the terminator.
size_t size() const
{
    return _len;
}

// Mutable element access. The assertion admits pos == size(), i.e. the slot
// that holds the terminator.
char& operator[](size_t pos)
{
    assert(pos <= _len);
    return *(_data + pos);
}

// Read-only element access with the same bounds contract as above.
const char& operator[](size_t pos) const
{
    assert(pos <= _len);
    return *(_data + pos);
}

Iterators

// Raw pointers into the contiguous buffer serve as iterators.
using iterator = char*;
using const_iterator = const char*;

// [begin, end) spans the stored characters; end() points at the terminator slot.
iterator begin()
{
    return &_data[0];
}
iterator end()
{
    return &_data[_len];
}
const_iterator begin() const
{
    return &_data[0];
}
const_iterator end() const
{
    return &_data[_len];
}

Capacity and Modification

reserve ensures at least the requested amount of memory is available:

// Grows the buffer so it can hold at least n characters plus the terminator.
// Requests that do not exceed the current capacity are ignored; the stored
// characters and the length are unchanged either way.
void reserve(size_t n)
{
    if (n <= _cap)
        return;

    char* grown = new char[n + 1];
    // Copy _len + 1 bytes so the '\0' terminator moves with the characters;
    // copying only _len leaves c_str() unterminated after a reallocation.
    std::memcpy(grown, _data, _len + 1);
    delete[] _data;
    _data = grown;
    _cap = n;
}

Appending a single character or a C-string:

// Appends one character. When the buffer is full the capacity is doubled
// (or set to 4 if it was 0) before the character is stored.
void push_back(char c)
{
    if (_len == _cap)
    {
        const size_t grown = (_cap == 0) ? 4 : _cap * 2;
        reserve(grown);
    }
    _data[_len] = c;
    ++_len;
    _data[_len] = '\0';
}

// Appends a NUL-terminated C-string and returns *this so calls can be chained.
string& append(const char* s)
{
    const size_t extra = std::strlen(s);
    const size_t total = _len + extra;
    if (total > _cap)
        reserve(total);
    // extra + 1 bytes: the source's terminator becomes the new terminator.
    std::memcpy(_data + _len, s, extra + 1);
    _len = total;
    return *this;
}

// Appends a single character; forwards to push_back.
string& operator+=(char c)
{
    push_back(c);
    return *this;
}

// Appends a C-string; forwards to append, which already returns *this.
string& operator+=(const char* s)
{
    return append(s);
}

Insert and Erase

// Inserts `count` copies of `c` in front of position `pos`.
// pos == size() appends; pos > size() is a contract violation (asserted).
void insert(size_t pos, size_t count, char c)
{
    assert(pos <= _len);
    if (count == 0)
        return;
    if (_len + count > _cap)
        reserve(_len + count);

    // Open a gap by shifting the tail [pos, _len) to the right. memmove copes
    // with the overlap and needs no wrap-around guard on an unsigned index.
    std::memmove(_data + pos + count, _data + pos, _len - pos);
    std::memset(_data + pos, c, count);
    _len += count;
    // Write the terminator explicitly instead of relocating the old one, so the
    // result is terminated even if a reallocation did not carry it over.
    _data[_len] = '\0';
}

// Inserts the NUL-terminated C-string `str` in front of position `pos`.
// pos == size() appends; pos > size() is a contract violation (asserted).
void insert(size_t pos, const char* str)
{
    assert(pos <= _len);
    const size_t extra = std::strlen(str);
    if (extra == 0)
        return;
    if (_len + extra > _cap)
        reserve(_len + extra);

    // Open a gap by shifting the tail [pos, _len) to the right. memmove copes
    // with the overlap and needs no wrap-around guard on an unsigned index.
    std::memmove(_data + pos + extra, _data + pos, _len - pos);
    std::memcpy(_data + pos, str, extra);
    _len += extra;
    // Write the terminator explicitly instead of relocating the old one, so the
    // result is terminated even if a reallocation did not carry it over.
    _data[_len] = '\0';
}

// Removes up to `len` characters starting at `pos`. When `len` is npos or
// reaches past the end, everything from `pos` onward is removed.
void erase(size_t pos, size_t len = npos)
{
    assert(pos <= _len);
    const size_t remaining = _len - pos;
    // Compare against the remaining length rather than computing pos + len,
    // which wraps around for large len values and would take the wrong branch.
    if (len >= remaining)
    {
        _data[pos] = '\0';
        _len = pos;
    }
    else
    {
        // Slide the tail (terminator included, hence the + 1) over the gap.
        std::memmove(_data + pos, _data + pos + len, remaining - len + 1);
        _len -= len;
    }
}

Find and Substring

size_t find(char c, size_t pos = 0)
{
    assert(pos <= _len);
    for (size_t i = pos; i < _len; ++i)
        if (_data[i] == c)
            return i;
    return npos;
}

// Returns a new string holding up to `len` characters starting at `pos`.
// When `len` is npos or reaches past the end, the result runs to the end.
// Declared const so it can be called on const strings.
string substr(size_t pos = 0, size_t len = npos) const
{
    assert(pos <= _len);
    const size_t available = _len - pos;
    // Clamp against the remaining length rather than computing pos + len,
    // which wraps around for large len values.
    const size_t n = (len < available) ? len : available;

    string tmp;
    tmp.reserve(n);
    std::memcpy(tmp._data, _data + pos, n);
    tmp._len = n;
    tmp._data[n] = '\0';
    return tmp;
}

Resize and Clear

// Sets the length to `n`. Shrinking truncates; growing pads the new tail with
// `ch` after making sure the buffer is large enough.
void resize(size_t n, char ch = '\0')
{
    if (n >= _len)
    {
        reserve(n);
        std::memset(_data + _len, ch, n - _len);
    }
    _data[n] = '\0';
    _len = n;
}

// Empties the string; the buffer and its capacity are left untouched.
void clear()
{
    _len = 0;
    _data[_len] = '\0';
}

Comparison Operators

// Lexicographic "less than". Bytes are compared as unsigned char (via memcmp)
// so the ordering agrees with operator== and does not depend on whether plain
// char is signed; a proper prefix sorts before the longer string.
bool operator<(const string& rhs) const
{
    const size_t common = (_len < rhs._len) ? _len : rhs._len;
    const int order = std::memcmp(_data, rhs._data, common);
    if (order != 0)
        return order < 0;
    return _len < rhs._len;
}

// Two strings are equal when they have the same length and the same bytes.
bool operator==(const string& rhs) const
{
    if (_len != rhs._len)
        return false;
    return std::memcmp(_data, rhs._data, _len) == 0;
}

// The remaining relational operators are all expressed through < and ==.
bool operator!=(const string& rhs) const
{
    return !operator==(rhs);
}
bool operator<=(const string& rhs) const
{
    return operator<(rhs) || operator==(rhs);
}
bool operator>(const string& rhs) const
{
    return !operator<=(rhs);
}
bool operator>=(const string& rhs) const
{
    return !operator<(rhs);
}

Stream Operators

These are implemented as free functions inside the custom namespace without requiring friend declarations:

std::ostream& operator<<(std::ostream& out, const string& s)
{
    for (size_t i = 0; i < s.size(); ++i)
        out << s[i];
    return out;
}

// Reads one whitespace-delimited word into `s`, replacing its old contents.
// Leading whitespace (anything std::isspace accepts) is skipped. The delimiter
// that ends the word is left in the stream, as std::string's extractor does.
// failbit is set only when no character could be extracted, so a final word
// that ends at end-of-file still counts as a successful read.
std::istream& operator>>(std::istream& in, string& s)
{
    typedef std::istream::traits_type traits;

    s.clear();

    // Keep the look-ahead as int_type: squeezing it into a char makes EOF
    // indistinguishable from a real byte (or unreachable if char is unsigned).
    std::istream::int_type next = in.peek();

    // skip leading whitespace
    while (next != traits::eof() && std::isspace(next))
    {
        in.get();
        next = in.peek();
    }

    // Collect characters in a small stack buffer and append in chunks.
    char buff[128];
    size_t used = 0;
    bool extracted = false;
    while (next != traits::eof() && !std::isspace(next))
    {
        buff[used++] = traits::to_char_type(next);
        extracted = true;
        in.get();
        if (used == sizeof(buff) - 1)
        {
            buff[used] = '\0';
            s += buff;
            used = 0;
        }
        // peek() is not called again once it has reported EOF, so a
        // successful read ends with eofbit only.
        next = in.peek();
    }
    if (used > 0)
    {
        buff[used] = '\0';
        s += buff;
    }

    if (!extracted)
        in.setstate(std::ios_base::failbit);
    return in;
}

Static Member npos

// Excerpt: only the npos declaration is shown; the other members are elided.
class string
{
public:
    // Declared here, defined once outside the class (see below).
    const static size_t npos;
    // ... (remaining members omitted)
};
// -1 converted to size_t is the largest value size_t can represent.
const size_t string::npos = static_cast<size_t>(-1);

Deep Copy vs. Shallow Copy

When a copy constructor is not explicitly defined, the compiler generates a memberwise copy. For a class managing a resource like a dynamic array, this leads to two objects sharing the same buffer. Destruction of one object frees the memory, leaving the other with a dangling pointer—this is a shallow copy.

To avoid this, a deep copy must allocate a separate buffer:

string(const string& other)
{
    size_t len = std::strlen(other._data);
    _data = new char[len + 1];
    std::memcpy(_data, other._data, len + 1);
    _len = other._len;
    _cap = other._cap;
}

For the assignment operator, the classic approach checks for self-assignment, releases the old buffer, and copies the new one:

// Copy assignment (classic form): replaces this string's contents with a deep
// copy of other's. Self-assignment is a no-op.
string& operator=(const string& other)
{
    if (this != &other)
    {
        // Allocate and fill the replacement first: if new throws, this object
        // still owns its old, valid buffer.
        char* fresh = new char[other._cap + 1];
        // Size by _cap and copy by _len (not strlen) so the recorded length
        // and capacity match the buffer actually owned.
        std::memcpy(fresh, other._data, other._len + 1);

        delete[] _data;
        _data = fresh;
        _len = other._len;
        _cap = other._cap;
    }
    return *this;
}

A modern idiom leverages the copy constructor and swap:

// Copy-and-swap assignment. `tmp` is taken by value, so it is already a
// private copy of the right-hand side; swapping hands its buffer to *this and
// lets tmp's destructor release the old one. No self-assignment check is
// needed: a by-value parameter is always a distinct object from *this.
string& operator=(string tmp)
{
    std::swap(_len, tmp._len);
    std::swap(_cap, tmp._cap);
    std::swap(_data, tmp._data);
    return *this;
}

Copy-on-Write (Brief Overview)

Deep copy can be wasteful if the copied string is never modified. An alternative is to share the buffer and add a reference count. When a write occurs, if the reference count is greater than one, a deep copy is made first. This technique is used by some standard library implementations (e.g., GCC’s old std::string). Visual Studio’s implementation traditionally used deep copy.

Complete Implementation in One Place

#pragma once
#define _CRT_SECURE_NO_WARNINGS 1
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstring>
#include <iostream>

namespace custom
{
    // Owning character string with explicit length and capacity tracking.
    // The member functions keep _data pointing at a buffer of _cap + 1 bytes,
    // with _len <= _cap and a '\0' stored at _data[_len].
    class string
    {
    public:
        typedef char* iterator;
        typedef const char* const_iterator;

        // Sentinel meaning "no position" / "to the end"; defined after the class.
        const static size_t npos;

        // Builds a string from a NUL-terminated C-string (empty by default).
        string(const char* str = "")
        {
            _len = std::strlen(str);
            _cap = _len;
            _data = new char[_cap + 1];
            std::memcpy(_data, str, _len + 1);
        }

        // Deep copy. The buffer is sized from other's capacity and copied by
        // other's length (not strlen), so _len/_cap always describe the buffer
        // that is really owned, including when the text contains '\0'.
        string(const string& other)
            : _data(new char[other._cap + 1])
            , _len(other._len)
            , _cap(other._cap)
        {
            std::memcpy(_data, other._data, _len + 1);
        }

        // Copy-and-swap assignment: `tmp` is already a private copy, so the
        // members are simply exchanged and tmp's destructor frees the old
        // buffer. A by-value parameter can never alias *this, so there is no
        // self-assignment check.
        string& operator=(string tmp)
        {
            std::swap(_len, tmp._len);
            std::swap(_cap, tmp._cap);
            std::swap(_data, tmp._data);
            return *this;
        }

        // Releases the owned buffer.
        ~string()
        {
            delete[] _data;
            _data = nullptr;
            _len = _cap = 0;
        }

        // Read-only pointer to the NUL-terminated contents.
        const char* c_str() const { return _data; }
        // Number of characters, excluding the terminator.
        size_t size() const { return _len; }

        // Element access; pos == size() refers to the terminator slot.
        char& operator[](size_t pos)
        {
            assert(pos <= _len);
            return _data[pos];
        }
        const char& operator[](size_t pos) const
        {
            assert(pos <= _len);
            return _data[pos];
        }

        // Raw pointers into the buffer serve as iterators over [0, size()).
        iterator begin() { return _data; }
        iterator end()   { return _data + _len; }
        const_iterator begin() const { return _data; }
        const_iterator end()   const { return _data + _len; }

        // Grows the buffer to hold at least n characters; never shrinks.
        void reserve(size_t n)
        {
            if (n > _cap)
            {
                char* tmp = new char[n + 1];
                // _len + 1 so the terminator is carried into the new buffer.
                std::memcpy(tmp, _data, _len + 1);
                delete[] _data;
                _data = tmp;
                _cap = n;
            }
        }

        // Appends one character, doubling the capacity (4 if it was 0) when full.
        void push_back(char c)
        {
            if (_len == _cap)
                reserve(_cap == 0 ? 4 : _cap * 2);
            _data[_len++] = c;
            _data[_len] = '\0';
        }

        // Appends a NUL-terminated C-string; returns *this for chaining.
        string& append(const char* s)
        {
            size_t extra = std::strlen(s);
            if (_len + extra > _cap)
                reserve(_len + extra);
            std::memcpy(_data + _len, s, extra + 1);
            _len += extra;
            return *this;
        }

        string& operator+=(char c)        { push_back(c); return *this; }
        string& operator+=(const char* s) { append(s); return *this; }

        // Inserts `count` copies of `c` in front of `pos` (pos == size() appends).
        void insert(size_t pos, size_t count, char c)
        {
            assert(pos <= _len);
            if (count == 0)
                return;
            if (_len + count > _cap)
                reserve(_len + count);
            // Shift the tail right; memmove handles the overlapping ranges.
            std::memmove(_data + pos + count, _data + pos, _len - pos);
            std::memset(_data + pos, c, count);
            _len += count;
            _data[_len] = '\0';
        }

        // Inserts the C-string `str` in front of `pos` (pos == size() appends).
        void insert(size_t pos, const char* str)
        {
            assert(pos <= _len);
            const size_t extra = std::strlen(str);
            if (extra == 0)
                return;
            if (_len + extra > _cap)
                reserve(_len + extra);
            // Shift the tail right; memmove handles the overlapping ranges.
            std::memmove(_data + pos + extra, _data + pos, _len - pos);
            std::memcpy(_data + pos, str, extra);
            _len += extra;
            _data[_len] = '\0';
        }

        // Removes up to `len` characters starting at `pos`; npos or an
        // over-long `len` removes everything from `pos` onward.
        void erase(size_t pos, size_t len = npos)
        {
            assert(pos <= _len);
            const size_t remaining = _len - pos;
            // Compare against the remaining length: pos + len could wrap around.
            if (len >= remaining)
            {
                _data[pos] = '\0';
                _len = pos;
            }
            else
            {
                // Slide the tail, terminator included (+ 1), over the gap.
                std::memmove(_data + pos, _data + pos + len, remaining - len + 1);
                _len -= len;
            }
        }

        // Index of the first `c` at or after `pos`, or npos when absent.
        size_t find(char c, size_t pos = 0) const
        {
            assert(pos <= _len);
            for (size_t i = pos; i < _len; ++i)
                if (_data[i] == c)
                    return i;
            return npos;
        }

        // Copy of up to `len` characters starting at `pos`; npos or an
        // over-long `len` means "through the end".
        string substr(size_t pos = 0, size_t len = npos) const
        {
            assert(pos <= _len);
            const size_t available = _len - pos;
            // Clamp against the remaining length: pos + len could wrap around.
            const size_t n = (len < available) ? len : available;
            string tmp;
            tmp.reserve(n);
            std::memcpy(tmp._data, _data + pos, n);
            tmp._len = n;
            tmp._data[n] = '\0';
            return tmp;
        }

        // Sets the length to n: truncates when shrinking, pads with `ch` when growing.
        void resize(size_t n, char ch = '\0')
        {
            if (n < _len)
            {
                _data[n] = '\0';
                _len = n;
            }
            else
            {
                reserve(n);
                for (size_t i = _len; i < n; ++i)
                    _data[i] = ch;
                _data[n] = '\0';
                _len = n;
            }
        }

        // Empties the string without releasing the buffer.
        void clear()
        {
            _data[0] = '\0';
            _len = 0;
        }

        // Lexicographic order on unsigned bytes (memcmp), so the result agrees
        // with operator== and does not depend on the signedness of plain char.
        bool operator<(const string& rhs) const
        {
            const size_t common = (_len < rhs._len) ? _len : rhs._len;
            const int order = std::memcmp(_data, rhs._data, common);
            if (order != 0)
                return order < 0;
            return _len < rhs._len;
        }

        bool operator==(const string& rhs) const
        {
            return _len == rhs._len && std::memcmp(_data, rhs._data, _len) == 0;
        }

        bool operator!=(const string& rhs) const { return !(*this == rhs); }
        bool operator<=(const string& rhs) const { return (*this < rhs) || (*this == rhs); }
        bool operator> (const string& rhs) const { return !(*this <= rhs); }
        bool operator>=(const string& rhs) const { return !(*this < rhs); }

    private:
        char* _data;   // owned buffer of _cap + 1 bytes
        size_t _len;   // characters stored, excluding the terminator
        size_t _cap;   // characters the buffer can hold, excluding the terminator
    };

    const size_t string::npos = static_cast<size_t>(-1);

    // Writes the stored characters to `out`, driven by size().
    // `inline` because this definition sits in a header guarded by #pragma once
    // and may therefore be seen by several translation units.
    inline std::ostream& operator<<(std::ostream& out, const string& s)
    {
        for (size_t i = 0; i < s.size(); ++i)
            out << s[i];
        return out;
    }

    // Reads one whitespace-delimited word into `s`, replacing its contents.
    // Leading whitespace (per std::isspace) is skipped and the delimiter after
    // the word is left in the stream. failbit is set only when nothing could
    // be extracted, so a final word ending at end-of-file is a successful read.
    inline std::istream& operator>>(std::istream& in, string& s)
    {
        typedef std::istream::traits_type traits;

        s.clear();

        // Keep the look-ahead as int_type so EOF stays distinct from every byte.
        std::istream::int_type next = in.peek();
        while (next != traits::eof() && std::isspace(next))
        {
            in.get();
            next = in.peek();
        }

        // Collect characters in a small stack buffer and append in chunks.
        char buff[128];
        size_t used = 0;
        bool extracted = false;
        while (next != traits::eof() && !std::isspace(next))
        {
            buff[used++] = traits::to_char_type(next);
            extracted = true;
            in.get();
            if (used == sizeof(buff) - 1)
            {
                buff[used] = '\0';
                s += buff;
                used = 0;
            }
            // peek() is not called again once it has reported EOF, so a
            // successful read ends with eofbit only.
            next = in.peek();
        }
        if (used > 0)
        {
            buff[used] = '\0';
            s += buff;
        }

        if (!extracted)
            in.setstate(std::ios_base::failbit);
        return in;
    }
}

Tags: C++ string deep copy shallow copy copy-on-write

Posted on Fri, 08 May 2026 01:30:05 +0000 by BRUUUCE