
1023 lines
38 KiB
Raw Normal View History

2024-07-02 23:47:26 +08:00
/** @file testDocument.cxx
 ** Unit Tests for Scintilla internal data structures
 **/
#include <cassert>
#include <cstddef>
#include <cstring>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>
#include <set>
#include <optional>
#include <algorithm>
#include <iterator>
#include <memory>
#include <utility>
#include <iostream>
#include <fstream>
#include <iomanip>

#include "ScintillaTypes.h"
#include "ILoader.h"
#include "ILexer.h"
#include "Debugging.h"
#include "CharacterCategoryMap.h"
#include "Position.h"
#include "SplitVector.h"
#include "Partitioning.h"
#include "RunStyles.h"
#include "CellBuffer.h"
#include "CharClassify.h"
#include "Decoration.h"
#include "CaseFolder.h"
#include "Document.h"
#include "catch.hpp"
using namespace Scintilla;
using namespace Scintilla::Internal;
// Intended to set the global locale so that the std::regex related tests pass.
// The single static instance runs its constructor during static initialization,
// before any test case executes.
// NOTE(review): the try block below contains no statement, so no locale is
// actually installed here, and the reference the original "see" comment pointed
// at is absent - confirm both against the upstream test file.
struct GlobalLocaleInitializer {
    GlobalLocaleInitializer() {
        try {
        } catch (...) {
            // Deliberately ignored: any failure inside the try block is swallowed.
        }
    }
} globalLocaleInitializer;
// Test Document.
// One contiguous run of case folding translations for a single byte character set.
// The byte values from, from+1, ... from+length-1 fold to to, to+1, ... to+length-1.
struct Folding {
    int from;    // first byte value in the run
    int to;      // byte value that 'from' folds to
    int length;  // number of consecutive byte values covered by the run
};
// Table of case folding for non-ASCII bytes in Windows Latin code page 1252
// Each row is {first upper-case byte, first lower-case byte, run length}.
const Folding foldings1252[] = {
    {0x8a, 0x9a, 0x01},  // S with caron
    {0x8c, 0x9c, 0x01},  // OE ligature
    {0x8e, 0x9e, 0x01},  // Z with caron
    {0x9f, 0xff, 0x01},  // Y with diaeresis
    {0xc0, 0xe0, 0x17},  // A with grave .. O with diaeresis; stops before 0xd7, the multiplication sign
    {0xd8, 0xf8, 0x07},  // O with stroke .. thorn
};
// Table of case folding for non-ASCII bytes in Windows Russian code page 1251
// Each row is {first upper-case byte, first lower-case byte, run length}.
const Folding foldings1251[] = {
    {0x80, 0x90, 0x01},
    {0x81, 0x83, 0x01},
    {0x8a, 0x9a, 0x01},
    {0x8c, 0x9c, 0x04},
    {0xa1, 0xa2, 0x01},
    {0xa3, 0xbc, 0x01},
    {0xa5, 0xb4, 0x01},
    {0xa8, 0xb8, 0x01},
    {0xaa, 0xba, 0x01},
    {0xaf, 0xbf, 0x01},
    {0xb2, 0xb3, 0x01},
    {0xbd, 0xbe, 0x01},
    {0xc0, 0xe0, 0x20},  // the 32 basic Cyrillic capital letters
};
// Read the whole file at path and return its bytes.
// The stream is opened in binary mode so line ends are returned exactly as stored.
// When the file can not be opened the stream has nothing to read and the result is empty.
std::string ReadFile(const std::string &path) {
    std::ifstream ifs(path, std::ios::binary);
    // The extra parentheses stop this being parsed as a function declaration.
    std::string content((std::istreambuf_iterator<char>(ifs)),
        (std::istreambuf_iterator<char>()));
    return content;
}
struct Match {
Sci::Position location = 0;
Sci::Position length = 0;
constexpr Match() = default;
constexpr Match(Sci::Position location_, Sci::Position length_=0) : location(location_), length(length_) {
constexpr bool operator==(const Match &other) const {
return location == other.location && length == other.length;
// Write a Match as "location,length", for example when a Match has to be
// printed in test output.
std::ostream &operator << (std::ostream &os, Match const &value) {
    os << value.location << "," << value.length;
    return os;
}
struct DocPlus {
Document document;
DocPlus(std::string_view svInitial, int codePage) : document(DocumentOption::Default) {
document.InsertString(0, svInitial);
void SetCodePage(int codePage) {
if (codePage == CpUtf8) {
} else {
// This case folder will not handle many DBCS cases. Scintilla uses platform-specific code for DBCS
// case folding which can not easily be inserted in platform-independent tests.
std::unique_ptr<CaseFolderTable> pcft = std::make_unique<CaseFolderTable>();
void SetSBCSFoldings(const Folding *foldings, size_t length) {
std::unique_ptr<CaseFolderTable> pcft = std::make_unique<CaseFolderTable>();
for (size_t block = 0; block < length; block++) {
for (int fold = 0; fold < foldings[block].length; fold++) {
pcft->SetTranslation(foldings[block].from + fold, foldings[block].to + fold);
Sci::Position FindNeedle(std::string_view needle, FindOption options, Sci::Position *length) {
assert(*length == static_cast<Sci::Position>(needle.length()));
return document.FindText(0, document.Length(),, options, length);
Sci::Position FindNeedleReverse(std::string_view needle, FindOption options, Sci::Position *length) {
assert(*length == static_cast<Sci::Position>(needle.length()));
return document.FindText(document.Length(), 0,, options, length);
Match FindString(Sci::Position minPos, Sci::Position maxPos, std::string_view needle, FindOption flags) {
Sci::Position lengthFinding = needle.length();
const Sci::Position location = document.FindText(minPos, maxPos,, flags, &lengthFinding);
return { location, lengthFinding };
std::string Substitute(std::string_view substituteText) {
Sci::Position lengthsubstitute = substituteText.length();
std::string substituted = document.SubstituteByPosition(, &lengthsubstitute);
assert(lengthsubstitute == static_cast<Sci::Position>(substituted.length()));
return substituted;
void MoveGap(Sci::Position gapNew) {
// Move gap to gapNew by inserting
document.InsertString(gapNew, "!", 1);
// Remove insertion
document.DeleteChars(gapNew, 1);
[[nodiscard]] std::string Contents() const {
const Sci::Position length = document.Length();
std::string contents(length, 0);
document.GetCharRange(, 0, length);
return contents;
// Print a label followed by the milliseconds elapsed on tikka, right aligned in 5 columns.
void TimeTrace(std::string_view sv, const Catch::Timer &tikka) {
    // std::endl is kept so each timing line is flushed as soon as it is written.
    std::cout << sv << std::setw(5) << tikka.getElapsedMilliseconds() << " milliseconds" << std::endl;
}
// Tests of Document through DocPlus: insertion, case sensitive and insensitive
// search in single byte, UTF-8 and Shift-JIS (code page 932) text, character
// navigation in DBCS text, and regular expression search with substitution.
TEST_CASE("Document") {

    constexpr std::string_view sText = "Scintilla";
    constexpr Sci::Position sLength = sText.length();
    constexpr FindOption rePosix = FindOption::RegExp | FindOption::Posix;
    constexpr FindOption reCxx11 = FindOption::RegExp | FindOption::Cxx11RegEx;

    SECTION("InsertOneLine") {
        DocPlus doc("", 0);
        const Sci::Position length = doc.document.InsertString(0, sText);
        REQUIRE(sLength == doc.document.Length());
        REQUIRE(length == sLength);
        REQUIRE(1 == doc.document.LinesTotal());
        REQUIRE(0 == doc.document.LineStart(0));
        REQUIRE(0 == doc.document.LineFromPosition(0));
        REQUIRE(0 == doc.document.LineStartPosition(0));
        REQUIRE(sLength == doc.document.LineStart(1));
        REQUIRE(0 == doc.document.LineFromPosition(static_cast<int>(sLength)));
    }

    // Search ranges are from first argument to just before second argument
    // Arguments are expected to be at character boundaries and will be tweaked if
    // part way through a character.
    SECTION("SearchInLatin") {
        DocPlus doc("abcde", 0); // a b c d e
        constexpr std::string_view finding = "b";
        Sci::Position lengthFinding = finding.length();
        Sci::Position location = doc.FindNeedle(finding, FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 1);
        location = doc.FindNeedleReverse(finding, FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 1);
        location = doc.document.FindText(0, 2, finding.data(), FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 1);
        location = doc.document.FindText(0, 1, finding.data(), FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == -1);
    }

    SECTION("SearchInBothSegments") {
        DocPlus doc("ab-ab", 0); // a b - a b
        constexpr std::string_view finding = "ab";
        for (int gapPos = 0; gapPos <= 5; gapPos++) {
            // Place the gap at every possible position so that matches are
            // checked on both sides of it.
            doc.MoveGap(gapPos);
            Sci::Position lengthFinding = finding.length();
            Sci::Position location = doc.document.FindText(0, doc.document.Length(), finding.data(), FindOption::MatchCase, &lengthFinding);
            REQUIRE(location == 0);
            location = doc.document.FindText(2, doc.document.Length(), finding.data(), FindOption::MatchCase, &lengthFinding);
            REQUIRE(location == 3);
        }
    }

    SECTION("InsensitiveSearchInLatin") {
        DocPlus doc("abcde", 0); // a b c d e
        constexpr std::string_view finding = "B";
        Sci::Position lengthFinding = finding.length();
        Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding);
        REQUIRE(location == 1);
        location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding);
        REQUIRE(location == 1);
        location = doc.document.FindText(0, 2, finding.data(), FindOption::None, &lengthFinding);
        REQUIRE(location == 1);
        location = doc.document.FindText(0, 1, finding.data(), FindOption::None, &lengthFinding);
        REQUIRE(location == -1);
    }

    SECTION("InsensitiveSearchIn1252") {
        // In Windows Latin, code page 1252, C6 is AE and E6 is ae
        DocPlus doc("tru\xc6s\xe6t", 0); // t r u AE s ae t
        doc.SetSBCSFoldings(foldings1252, std::size(foldings1252));

        // Search for upper-case AE
        std::string_view finding = "\xc6";
        Sci::Position lengthFinding = finding.length();
        Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding);
        REQUIRE(location == 3);
        location = doc.document.FindText(4, doc.document.Length(), finding.data(), FindOption::None, &lengthFinding);
        REQUIRE(location == 5);
        location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding);
        REQUIRE(location == 5);

        // Search for lower-case ae
        finding = "\xe6";
        location = doc.FindNeedle(finding, FindOption::None, &lengthFinding);
        REQUIRE(location == 3);
        location = doc.document.FindText(4, doc.document.Length(), finding.data(), FindOption::None, &lengthFinding);
        REQUIRE(location == 5);
        location = doc.FindNeedleReverse(finding, FindOption::None, &lengthFinding);
        REQUIRE(location == 5);
    }

    SECTION("Search2InLatin") {
        // Checks that the initial '_' and final 'f' are ignored since they are outside the search bounds
        DocPlus doc("_abcdef", 0); // _ a b c d e f
        constexpr std::string_view finding = "cd";
        Sci::Position lengthFinding = finding.length();
        const Sci::Position docLength = doc.document.Length() - 1;
        Sci::Position location = doc.document.FindText(1, docLength, finding.data(), FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 3);
        location = doc.document.FindText(docLength, 1, finding.data(), FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 3);
        location = doc.document.FindText(docLength, 1, "bc", FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 2);
        location = doc.document.FindText(docLength, 1, "ab", FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 1);
        location = doc.document.FindText(docLength, 1, "de", FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 4);
        location = doc.document.FindText(docLength, 1, "_a", FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == -1);
        location = doc.document.FindText(docLength, 1, "ef", FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == -1);
        lengthFinding = 3;
        location = doc.document.FindText(docLength, 1, "cde", FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 3);
    }

    SECTION("SearchInUTF8") {
        DocPlus doc("ab\xCE\x93" "d", CpUtf8); // a b gamma d
        constexpr std::string_view finding = "b";
        Sci::Position lengthFinding = finding.length();
        Sci::Position location = doc.FindNeedle(finding, FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 1);
        location = doc.document.FindText(doc.document.Length(), 0, finding.data(), FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 1);
        location = doc.document.FindText(0, 1, finding.data(), FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == -1);
        // Check doesn't try to follow a lead-byte past the search end
        constexpr std::string_view findingUTF = "\xCE\x93";
        lengthFinding = findingUTF.length();
        location = doc.document.FindText(0, 4, findingUTF.data(), FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 2);
        // Only succeeds as 3 is partway through character so adjusted to 4
        location = doc.document.FindText(0, 3, findingUTF.data(), FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 2);
        location = doc.document.FindText(0, 2, findingUTF.data(), FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == -1);
    }

    SECTION("InsensitiveSearchInUTF8") {
        DocPlus doc("ab\xCE\x93" "d", CpUtf8); // a b gamma d
        constexpr std::string_view finding = "b";
        Sci::Position lengthFinding = finding.length();
        Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding);
        REQUIRE(location == 1);
        location = doc.document.FindText(doc.document.Length(), 0, finding.data(), FindOption::None, &lengthFinding);
        REQUIRE(location == 1);
        constexpr std::string_view findingUTF = "\xCE\x93";
        lengthFinding = findingUTF.length();
        location = doc.FindNeedle(findingUTF, FindOption::None, &lengthFinding);
        REQUIRE(location == 2);
        location = doc.document.FindText(doc.document.Length(), 0, findingUTF.data(), FindOption::None, &lengthFinding);
        REQUIRE(location == 2);
        location = doc.document.FindText(0, 4, findingUTF.data(), FindOption::None, &lengthFinding);
        REQUIRE(location == 2);
        // Only succeeds as 3 is partway through character so adjusted to 4
        location = doc.document.FindText(0, 3, findingUTF.data(), FindOption::None, &lengthFinding);
        REQUIRE(location == 2);
        location = doc.document.FindText(0, 2, findingUTF.data(), FindOption::None, &lengthFinding);
        REQUIRE(location == -1);
    }

    SECTION("SearchInShiftJIS") {
        // {CJK UNIFIED IDEOGRAPH-9955} is two bytes: {0xE9, 'b'} in Shift-JIS
        // The 'b' can be incorrectly matched by the search string 'b' when the search
        // does not iterate the text correctly.
        DocPlus doc("ab\xe9" "b ", 932); // a b {CJK UNIFIED IDEOGRAPH-9955} {space}
        constexpr std::string_view finding = "b";
        // Search forwards
        Sci::Position lengthFinding = finding.length();
        Sci::Position location = doc.FindNeedle(finding, FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 1);
        // Search backwards
        lengthFinding = finding.length();
        location = doc.document.FindText(doc.document.Length(), 0, finding.data(), FindOption::MatchCase, &lengthFinding);
        REQUIRE(location == 1);
    }

    SECTION("InsensitiveSearchInShiftJIS") {
        // {CJK UNIFIED IDEOGRAPH-9955} is two bytes: {0xE9, 'b'} in Shift-JIS
        // The 'b' can be incorrectly matched by the search string 'b' when the search
        // does not iterate the text correctly.
        DocPlus doc("ab\xe9" "b ", 932); // a b {CJK UNIFIED IDEOGRAPH-9955} {space}
        constexpr std::string_view finding = "b";
        // Search forwards
        Sci::Position lengthFinding = finding.length();
        Sci::Position location = doc.FindNeedle(finding, FindOption::None, &lengthFinding);
        REQUIRE(location == 1);
        // Search backwards
        lengthFinding = finding.length();
        location = doc.document.FindText(doc.document.Length(), 0, finding.data(), FindOption::None, &lengthFinding);
        REQUIRE(location == 1);
        constexpr std::string_view finding932 = "\xe9" "b";
        // Search forwards
        lengthFinding = finding932.length();
        location = doc.FindNeedle(finding932, FindOption::None, &lengthFinding);
        REQUIRE(location == 2);
        // Search backwards
        lengthFinding = finding932.length();
        location = doc.document.FindText(doc.document.Length(), 0, finding932.data(), FindOption::None, &lengthFinding);
        REQUIRE(location == 2);
        location = doc.document.FindText(0, 3, finding932.data(), FindOption::None, &lengthFinding);
        REQUIRE(location == 2);
        location = doc.document.FindText(0, 2, finding932.data(), FindOption::None, &lengthFinding);
        REQUIRE(location == -1);
        // Can not test case mapping of double byte text as folder available here does not implement this
    }

    SECTION("GetCharacterAndWidth DBCS") {
        Document doc(DocumentOption::Default);
        // A raw Document is used here, so the code page has to be selected explicitly.
        doc.SetDBCSCodePage(932);
        REQUIRE(doc.CodePage() == 932);
        const Sci::Position length = doc.InsertString(0, "H\x84\xff\x84H", 5);
        // This text is invalid in code page 932.
        // A reasonable interpretation is as 4 items: 2 characters and 2 character fragments
        // The last item is a 2-byte CYRILLIC CAPITAL LETTER ZE character
        // H [84] [FF] ZE
        REQUIRE(5 == length);
        REQUIRE(5 == doc.Length());
        Sci::Position width = 0;
        // test GetCharacterAndWidth()
        int ch = doc.GetCharacterAndWidth(0, &width);
        REQUIRE(width == 1);
        REQUIRE(ch == 'H');
        width = 0;
        ch = doc.GetCharacterAndWidth(1, &width);
        REQUIRE(width == 1);
        REQUIRE(ch == 0x84);
        width = 0;
        ch = doc.GetCharacterAndWidth(2, &width);
        REQUIRE(width == 1);
        REQUIRE(ch == 0xff);
        width = 0;
        ch = doc.GetCharacterAndWidth(3, &width);
        REQUIRE(width == 2);
        REQUIRE(ch == 0x8448);
        // test LenChar()
        width = doc.LenChar(0);
        REQUIRE(width == 1);
        width = doc.LenChar(1);
        REQUIRE(width == 1);
        width = doc.LenChar(2);
        REQUIRE(width == 1);
        width = doc.LenChar(3);
        REQUIRE(width == 2);
        // test MovePositionOutsideChar()
        Sci::Position pos = doc.MovePositionOutsideChar(1, 1);
        REQUIRE(pos == 1);
        pos = doc.MovePositionOutsideChar(2, 1);
        REQUIRE(pos == 2);
        pos = doc.MovePositionOutsideChar(3, 1);
        REQUIRE(pos == 3);
        pos = doc.MovePositionOutsideChar(4, 1);
        REQUIRE(pos == 5);
        pos = doc.MovePositionOutsideChar(1, -1);
        REQUIRE(pos == 1);
        pos = doc.MovePositionOutsideChar(2, -1);
        REQUIRE(pos == 2);
        pos = doc.MovePositionOutsideChar(3, -1);
        REQUIRE(pos == 3);
        pos = doc.MovePositionOutsideChar(4, -1);
        REQUIRE(pos == 3);
        // test NextPosition()
        pos = doc.NextPosition(0, 1);
        REQUIRE(pos == 1);
        pos = doc.NextPosition(1, 1);
        REQUIRE(pos == 2);
        pos = doc.NextPosition(2, 1);
        REQUIRE(pos == 3);
        pos = doc.NextPosition(3, 1);
        REQUIRE(pos == 5);
        pos = doc.NextPosition(1, -1);
        REQUIRE(pos == 0);
        pos = doc.NextPosition(2, -1);
        REQUIRE(pos == 1);
        pos = doc.NextPosition(3, -1);
        REQUIRE(pos == 2);
        pos = doc.NextPosition(5, -1);
        REQUIRE(pos == 3);
    }

    SECTION("NextPosition Valid DBCS") {
        Document doc(DocumentOption::Default);
        // A raw Document is used here, so the code page has to be selected explicitly.
        doc.SetDBCSCodePage(932);
        REQUIRE(doc.CodePage() == 932);
        // This text is valid in code page 932.
        // O p e n = U+958B Ku ( O ) U+7DE8 -
        // U+958B open
        // U+7DE8 arrange
        constexpr std::string_view japaneseText = "Open=\x8aJ\x82\xad(O)\x95\xd2-";
        const Sci::Position length = doc.InsertString(0, japaneseText);
        REQUIRE(length == 15);
        // Forwards
        REQUIRE(doc.NextPosition( 0, 1) == 1);
        REQUIRE(doc.NextPosition( 1, 1) == 2);
        REQUIRE(doc.NextPosition( 2, 1) == 3);
        REQUIRE(doc.NextPosition( 3, 1) == 4);
        REQUIRE(doc.NextPosition( 4, 1) == 5);
        REQUIRE(doc.NextPosition( 5, 1) == 7); // Double byte
        REQUIRE(doc.NextPosition( 6, 1) == 7);
        REQUIRE(doc.NextPosition( 7, 1) == 9); // Double byte
        REQUIRE(doc.NextPosition( 8, 1) == 9);
        REQUIRE(doc.NextPosition( 9, 1) == 10);
        REQUIRE(doc.NextPosition(10, 1) == 11);
        REQUIRE(doc.NextPosition(11, 1) == 12);
        REQUIRE(doc.NextPosition(12, 1) == 14); // Double byte
        REQUIRE(doc.NextPosition(13, 1) == 14);
        REQUIRE(doc.NextPosition(14, 1) == 15);
        REQUIRE(doc.NextPosition(15, 1) == 15);
        // Backwards
        REQUIRE(doc.NextPosition( 0, -1) == 0);
        REQUIRE(doc.NextPosition( 1, -1) == 0);
        REQUIRE(doc.NextPosition( 2, -1) == 1);
        REQUIRE(doc.NextPosition( 3, -1) == 2);
        REQUIRE(doc.NextPosition( 4, -1) == 3);
        REQUIRE(doc.NextPosition( 5, -1) == 4);
        REQUIRE(doc.NextPosition( 6, -1) == 5); // Double byte
        REQUIRE(doc.NextPosition( 7, -1) == 5);
        REQUIRE(doc.NextPosition( 8, -1) == 7); // Double byte
        REQUIRE(doc.NextPosition( 9, -1) == 7);
        REQUIRE(doc.NextPosition(10, -1) == 9);
        REQUIRE(doc.NextPosition(11, -1) == 10);
        REQUIRE(doc.NextPosition(12, -1) == 11);
        REQUIRE(doc.NextPosition(13, -1) == 12); // Double byte
        REQUIRE(doc.NextPosition(14, -1) == 12);
        REQUIRE(doc.NextPosition(15, -1) == 14);
    }

    SECTION("RegexSearchAndSubstitution") {
        DocPlus doc("\n\r\r\n 1a\xCE\x93z \n\r\r\n 2b\xCE\x93y \n\r\r\n", CpUtf8);// 1a gamma z 2b gamma y
        const Sci::Position docLength = doc.document.Length();
        Match match;

        constexpr std::string_view finding = R"(\d+(\w+))";
        constexpr std::string_view substituteText = R"(\t\1\n)";
        constexpr std::string_view longest = "\\w+";
        std::string substituted;

        match = doc.FindString(0, docLength, finding, rePosix);
        REQUIRE(match == Match(5, 5));
        substituted = doc.Substitute(substituteText);
        REQUIRE(substituted == "\ta\xCE\x93z\n");

        match = doc.FindString(docLength, 0, finding, rePosix);
        REQUIRE(match == Match(16, 5));
        substituted = doc.Substitute(substituteText);
        REQUIRE(substituted == "\tb\xCE\x93y\n");

        match = doc.FindString(docLength, 0, longest, rePosix);
        REQUIRE(match == Match(16, 5));

#ifndef NO_CXX11_REGEX
        match = doc.FindString(0, docLength, finding, reCxx11);
        REQUIRE(match == Match(5, 5));
        substituted = doc.Substitute(substituteText);
        REQUIRE(substituted == "\ta\xCE\x93z\n");

        match = doc.FindString(docLength, 0, finding, reCxx11);
        REQUIRE(match == Match(16, 5));
        substituted = doc.Substitute(substituteText);
        REQUIRE(substituted == "\tb\xCE\x93y\n");

        match = doc.FindString(docLength, 0, longest, reCxx11);
        REQUIRE(match == Match(16, 5));
#endif
    }

    SECTION("RegexAssertion") {
        DocPlus doc("ab cd ef\r\ngh ij kl", CpUtf8);
        const Sci::Position docLength = doc.document.Length();
        Match match;

        constexpr std::string_view findingBOL = "^";
        match = doc.FindString(0, docLength, findingBOL, rePosix);
        REQUIRE(match == Match(0));
        match = doc.FindString(1, docLength, findingBOL, rePosix);
        REQUIRE(match == Match(10));
        match = doc.FindString(docLength, 0, findingBOL, rePosix);
        REQUIRE(match == Match(10));
        match = doc.FindString(docLength - 1, 0, findingBOL, rePosix);
        REQUIRE(match == Match(10));

#ifndef NO_CXX11_REGEX
        match = doc.FindString(0, docLength, findingBOL, reCxx11);
        REQUIRE(match == Match(0));
        match = doc.FindString(1, docLength, findingBOL, reCxx11);
        REQUIRE(match == Match(10));
        match = doc.FindString(docLength, 0, findingBOL, reCxx11);
        REQUIRE(match == Match(10));
        match = doc.FindString(docLength - 1, 0, findingBOL, reCxx11);
        REQUIRE(match == Match(10));
#endif

        constexpr std::string_view findingEOL = "$";
        match = doc.FindString(0, docLength, findingEOL, rePosix);
        REQUIRE(match == Match(8));
        match = doc.FindString(1, docLength, findingEOL, rePosix);
        REQUIRE(match == Match(8));
        match = doc.FindString(docLength, 0, findingEOL, rePosix);
        REQUIRE(match == Match(18));
        match = doc.FindString(docLength - 1, 0, findingEOL, rePosix);
        REQUIRE(match == Match(8));

#if !defined(NO_CXX11_REGEX) && !defined(_LIBCPP_VERSION)
        match = doc.FindString(0, docLength, findingEOL, reCxx11);
        REQUIRE(match == Match(8));
        match = doc.FindString(1, docLength, findingEOL, reCxx11);
        REQUIRE(match == Match(8));
        match = doc.FindString(docLength, 0, findingEOL, reCxx11);
        REQUIRE(match == Match(18));
        match = doc.FindString(docLength - 1, 0, findingEOL, reCxx11);
        REQUIRE(match == Match(8));
#endif

        constexpr std::string_view findingBOW = "\\<";
        match = doc.FindString(0, docLength, findingBOW, rePosix);
        REQUIRE(match == Match(0));
        match = doc.FindString(1, docLength, findingBOW, rePosix);
        REQUIRE(match == Match(3));
        match = doc.FindString(docLength, 0, findingBOW, rePosix);
        REQUIRE(match == Match(16));
        match = doc.FindString(docLength - 1, 0, findingBOW, rePosix);
        REQUIRE(match == Match(16));

        constexpr std::string_view findingEOW = "\\>";
        match = doc.FindString(0, docLength, findingEOW, rePosix);
        REQUIRE(match == Match(2));
        match = doc.FindString(1, docLength, findingEOW, rePosix);
        REQUIRE(match == Match(2));
        match = doc.FindString(docLength, 0, findingEOW, rePosix);
        REQUIRE(match == Match(18));
        match = doc.FindString(docLength - 1, 0, findingEOW, rePosix);
        REQUIRE(match == Match(15));

        constexpr std::string_view findingEOWEOL = "\\>$";
        match = doc.FindString(0, docLength, findingEOWEOL, rePosix);
        REQUIRE(match == Match(8));
        match = doc.FindString(10, docLength, findingEOWEOL, rePosix);
        REQUIRE(match == Match(18));

#ifndef NO_CXX11_REGEX
        constexpr std::string_view findingWB = "\\b";
        match = doc.FindString(0, docLength, findingWB, reCxx11);
        REQUIRE(match == Match(0));
        match = doc.FindString(1, docLength, findingWB, reCxx11);
        REQUIRE(match == Match(2));
        match = doc.FindString(docLength, 0, findingWB, reCxx11);
        // NOTE(review): the two expectations below are alternatives, presumably
        // libc++ versus other standard libraries - confirm which value belongs to which.
#ifdef _LIBCPP_VERSION
        REQUIRE(match == Match(16));
#else
        REQUIRE(match == Match(18));
#endif
        match = doc.FindString(docLength - 1, 0, findingWB, reCxx11);
        REQUIRE(match == Match(16));

        constexpr std::string_view findingNWB = "\\B";
        match = doc.FindString(0, docLength, findingNWB, reCxx11);
        REQUIRE(match == Match(1));
        match = doc.FindString(1, docLength, findingNWB, reCxx11);
        REQUIRE(match == Match(1));
        // NOTE(review): as above, the two sets of expectations are assumed to be
        // the libc++ and non-libc++ results respectively - confirm.
#ifdef _LIBCPP_VERSION
        match = doc.FindString(docLength, 0, findingNWB, reCxx11);
        REQUIRE(match == Match(18));
        match = doc.FindString(docLength - 1, 0, findingNWB, reCxx11);
        REQUIRE(match == Match(14));
#else
        match = doc.FindString(docLength, 0, findingNWB, reCxx11);
        REQUIRE(match == Match(17));
        match = doc.FindString(docLength - 1, 0, findingNWB, reCxx11);
        REQUIRE(match == Match(17));
#endif
#endif
    }

    SECTION("RegexContextualAssertion") {
        // For std::regex, check the use of assertions next to text in forward direction
        // These are more common than empty assertions
        DocPlus doc("ab cd ef\r\ngh ij kl", CpUtf8);
        const Sci::Position docLength = doc.document.Length();
        Match match;

#ifndef NO_CXX11_REGEX

        match = doc.FindString(0, docLength, "^[a-z]", reCxx11);
        REQUIRE(match == Match(0, 1));
        match = doc.FindString(1, docLength, "^[a-z]", reCxx11);
        REQUIRE(match == Match(10, 1));

        match = doc.FindString(0, docLength, "[a-z]$", reCxx11);
        REQUIRE(match == Match(7, 1));
        match = doc.FindString(10, docLength, "[a-z]$", reCxx11);
        REQUIRE(match == Match(17, 1));

        match = doc.FindString(0, docLength, "\\b[a-z]", reCxx11);
        REQUIRE(match == Match(0, 1));
        match = doc.FindString(1, docLength, "\\b[a-z]", reCxx11);
        REQUIRE(match == Match(3, 1));
        match = doc.FindString(0, docLength, "[a-z]\\b", reCxx11);
        REQUIRE(match == Match(1, 1));
        match = doc.FindString(2, docLength, "[a-z]\\b", reCxx11);
        REQUIRE(match == Match(4, 1));

        match = doc.FindString(0, docLength, "\\B[a-z]", reCxx11);
        REQUIRE(match == Match(1, 1));
        match = doc.FindString(1, docLength, "\\B[a-z]", reCxx11);
        REQUIRE(match == Match(1, 1));
        match = doc.FindString(0, docLength, "[a-z]\\B", reCxx11);
        REQUIRE(match == Match(0, 1));
        match = doc.FindString(2, docLength, "[a-z]\\B", reCxx11);
        REQUIRE(match == Match(3, 1));

#endif
    }

    SECTION("RESearchMovePositionOutsideCharUTF8") {
        DocPlus doc(" a\xCE\x93\xCE\x93z ", CpUtf8);// a gamma gamma z
        const Sci::Position docLength = doc.document.Length();
        constexpr std::string_view finding = R"([a-z](\w)\1)";

        Match match = doc.FindString(0, docLength, finding, rePosix);
        REQUIRE(match == Match(1, 5));

        constexpr std::string_view substituteText = R"(\t\1\n)";
        std::string substituted = doc.Substitute(substituteText);
        REQUIRE(substituted == "\t\xCE\x93\n");

#ifndef NO_CXX11_REGEX
        match = doc.FindString(0, docLength, finding, reCxx11);
        REQUIRE(match == Match(1, 5));

        substituted = doc.Substitute(substituteText);
        REQUIRE(substituted == "\t\xCE\x93\n");
#endif
    }

    SECTION("RESearchMovePositionOutsideCharDBCS") {
        DocPlus doc(" \x98\x61xx 1aa\x83\xA1\x83\xA1z ", 932);// U+548C xx 1aa gamma gamma z
        const Sci::Position docLength = doc.document.Length();

        Match match = doc.FindString(0, docLength, R"([a-z](\w)\1)", rePosix);
        REQUIRE(match == Match(8, 5));

        constexpr std::string_view substituteText = R"(\t\1\n)";
        std::string substituted = doc.Substitute(substituteText);
        REQUIRE(substituted == "\t\x83\xA1\n");

        match = doc.FindString(0, docLength, R"(\w([a-z])\1)", rePosix);
        REQUIRE(match == Match(6, 3));

        substituted = doc.Substitute(substituteText);
        REQUIRE(substituted == "\ta\n");
    }

}
// Tests of the position reported by Document::Undo for coalesced edits.
TEST_CASE("DocumentUndo") {

    // These tests check that Undo reports the end of coalesced deletes

    constexpr std::string_view sText = "Scintilla";
    DocPlus doc(sText, 0);

    SECTION("CheckDeleteForwards") {
        // Delete forwards like the Del key
        // Drop the undo record of the initial text so that only the deletions
        // below are on the undo stack.
        doc.document.DeleteUndoHistory();
        doc.document.DeleteChars(1, 1);
        doc.document.DeleteChars(1, 1);
        doc.document.DeleteChars(1, 1);
        const Sci::Position position = doc.document.Undo();
        REQUIRE(position == 4); // End of reinsertion
        REQUIRE(!doc.document.CanUndo()); // Exhausted undo stack
    }

    SECTION("CheckDeleteBackwards") {
        // Delete backwards like the backspace key
        // Drop the undo record of the initial text first.
        doc.document.DeleteUndoHistory();
        doc.document.DeleteChars(5, 1);
        doc.document.DeleteChars(4, 1);
        doc.document.DeleteChars(3, 1);
        const Sci::Position position = doc.document.Undo();
        REQUIRE(position == 6); // End of reinsertion
        REQUIRE(!doc.document.CanUndo()); // Exhausted undo stack
    }

    SECTION("CheckBothWays") {
        // Alternate forward (Del) and backward (Backspace) deletions
        // Like having the caret at position 5 then
        // Drop the undo record of the initial text first.
        doc.document.DeleteUndoHistory();
        doc.document.DeleteChars(5, 1); // Del
        doc.document.DeleteChars(4, 1); // Backspace
        doc.document.DeleteChars(4, 1); // Del
        doc.document.DeleteChars(3, 1); // Backspace
        const Sci::Position position = doc.document.Undo();
        REQUIRE(position == 7); // End of reinsertion, Start at 5, 2*Del
        REQUIRE(!doc.document.CanUndo()); // Exhausted undo stack
    }

    SECTION("CheckInsert") {
        // Insertions are only coalesced when following previous
        // Drop the undo record of the initial text first.
        doc.document.DeleteUndoHistory();
        doc.document.InsertString(1, "1");
        doc.document.InsertString(2, "2");
        doc.document.InsertString(3, "3");
        REQUIRE(doc.Contents() == "S123cintilla");
        const Sci::Position position = doc.document.Undo();
        REQUIRE(position == 1); // Start of insertions
        REQUIRE(!doc.document.CanUndo()); // Exhausted undo stack
    }

    SECTION("CheckGrouped") {
        // Check that position returned for group is that at end of first deletion set
        // Also include a container undo action.
        // Drop the undo record of the initial text first.
        doc.document.DeleteUndoHistory();
        // Group the edits so that a single Undo reverts all of them.
        doc.document.BeginUndoAction();
        // At 1, 2*Del so end of initial deletion sequence is 3
        doc.document.DeleteChars(1, 1); // 'c'
        doc.document.DeleteChars(1, 1); // 'i'
        doc.document.AddUndoAction(99, true);
        doc.document.InsertString(1, "1");
        doc.document.DeleteChars(4, 2); // 'il'
        doc.document.EndUndoAction();
        REQUIRE(doc.Contents() == "S1ntla");
        const Sci::Position position = doc.document.Undo();
        REQUIRE(position == 3); // End of first deletion sequence
        REQUIRE(!doc.document.CanUndo()); // Exhausted undo stack
    }

}
// Tests of Document::IsWordAt for ranges inside the text and at its ends.
TEST_CASE("Words") {

    SECTION("WordsInText") {
        const DocPlus doc(" abc ", 0);
        REQUIRE(doc.document.IsWordAt(1, 4));
        REQUIRE(!doc.document.IsWordAt(0, 1));
        REQUIRE(!doc.document.IsWordAt(1, 2));

        // A run of punctuation between spaces is accepted as a word too.
        const DocPlus docPunct(" [!] ", 0);
        REQUIRE(docPunct.document.IsWordAt(1, 4));
        REQUIRE(!docPunct.document.IsWordAt(0, 1));
        REQUIRE(!docPunct.document.IsWordAt(1, 2));

        const DocPlus docMixed(" -ab ", 0); // '-' is punctuation, 'ab' is word
        REQUIRE(docMixed.document.IsWordAt(2, 4));
        REQUIRE(docMixed.document.IsWordAt(1, 4));
        REQUIRE(docMixed.document.IsWordAt(1, 2));
        REQUIRE(!docMixed.document.IsWordAt(1, 3)); // 3 is between a and b so not word edge

        // Scintilla's word definition just examines the ends
        const DocPlus docOverSpace(" a b ", 0);
        REQUIRE(docOverSpace.document.IsWordAt(1, 4));
    }

    SECTION("WordsAtEnds") {
        const DocPlus doc("a c", 0);
        REQUIRE(doc.document.IsWordAt(0, 1));
        REQUIRE(doc.document.IsWordAt(2, 3));

        const DocPlus docEndSpace(" a c ", 0);
        REQUIRE(!docEndSpace.document.IsWordAt(0, 2));
        REQUIRE(!docEndSpace.document.IsWordAt(3, 5));
    }

}
// Tests of Document::SafeSegment, which returns the length of the leading part
// of a text at which the text can be split. Each section uses a document with a
// different code page.
TEST_CASE("SafeSegment") {

    SECTION("Short") {
        const DocPlus doc("", 0);
        // all encoding: break before or after last space
        constexpr std::string_view text = "12 ";
        const size_t length = doc.document.SafeSegment(text);
        REQUIRE(length <= text.length());
        REQUIRE(text[length - 1] == '2');
        REQUIRE(text[length] == ' ');
    }

    SECTION("ASCII") {
        const DocPlus doc("", 0);
        // all encoding: break before or after last space
        std::string_view text = "12 3 \t45";
        size_t length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == ' ');
        REQUIRE(text[length] == '\t');

        // UTF-8 and ASCII: word and punctuation boundary in middle of text
        text = "(IsBreakSpace(text[j]))";
        length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == 'j');
        REQUIRE(text[length] == ']');

        // UTF-8 and ASCII: word and punctuation boundary near start of text
        text = "(IsBreakSpace";
        length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == '(');
        REQUIRE(text[length] == 'I');

        // UTF-8 and ASCII: word and punctuation boundary near end of text
        text = "IsBreakSpace)";
        length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == 'e');
        REQUIRE(text[length] == ')');

        // break before last character
        text = "JapaneseJa";
        length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == 'J');
        REQUIRE(text[length] == 'a');
    }

    SECTION("UTF-8") {
        const DocPlus doc("", CpUtf8);
        // break before last character: no trail byte
        std::string_view text = "JapaneseJa";
        size_t length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == 'J');
        REQUIRE(text[length] == 'a');

        // break before last character: 1 trail byte
        text = "Japanese\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xc2\xa9";
        length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == '\x9e');
        REQUIRE(text[length] == '\xc2');

        // break before last character: 2 trail bytes
        text = "Japanese\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e";
        length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == '\xac');
        REQUIRE(text[length] == '\xe8');

        // break before last character: 3 trail bytes
        text = "Japanese\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xf0\x9f\x98\x8a";
        length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == '\x9e');
        REQUIRE(text[length] == '\xf0');
    }

    SECTION("DBCS Shift-JIS") {
        const DocPlus doc("", 932);
        // word and punctuation boundary in middle of text: single byte
        std::string_view text = "(IsBreakSpace(text[j]))";
        size_t length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == 'j');
        REQUIRE(text[length] == ']');

        // word and punctuation boundary in middle of text: double byte
        text = "(IsBreakSpace(text[\x8c\xea]))";
        length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == '\xea');
        REQUIRE(text[length] == ']');

        // word and punctuation boundary near start of text
        text = "(IsBreakSpace";
        length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == '(');
        REQUIRE(text[length] == 'I');

        // word and punctuation boundary near end of text: single byte
        text = "IsBreakSpace)";
        length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == 'e');
        REQUIRE(text[length] == ')');

        // word and punctuation boundary near end of text: double byte
        text = "IsBreakSpace\x8c\xea)";
        length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == '\xea');
        REQUIRE(text[length] == ')');

        // break before last character: single byte
        text = "JapaneseJa";
        length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == 'J');
        REQUIRE(text[length] == 'a');

        // break before last character: double byte
        text = "Japanese\x93\xfa\x96\x7b\x8c\xea";
        length = doc.document.SafeSegment(text);
        REQUIRE(text[length - 1] == '\x7b');
        REQUIRE(text[length] == '\x8c');
    }

}
// Tests of per-line data (markers and annotations) when text is deleted and
// lines are merged.
TEST_CASE("PerLine") {

    SECTION("LineMarkers") {
        DocPlus doc("1\n2\n", CpUtf8);
        REQUIRE(doc.document.LinesTotal() == 3);
        const int mh1 = doc.document.AddMark(0, 0);
        const int mh2 = doc.document.AddMark(1, 1);
        const int mh3 = doc.document.AddMark(2, 2);
        REQUIRE(mh1 != -1);
        REQUIRE(mh2 != -1);
        REQUIRE(mh3 != -1);
        // There is no line 3, so adding a marker there fails.
        REQUIRE(doc.document.AddMark(3, 3) == -1);

        // delete first character, no change
        REQUIRE(doc.document.CharAt(0) == '1');
        doc.document.DeleteChars(0, 1);
        REQUIRE(doc.document.LinesTotal() == 3);
        REQUIRE(doc.document.MarkerHandleFromLine(0, 0) == mh1);
        REQUIRE(doc.document.MarkerHandleFromLine(0, 1) == -1);
        REQUIRE(doc.document.MarkerHandleFromLine(1, 0) == mh2);
        REQUIRE(doc.document.MarkerHandleFromLine(1, 1) == -1);

        // delete first line, so merged
        REQUIRE(doc.document.CharAt(0) == '\n');
        doc.document.DeleteChars(0, 1);
        REQUIRE(doc.document.CharAt(0) == '2');
        // Both handles must now be on line 0; their order is not checked.
        const std::set handleSet {mh1, mh2};
        const int handle1 = doc.document.MarkerHandleFromLine(0, 0);
        const int handle2 = doc.document.MarkerHandleFromLine(0, 1);
        REQUIRE(handle1 != handle2);
        REQUIRE(handleSet.count(handle1) == 1);
        REQUIRE(handleSet.count(handle2) == 1);
        REQUIRE(doc.document.MarkerHandleFromLine(0, 2) == -1);
        REQUIRE(doc.document.MarkerHandleFromLine(1, 0) == mh3);
        REQUIRE(doc.document.MarkerHandleFromLine(1, 1) == -1);
    }

    SECTION("LineAnnotation") {
        DocPlus doc("1\n2\n", CpUtf8);
        REQUIRE(doc.document.LinesTotal() == 3);
        Sci::Position length = doc.document.Length();
        doc.document.AnnotationSetText(0, "1");
        doc.document.AnnotationSetText(1, "1\n2");
        doc.document.AnnotationSetText(2, "1\n2\n3");
        REQUIRE(doc.document.AnnotationLines(0) == 1);
        REQUIRE(doc.document.AnnotationLines(1) == 2);
        REQUIRE(doc.document.AnnotationLines(2) == 3);
        REQUIRE(doc.document.AnnotationLines(3) == 0);

        // delete last line
        length -= 1;
        doc.document.DeleteChars(length, 1);
        // Deleting the last line moves its 3-line annotation to previous line,
        // deleting the 2-line annotation of the previous line.
        REQUIRE(doc.document.LinesTotal() == 2);
        REQUIRE(doc.document.AnnotationLines(0) == 1);
        REQUIRE(doc.document.AnnotationLines(1) == 3);
        REQUIRE(doc.document.AnnotationLines(2) == 0);

        // delete last character, no change
        length -= 1;
        REQUIRE(doc.document.CharAt(length) == '2');
        doc.document.DeleteChars(length, 1);
        REQUIRE(doc.document.LinesTotal() == 2);
        REQUIRE(doc.document.AnnotationLines(0) == 1);
        REQUIRE(doc.document.AnnotationLines(1) == 3);
        REQUIRE(doc.document.AnnotationLines(2) == 0);
    }

}