401 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			401 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
// Scintilla Lexer for X12
 | 
						|
// @file LexX12.cxx
 | 
						|
// Written by Iain Clarke, IMCSoft & Inobiz AB.
 | 
						|
// X12 official documentation is behind a paywall, but there's a description of the syntax here:
 | 
						|
// http://www.rawlinsecconsulting.com/x12tutorial/x12syn.html
 | 
						|
// This code is subject to the same license terms as the rest of the scintilla project:
 | 
						|
// The License.txt file describes the conditions under which this software may be distributed.
 | 
						|
//
 | 
						|
 | 
						|
// Header order must match order in scripts/HeaderOrder.txt
 | 
						|
#include <cstdlib>
 | 
						|
#include <cassert>
 | 
						|
#include <cstring>
 | 
						|
#include <cctype>
 | 
						|
 | 
						|
#include <string>
 | 
						|
#include <string_view>
 | 
						|
 | 
						|
#include <vector>
 | 
						|
#include <algorithm>
 | 
						|
 | 
						|
#include "ILexer.h"
 | 
						|
#include "Scintilla.h"
 | 
						|
#include "SciLexer.h"
 | 
						|
#include "LexerModule.h"
 | 
						|
#include "DefaultLexer.h"
 | 
						|
 | 
						|
using namespace Scintilla;
 | 
						|
using namespace Lexilla;
 | 
						|
 | 
						|
class LexerX12 : public DefaultLexer
 | 
						|
{
 | 
						|
public:
 | 
						|
	LexerX12();
 | 
						|
	virtual ~LexerX12() {} // virtual destructor, as we inherit from ILexer
 | 
						|
 | 
						|
	static ILexer5 *Factory() {
 | 
						|
		return new LexerX12;
 | 
						|
	}
 | 
						|
 | 
						|
	int SCI_METHOD Version() const override
 | 
						|
	{
 | 
						|
		return lvRelease5;
 | 
						|
	}
 | 
						|
	void SCI_METHOD Release() override
 | 
						|
	{
 | 
						|
		delete this;
 | 
						|
	}
 | 
						|
 | 
						|
	const char * SCI_METHOD PropertyNames() override
 | 
						|
	{
 | 
						|
		return "fold";
 | 
						|
	}
 | 
						|
	int SCI_METHOD PropertyType(const char *) override
 | 
						|
	{
 | 
						|
		return SC_TYPE_BOOLEAN; // Only one property!
 | 
						|
	}
 | 
						|
	const char * SCI_METHOD DescribeProperty(const char *name) override
 | 
						|
	{
 | 
						|
		if (!strcmp(name, "fold"))
 | 
						|
			return "Whether to apply folding to document or not";
 | 
						|
		return "";
 | 
						|
	}
 | 
						|
 | 
						|
	Sci_Position SCI_METHOD PropertySet(const char *key, const char *val) override
 | 
						|
	{
 | 
						|
		if (!strcmp(key, "fold"))
 | 
						|
		{
 | 
						|
			m_bFold = strcmp(val, "0") ? true : false;
 | 
						|
			return 0;
 | 
						|
		}
 | 
						|
		return -1;
 | 
						|
	}
 | 
						|
	const char * SCI_METHOD PropertyGet(const char *) override {
 | 
						|
		return "";
 | 
						|
	}
 | 
						|
	const char * SCI_METHOD DescribeWordListSets() override
 | 
						|
	{
 | 
						|
		return "";
 | 
						|
	}
 | 
						|
	Sci_Position SCI_METHOD WordListSet(int, const char *) override
 | 
						|
	{
 | 
						|
		return -1;
 | 
						|
	}
 | 
						|
	void SCI_METHOD Lex(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 | 
						|
	void SCI_METHOD Fold(Sci_PositionU startPos, Sci_Position length, int initStyle, IDocument *pAccess) override;
 | 
						|
	void * SCI_METHOD PrivateCall(int, void *) override
 | 
						|
	{
 | 
						|
		return NULL;
 | 
						|
	}
 | 
						|
 | 
						|
protected:
 | 
						|
	struct Terminator
 | 
						|
	{
 | 
						|
		int Style = SCE_X12_BAD;
 | 
						|
		Sci_PositionU pos = 0;
 | 
						|
		Sci_PositionU length = 0;
 | 
						|
		int FoldChange = 0;
 | 
						|
	};
 | 
						|
	Terminator InitialiseFromISA(IDocument *pAccess);
 | 
						|
	Sci_PositionU FindPreviousSegmentStart(IDocument *pAccess, Sci_Position startPos) const;
 | 
						|
	Terminator DetectSegmentHeader(IDocument *pAccess, Sci_PositionU pos) const;
 | 
						|
	Terminator FindNextTerminator(IDocument *pAccess, Sci_PositionU pos, bool bJustSegmentTerminator = false) const;
 | 
						|
 | 
						|
	bool m_bFold = false;
 | 
						|
	char m_SeparatorSubElement = 0;
 | 
						|
	char m_SeparatorElement = 0;
 | 
						|
	std::string m_SeparatorSegment; // might be multiple characters
 | 
						|
	std::string m_LineFeed;
 | 
						|
};
 | 
						|
 | 
						|
extern const LexerModule lmX12(SCLEX_X12, LexerX12::Factory, "x12");
 | 
						|
 | 
						|
///////////////////////////////////////////////////////////////////////////////
 | 
						|
 | 
						|
 | 
						|
 | 
						|
///////////////////////////////////////////////////////////////////////////////
 | 
						|
 | 
						|
LexerX12::LexerX12() : DefaultLexer("x12", SCLEX_X12)
 | 
						|
{
 | 
						|
}
 | 
						|
 | 
						|
void LexerX12::Lex(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
 | 
						|
{
 | 
						|
	Sci_PositionU posFinish = startPos + length;
 | 
						|
 | 
						|
	Terminator T = InitialiseFromISA(pAccess);
 | 
						|
 | 
						|
	if (T.Style == SCE_X12_BAD)
 | 
						|
	{
 | 
						|
		if (T.pos < startPos)
 | 
						|
			T.pos = startPos; // we may be colouring in batches.
 | 
						|
		pAccess->StartStyling(startPos);
 | 
						|
		pAccess->SetStyleFor(T.pos - startPos, SCE_X12_ENVELOPE);
 | 
						|
		pAccess->SetStyleFor(posFinish - T.pos, SCE_X12_BAD);
 | 
						|
		return;
 | 
						|
	}
 | 
						|
 | 
						|
	// Look backwards for a segment start or a document beginning
 | 
						|
	Sci_PositionU posCurrent = FindPreviousSegmentStart (pAccess, startPos);
 | 
						|
 | 
						|
	// Style buffer, so we're not issuing loads of notifications
 | 
						|
	pAccess->StartStyling(posCurrent);
 | 
						|
 | 
						|
	while (posCurrent < posFinish)
 | 
						|
	{
 | 
						|
		// Look for first element marker, so we can denote segment
 | 
						|
		T = DetectSegmentHeader(pAccess, posCurrent);
 | 
						|
		if (T.Style == SCE_X12_BAD)
 | 
						|
			break;
 | 
						|
 | 
						|
		pAccess->SetStyleFor(T.pos - posCurrent, T.Style);
 | 
						|
		pAccess->SetStyleFor(T.length, SCE_X12_SEP_ELEMENT);
 | 
						|
		posCurrent = T.pos + T.length;
 | 
						|
 | 
						|
		while (T.Style != SCE_X12_BAD && T.Style != SCE_X12_SEGMENTEND) // Break on bad or segment ending
 | 
						|
		{
 | 
						|
			T = FindNextTerminator(pAccess, posCurrent, false);
 | 
						|
			if (T.Style == SCE_X12_BAD)
 | 
						|
				break;
 | 
						|
 | 
						|
			int Style = T.Style;
 | 
						|
 | 
						|
			pAccess->SetStyleFor(T.pos - posCurrent, SCE_X12_DEFAULT);
 | 
						|
			pAccess->SetStyleFor(T.length, Style);
 | 
						|
			posCurrent = T.pos + T.length;
 | 
						|
		}
 | 
						|
		if (T.Style == SCE_X12_BAD)
 | 
						|
			break;
 | 
						|
	}
 | 
						|
 | 
						|
	pAccess->SetStyleFor(posFinish - posCurrent, SCE_X12_BAD);
 | 
						|
}
 | 
						|
 | 
						|
void LexerX12::Fold(Sci_PositionU startPos, Sci_Position length, int, IDocument *pAccess)
 | 
						|
{
 | 
						|
	if (!m_bFold)
 | 
						|
		return;
 | 
						|
 | 
						|
	// Are we even foldable?
 | 
						|
	// check for cr,lf,cr+lf.
 | 
						|
	if (m_LineFeed.empty())
 | 
						|
		return;
 | 
						|
 | 
						|
	Sci_PositionU posFinish = startPos + length;
 | 
						|
 | 
						|
	// Look backwards for a segment start or a document beginning
 | 
						|
	startPos = FindPreviousSegmentStart(pAccess, startPos);
 | 
						|
	Terminator T;
 | 
						|
 | 
						|
	Sci_PositionU currLine = pAccess->LineFromPosition(startPos);
 | 
						|
	int levelCurrentStyle = SC_FOLDLEVELBASE;
 | 
						|
	int indentCurrent = 0;
 | 
						|
	if (currLine > 0)
 | 
						|
	{
 | 
						|
		levelCurrentStyle = pAccess->GetLevel(currLine - 1); // bottom 12 bits are level
 | 
						|
		indentCurrent = levelCurrentStyle & (SC_FOLDLEVELBASE - 1); // indent from previous line
 | 
						|
		Sci_PositionU posLine = pAccess->LineStart(currLine - 1);
 | 
						|
		T = DetectSegmentHeader(pAccess, posLine);
 | 
						|
		indentCurrent += T.FoldChange;
 | 
						|
	}
 | 
						|
 | 
						|
	while (startPos < posFinish)
 | 
						|
	{
 | 
						|
		T = DetectSegmentHeader(pAccess, startPos);
 | 
						|
		int indentNext = indentCurrent + T.FoldChange;
 | 
						|
		if (indentNext < 0)
 | 
						|
			indentNext = 0;
 | 
						|
 | 
						|
		levelCurrentStyle = (T.FoldChange > 0) ? (SC_FOLDLEVELBASE | SC_FOLDLEVELHEADERFLAG) : SC_FOLDLEVELBASE;
 | 
						|
 | 
						|
		currLine = pAccess->LineFromPosition(startPos);
 | 
						|
		pAccess->SetLevel(currLine, levelCurrentStyle | indentCurrent);
 | 
						|
 | 
						|
		T = FindNextTerminator(pAccess, startPos, true);
 | 
						|
 | 
						|
		if (T.Style == SCE_X12_BAD)
 | 
						|
			break;
 | 
						|
 | 
						|
		startPos = T.pos + T.length;
 | 
						|
		indentCurrent = indentNext;
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
LexerX12::Terminator LexerX12::InitialiseFromISA(IDocument *pAccess)
 | 
						|
{
 | 
						|
	Sci_Position length = pAccess->Length();
 | 
						|
	if (length <= 108)
 | 
						|
		return { SCE_X12_BAD, 0 };
 | 
						|
 | 
						|
	pAccess->GetCharRange(&m_SeparatorElement, 3, 1);
 | 
						|
	pAccess->GetCharRange(&m_SeparatorSubElement, 104, 1);
 | 
						|
 | 
						|
	// Look for GS, as that's the next segment. Anything between 105 and GS/IEA is our segment separator.
 | 
						|
	Sci_Position posNextSegment;
 | 
						|
	char bufSegment[3] = { 0 };
 | 
						|
	for (posNextSegment = 105; posNextSegment < length - 3; posNextSegment++)
 | 
						|
	{
 | 
						|
		pAccess->GetCharRange(bufSegment, posNextSegment, 3);
 | 
						|
		if (!memcmp (bufSegment, "GS", 2) || !memcmp(bufSegment, "IEA", 3))
 | 
						|
		{
 | 
						|
			m_SeparatorSegment.resize(posNextSegment - 105);
 | 
						|
			pAccess->GetCharRange(&m_SeparatorSegment.at(0), 105, posNextSegment - 105);
 | 
						|
 | 
						|
			// Is some of that CR+LF?
 | 
						|
			size_t nPos = m_SeparatorSegment.find_last_not_of("\r\n");
 | 
						|
			m_LineFeed = m_SeparatorSegment.substr(nPos + 1);
 | 
						|
			m_SeparatorSegment = m_SeparatorSegment.substr(0, nPos + 1);
 | 
						|
			break;
 | 
						|
		}
 | 
						|
	}
 | 
						|
	if (m_SeparatorSegment.empty() && m_LineFeed.empty())
 | 
						|
	{
 | 
						|
		return { SCE_X12_BAD, 105 };
 | 
						|
	}
 | 
						|
 | 
						|
	// Validate we have an element separator, and it's not silly!
 | 
						|
	if (m_SeparatorElement == '\0' || m_SeparatorElement == '\n' || m_SeparatorElement == '\r')
 | 
						|
		return { SCE_X12_BAD, 3 };
 | 
						|
 | 
						|
	// Validate we have an element separator, and it's not silly!
 | 
						|
	if (m_SeparatorSubElement == '\0' || m_SeparatorSubElement == '\n' || m_SeparatorSubElement == '\r')
 | 
						|
		return { SCE_X12_BAD, 103 };
 | 
						|
	if (m_SeparatorElement == m_SeparatorSubElement)
 | 
						|
		return { SCE_X12_BAD, 104 };
 | 
						|
	for (auto& c : m_SeparatorSegment)
 | 
						|
	{
 | 
						|
		if (m_SeparatorElement == c)
 | 
						|
			return { SCE_X12_BAD, 105 };
 | 
						|
		if (m_SeparatorSubElement == c)
 | 
						|
			return { SCE_X12_BAD, 105 };
 | 
						|
	}
 | 
						|
 | 
						|
	// Check we have element markers at all the right places! ISA element has fixed entries.
 | 
						|
	std::vector<Sci_PositionU> ElementMarkers = { 3, 6, 17, 20, 31, 34, 50, 53, 69, 76, 81, 83, 89, 99, 101, 103 };
 | 
						|
	for (auto i : ElementMarkers)
 | 
						|
	{
 | 
						|
		char c;
 | 
						|
		pAccess->GetCharRange(&c, i, 1);
 | 
						|
		if (c != m_SeparatorElement)
 | 
						|
			return { SCE_X12_BAD, i };
 | 
						|
	}
 | 
						|
	// Check we have no element markers anywhere else!
 | 
						|
	for (Sci_PositionU i = 0; i < 105; i++)
 | 
						|
	{
 | 
						|
		if (std::find(ElementMarkers.begin(), ElementMarkers.end(), i) != ElementMarkers.end())
 | 
						|
			continue;
 | 
						|
 | 
						|
		char c;
 | 
						|
		pAccess->GetCharRange(&c, i, 1);
 | 
						|
		if (c == m_SeparatorElement)
 | 
						|
			return { SCE_X12_BAD, i };
 | 
						|
	}
 | 
						|
 | 
						|
	return { SCE_X12_ENVELOPE };
 | 
						|
}
 | 
						|
 | 
						|
Sci_PositionU LexerX12::FindPreviousSegmentStart(IDocument *pAccess, Sci_Position startPos) const
 | 
						|
{
 | 
						|
	Sci_PositionU length = pAccess->Length();
 | 
						|
	std::string bufTest = m_SeparatorSegment + m_LineFeed; // quick way of making the lengths the same
 | 
						|
	std::string bufCompare = bufTest;
 | 
						|
 | 
						|
	for (; startPos > 0; startPos--)
 | 
						|
	{
 | 
						|
		if (startPos + bufTest.size() > length)
 | 
						|
			continue;
 | 
						|
 | 
						|
		pAccess->GetCharRange(&bufTest.at(0), startPos, bufTest.size());
 | 
						|
		if (bufTest == bufCompare)
 | 
						|
		{
 | 
						|
			return startPos + bufTest.size();
 | 
						|
		}
 | 
						|
	}
 | 
						|
	// We didn't find a ', so just go with the beginning
 | 
						|
	return 0;
 | 
						|
}
 | 
						|
 | 
						|
LexerX12::Terminator LexerX12::DetectSegmentHeader(IDocument *pAccess, Sci_PositionU pos) const
 | 
						|
{
 | 
						|
	Sci_PositionU Length = pAccess->Length();
 | 
						|
	Length -= pos;
 | 
						|
	char c, Buf[4] = { 0 }; // max 3 + separator
 | 
						|
	for (Sci_PositionU posOffset = 0; posOffset < std::size(Buf) && posOffset < Length; posOffset++)
 | 
						|
	{
 | 
						|
		pAccess->GetCharRange(&c, pos + posOffset, 1);
 | 
						|
		if (c != m_SeparatorElement)
 | 
						|
		{
 | 
						|
			Buf[posOffset] = c;
 | 
						|
			continue;
 | 
						|
		}
 | 
						|
 | 
						|
		// check for special segments, involved in folding start/stop.
 | 
						|
		if (memcmp(Buf, "ISA", 3) == 0)
 | 
						|
			return { SCE_X12_ENVELOPE, pos + posOffset, 1, +1 };
 | 
						|
		if (memcmp(Buf, "IEA", 3) == 0)
 | 
						|
			return { SCE_X12_ENVELOPE, pos + posOffset, 1, -1 };
 | 
						|
		if (memcmp(Buf, "GS", 2) == 0)
 | 
						|
			return { SCE_X12_FUNCTIONGROUP, pos + posOffset, 1, +1 };
 | 
						|
		if (memcmp(Buf, "GE", 2) == 0)
 | 
						|
			return { SCE_X12_FUNCTIONGROUP, pos + posOffset, 1, -1 };
 | 
						|
		if (memcmp(Buf, "ST", 2) == 0)
 | 
						|
			return { SCE_X12_TRANSACTIONSET, pos + posOffset, 1, +1 };
 | 
						|
		if (memcmp(Buf, "SE", 2) == 0)
 | 
						|
			return { SCE_X12_TRANSACTIONSET, pos + posOffset, 1, -1 };
 | 
						|
		return { SCE_X12_SEGMENTHEADER, pos + posOffset, 1, 0 };
 | 
						|
	}
 | 
						|
	return { SCE_X12_BAD, pos, 0, 0 };
 | 
						|
}
 | 
						|
 | 
						|
LexerX12::Terminator LexerX12::FindNextTerminator(IDocument *pAccess, Sci_PositionU pos, bool bJustSegmentTerminator) const
 | 
						|
{
 | 
						|
	char c;
 | 
						|
	Sci_PositionU length = pAccess->Length();
 | 
						|
	std::string bufTestSegment = m_SeparatorSegment; // quick way of making the lengths the same
 | 
						|
	std::string bufTestLineFeed = m_LineFeed; // quick way of making the lengths the same
 | 
						|
 | 
						|
 | 
						|
	while (pos < (Sci_PositionU)length)
 | 
						|
	{
 | 
						|
		pAccess->GetCharRange(&c, pos, 1);
 | 
						|
		if (pos + m_SeparatorSegment.size() > length)
 | 
						|
			bufTestSegment.clear(); // going up - so once we can't get this, we're done with the buffer.
 | 
						|
		else if (!bufTestSegment.empty())
 | 
						|
			pAccess->GetCharRange(&bufTestSegment.at(0), pos, bufTestSegment.size());
 | 
						|
		if (pos + m_LineFeed.size() > length)
 | 
						|
			bufTestLineFeed.clear(); // going up - so once we can't get this, we're done with the buffer.
 | 
						|
		else if (!bufTestLineFeed.empty())
 | 
						|
			pAccess->GetCharRange(&bufTestLineFeed.at(0), pos, bufTestLineFeed.size());
 | 
						|
 | 
						|
		if (!bJustSegmentTerminator && c == m_SeparatorElement)
 | 
						|
			return { SCE_X12_SEP_ELEMENT, pos, 1 };
 | 
						|
		else if (!bJustSegmentTerminator && c == m_SeparatorSubElement)
 | 
						|
			return { SCE_X12_SEP_SUBELEMENT, pos, 1 };
 | 
						|
		else if (!m_SeparatorSegment.empty() && bufTestSegment == m_SeparatorSegment)
 | 
						|
		{
 | 
						|
			if (m_LineFeed.empty())
 | 
						|
				return { SCE_X12_SEGMENTEND, pos, m_SeparatorSegment.size() };
 | 
						|
			// is this the end?
 | 
						|
			if (pos + m_SeparatorSegment.size() == length)
 | 
						|
				return { SCE_X12_SEGMENTEND, pos, m_SeparatorSegment.size() };
 | 
						|
			// Check if we're followed by a linefeed.
 | 
						|
			if (pos + m_SeparatorSegment.size() + m_LineFeed.size() > length)
 | 
						|
				return { SCE_X12_BAD, pos };
 | 
						|
			bufTestSegment = m_LineFeed;
 | 
						|
			pAccess->GetCharRange(&bufTestSegment.at(0), pos + m_SeparatorSegment.size(), bufTestSegment.size());
 | 
						|
			if (bufTestSegment == m_LineFeed)
 | 
						|
				return { SCE_X12_SEGMENTEND, pos, m_SeparatorSegment.size() + m_LineFeed.size() };
 | 
						|
			break;
 | 
						|
		}
 | 
						|
		else if (m_SeparatorSegment.empty() && bufTestLineFeed == m_LineFeed)
 | 
						|
		{
 | 
						|
			return { SCE_X12_SEGMENTEND, pos, m_LineFeed.size() };
 | 
						|
		}
 | 
						|
		pos++;
 | 
						|
	}
 | 
						|
 | 
						|
	return { SCE_X12_BAD, pos };
 | 
						|
}
 |