Skip to content

Commit 2808213

Browse files
committed
moved some helper function into TokenList::Stream
1 parent 8d20143 commit 2808213

2 files changed

Lines changed: 126 additions & 125 deletions

File tree

simplecpp.cpp

Lines changed: 124 additions & 123 deletions
Original file line number | Diff line number | Diff line change
@@ -231,7 +231,9 @@ class simplecpp::TokenList::Stream {
231231
public:
232232
Stream(std::istream &istr)
233233
: istr(istr)
234-
{}
234+
{
235+
bom = getAndSkipBOM();
236+
}
235237

236238
int get() {
237239
return istr.get();
@@ -246,8 +248,95 @@ class simplecpp::TokenList::Stream {
246248
return istr.good();
247249
}
248250

251+
unsigned char readChar()
252+
{
253+
unsigned char ch = static_cast<unsigned char>(get());
254+
255+
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
256+
// character is non-ASCII character then replace it with 0xff
257+
if (bom == 0xfeff || bom == 0xfffe) {
258+
const unsigned char ch2 = static_cast<unsigned char>(get());
259+
const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch);
260+
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
261+
}
262+
263+
// Handling of newlines..
264+
if (ch == '\r') {
265+
ch = '\n';
266+
if (bom == 0 && static_cast<char>(peek()) == '\n')
267+
(void)get();
268+
else if (bom == 0xfeff || bom == 0xfffe) {
269+
int c1 = get();
270+
int c2 = get();
271+
int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1);
272+
if (ch16 != '\n') {
273+
unget();
274+
unget();
275+
}
276+
}
277+
}
278+
279+
return ch;
280+
}
281+
282+
unsigned char peekChar()
283+
{
284+
unsigned char ch = static_cast<unsigned char>(peek());
285+
286+
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
287+
// character is non-ASCII character then replace it with 0xff
288+
if (bom == 0xfeff || bom == 0xfffe) {
289+
(void)get();
290+
const unsigned char ch2 = static_cast<unsigned char>(peek());
291+
unget();
292+
const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch);
293+
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
294+
}
295+
296+
// Handling of newlines..
297+
if (ch == '\r')
298+
ch = '\n';
299+
300+
return ch;
301+
}
302+
303+
void ungetChar()
304+
{
305+
unget();
306+
if (bom == 0xfeff || bom == 0xfffe)
307+
unget();
308+
}
309+
249310
private:
311+
unsigned short getAndSkipBOM()
312+
{
313+
const int ch1 = peek();
314+
315+
// The UTF-16 BOM is 0xfffe or 0xfeff.
316+
if (ch1 >= 0xfe) {
317+
unsigned short bom = (static_cast<unsigned char>(get()) << 8);
318+
if (peek() >= 0xfe)
319+
return bom | static_cast<unsigned char>(get());
320+
unget();
321+
return 0;
322+
}
323+
324+
// Skip UTF-8 BOM 0xefbbbf
325+
if (ch1 == 0xef) {
326+
(void)get();
327+
if (get() == 0xbb && peek() == 0xbf) {
328+
(void)get();
329+
} else {
330+
unget();
331+
unget();
332+
}
333+
}
334+
335+
return 0;
336+
}
337+
250338
std::istream &istr;
339+
unsigned short bom;
251340
};
252341

253342
simplecpp::TokenList::TokenList(std::vector<std::string> &filenames) : frontToken(nullptr), backToken(nullptr), files(filenames) {}
@@ -356,92 +445,6 @@ std::string simplecpp::TokenList::stringify() const
356445
return ret.str();
357446
}
358447

359-
static unsigned char readChar(simplecpp::TokenList::Stream &istr, unsigned int bom)
360-
{
361-
unsigned char ch = static_cast<unsigned char>(istr.get());
362-
363-
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
364-
// character is non-ASCII character then replace it with 0xff
365-
if (bom == 0xfeff || bom == 0xfffe) {
366-
const unsigned char ch2 = static_cast<unsigned char>(istr.get());
367-
const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch);
368-
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
369-
}
370-
371-
// Handling of newlines..
372-
if (ch == '\r') {
373-
ch = '\n';
374-
if (bom == 0 && static_cast<char>(istr.peek()) == '\n')
375-
(void)istr.get();
376-
else if (bom == 0xfeff || bom == 0xfffe) {
377-
int c1 = istr.get();
378-
int c2 = istr.get();
379-
int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1);
380-
if (ch16 != '\n') {
381-
istr.unget();
382-
istr.unget();
383-
}
384-
}
385-
}
386-
387-
return ch;
388-
}
389-
390-
static unsigned char peekChar(simplecpp::TokenList::Stream &istr, unsigned int bom)
391-
{
392-
unsigned char ch = static_cast<unsigned char>(istr.peek());
393-
394-
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
395-
// character is non-ASCII character then replace it with 0xff
396-
if (bom == 0xfeff || bom == 0xfffe) {
397-
(void)istr.get();
398-
const unsigned char ch2 = static_cast<unsigned char>(istr.peek());
399-
istr.unget();
400-
const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch);
401-
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
402-
}
403-
404-
// Handling of newlines..
405-
if (ch == '\r')
406-
ch = '\n';
407-
408-
return ch;
409-
}
410-
411-
static void ungetChar(simplecpp::TokenList::Stream &istr, unsigned int bom)
412-
{
413-
istr.unget();
414-
if (bom == 0xfeff || bom == 0xfffe)
415-
istr.unget();
416-
}
417-
418-
static unsigned short getAndSkipBOM(simplecpp::TokenList::Stream &istr)
419-
{
420-
const int ch1 = istr.peek();
421-
422-
// The UTF-16 BOM is 0xfffe or 0xfeff.
423-
if (ch1 >= 0xfe) {
424-
unsigned short bom = (static_cast<unsigned char>(istr.get()) << 8);
425-
if (istr.peek() >= 0xfe)
426-
return bom | static_cast<unsigned char>(istr.get());
427-
istr.unget();
428-
return 0;
429-
}
430-
431-
// Skip UTF-8 BOM 0xefbbbf
432-
if (ch1 == 0xef) {
433-
(void)istr.get();
434-
if (istr.get() == 0xbb && istr.peek() == 0xbf) {
435-
(void)istr.get();
436-
} else {
437-
istr.unget();
438-
istr.unget();
439-
}
440-
}
441-
442-
return 0;
443-
}
444-
445448
static bool isNameChar(unsigned char ch)
446449
{
447450
return std::isalnum(ch) || ch == '_' || ch == '$';
@@ -497,23 +500,21 @@ void simplecpp::TokenList::lineDirective(unsigned int fileIndex, unsigned int li
497500

498501
static const std::string COMMENT_END("*/");
499502

500-
void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, OutputList *outputList)
503+
void simplecpp::TokenList::readfile(Stream &stream, const std::string &filename, OutputList *outputList)
501504
{
502505
std::stack<simplecpp::Location> loc;
503506

504507
unsigned int multiline = 0U;
505508

506509
const Token *oldLastToken = nullptr;
507510

508-
const unsigned short bom = getAndSkipBOM(istr);
509-
510511
Location location(files);
511512
location.fileIndex = fileIndex(filename);
512513
location.line = 1U;
513514
location.col = 1U;
514-
while (istr.good()) {
515-
unsigned char ch = readChar(istr,bom);
516-
if (!istr.good())
515+
while (stream.good()) {
516+
unsigned char ch = stream.readChar();
517+
if (!stream.good())
517518
break;
518519
if (ch < ' ' && ch != '\t' && ch != '\n' && ch != '\r')
519520
ch = ' ';
@@ -591,12 +592,12 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
591592

592593
if (cback() && cback()->location.line == location.line && cback()->previous && cback()->previous->op == '#' && (lastLine() == "# error" || lastLine() == "# warning")) {
593594
char prev = ' ';
594-
while (istr.good() && (prev == '\\' || (ch != '\r' && ch != '\n'))) {
595+
while (stream.good() && (prev == '\\' || (ch != '\r' && ch != '\n'))) {
595596
currentToken += ch;
596597
prev = ch;
597-
ch = readChar(istr, bom);
598+
ch = stream.readChar();
598599
}
599-
ungetChar(istr, bom);
600+
stream.ungetChar();
600601
push_back(new Token(currentToken, location));
601602
location.adjust(currentToken);
602603
continue;
@@ -605,21 +606,21 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
605606
// number or name
606607
if (isNameChar(ch)) {
607608
const bool num = std::isdigit(ch);
608-
while (istr.good() && isNameChar(ch)) {
609+
while (stream.good() && isNameChar(ch)) {
609610
currentToken += ch;
610-
ch = readChar(istr,bom);
611-
if (num && ch=='\'' && isNameChar(peekChar(istr,bom)))
612-
ch = readChar(istr,bom);
611+
ch = stream.readChar();
612+
if (num && ch=='\'' && isNameChar(stream.peekChar()))
613+
ch = stream.readChar();
613614
}
614615

615-
ungetChar(istr,bom);
616+
stream.ungetChar();
616617
}
617618

618619
// comment
619-
else if (ch == '/' && peekChar(istr,bom) == '/') {
620-
while (istr.good() && ch != '\r' && ch != '\n') {
620+
else if (ch == '/' && stream.peekChar() == '/') {
621+
while (stream.good() && ch != '\r' && ch != '\n') {
621622
currentToken += ch;
622-
ch = readChar(istr, bom);
623+
ch = stream.readChar();
623624
}
624625
const std::string::size_type pos = currentToken.find_last_not_of(" \t");
625626
if (pos < currentToken.size() - 1U && currentToken[pos] == '\\')
@@ -628,20 +629,20 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
628629
++multiline;
629630
currentToken.erase(currentToken.size() - 1U);
630631
} else {
631-
ungetChar(istr, bom);
632+
stream.ungetChar();
632633
}
633634
}
634635

635636
// comment
636-
else if (ch == '/' && peekChar(istr,bom) == '*') {
637+
else if (ch == '/' && stream.peekChar() == '*') {
637638
currentToken = "/*";
638-
(void)readChar(istr,bom);
639-
ch = readChar(istr,bom);
640-
while (istr.good()) {
639+
(void)stream.readChar();
640+
ch = stream.readChar();
641+
while (stream.good()) {
641642
currentToken += ch;
642643
if (currentToken.size() >= 4U && endsWith(currentToken, COMMENT_END))
643644
break;
644-
ch = readChar(istr,bom);
645+
ch = stream.readChar();
645646
}
646647
// multiline..
647648

@@ -672,12 +673,12 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
672673
std::string delim;
673674
currentToken = ch;
674675
prefix.resize(prefix.size() - 1);
675-
ch = readChar(istr,bom);
676-
while (istr.good() && ch != '(' && ch != '\n') {
676+
ch = stream.readChar();
677+
while (stream.good() && ch != '(' && ch != '\n') {
677678
delim += ch;
678-
ch = readChar(istr,bom);
679+
ch = stream.readChar();
679680
}
680-
if (!istr.good() || ch == '\n') {
681+
if (!stream.good() || ch == '\n') {
681682
if (outputList) {
682683
Output err(files);
683684
err.type = Output::SYNTAX_ERROR;
@@ -688,8 +689,8 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
688689
return;
689690
}
690691
const std::string endOfRawString(')' + delim + currentToken);
691-
while (istr.good() && !(endsWith(currentToken, endOfRawString) && currentToken.size() > 1))
692-
currentToken += readChar(istr,bom);
692+
while (stream.good() && !(endsWith(currentToken, endOfRawString) && currentToken.size() > 1))
693+
currentToken += stream.readChar();
693694
if (!endsWith(currentToken, endOfRawString)) {
694695
if (outputList) {
695696
Output err(files);
@@ -713,7 +714,7 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
713714
continue;
714715
}
715716

716-
currentToken = readUntil(istr,location,ch,ch,outputList,bom);
717+
currentToken = readUntil(stream,location,ch,ch,outputList);
717718
if (currentToken.size() < 2U)
718719
// Error is reported by readUntil()
719720
return;
@@ -745,7 +746,7 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
745746
}
746747

747748
if (currentToken == "<" && lastLine() == "# include") {
748-
currentToken = readUntil(istr, location, '<', '>', outputList, bom);
749+
currentToken = readUntil(stream, location, '<', '>', outputList);
749750
if (currentToken.size() < 2U)
750751
return;
751752
}
@@ -1190,15 +1191,15 @@ void simplecpp::TokenList::removeComments()
11901191
}
11911192
}
11921193

1193-
std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &location, const char start, const char end, OutputList *outputList, unsigned int bom)
1194+
std::string simplecpp::TokenList::readUntil(Stream &stream, const Location &location, const char start, const char end, OutputList *outputList)
11941195
{
11951196
std::string ret;
11961197
ret += start;
11971198

11981199
bool backslash = false;
11991200
char ch = 0;
1200-
while (ch != end && ch != '\r' && ch != '\n' && istr.good()) {
1201-
ch = readChar(istr, bom);
1201+
while (ch != end && ch != '\r' && ch != '\n' && stream.good()) {
1202+
ch = stream.readChar();
12021203
if (backslash && ch == '\n') {
12031204
ch = 0;
12041205
backslash = false;
@@ -1210,7 +1211,7 @@ std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &locati
12101211
bool update_ch = false;
12111212
char next = 0;
12121213
do {
1213-
next = readChar(istr, bom);
1214+
next = stream.readChar();
12141215
if (next == '\r' || next == '\n') {
12151216
ret.erase(ret.size()-1U);
12161217
backslash = (next == '\r');
@@ -1224,7 +1225,7 @@ std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &locati
12241225
}
12251226
}
12261227

1227-
if (!istr.good() || ch != end) {
1228+
if (!stream.good() || ch != end) {
12281229
clear();
12291230
if (outputList) {
12301231
Output err(files);

0 commit comments

Comments
 (0)