@@ -231,7 +231,9 @@ class simplecpp::TokenList::Stream {
231231public:
232232 Stream (std::istream &istr)
233233 : istr(istr)
234- {}
234+ {
235+ bom = getAndSkipBOM ();
236+ }
235237
236238 int get () {
237239 return istr.get ();
@@ -246,8 +248,95 @@ class simplecpp::TokenList::Stream {
246248 return istr.good ();
247249 }
248250
251+ unsigned char readChar ()
252+ {
253+ unsigned char ch = static_cast <unsigned char >(get ());
254+
255+ // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
256+ // character is non-ASCII character then replace it with 0xff
257+ if (bom == 0xfeff || bom == 0xfffe ) {
258+ const unsigned char ch2 = static_cast <unsigned char >(get ());
259+ const int ch16 = (bom == 0xfeff ) ? (ch<<8 | ch2) : (ch2<<8 | ch);
260+ ch = static_cast <unsigned char >(((ch16 >= 0x80 ) ? 0xff : ch16));
261+ }
262+
263+ // Handling of newlines..
264+ if (ch == ' \r ' ) {
265+ ch = ' \n ' ;
266+ if (bom == 0 && static_cast <char >(peek ()) == ' \n ' )
267+ (void )get ();
268+ else if (bom == 0xfeff || bom == 0xfffe ) {
269+ int c1 = get ();
270+ int c2 = get ();
271+ int ch16 = (bom == 0xfeff ) ? (c1<<8 | c2) : (c2<<8 | c1);
272+ if (ch16 != ' \n ' ) {
273+ unget ();
274+ unget ();
275+ }
276+ }
277+ }
278+
279+ return ch;
280+ }
281+
282+ unsigned char peekChar ()
283+ {
284+ unsigned char ch = static_cast <unsigned char >(peek ());
285+
286+ // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
287+ // character is non-ASCII character then replace it with 0xff
288+ if (bom == 0xfeff || bom == 0xfffe ) {
289+ (void )get ();
290+ const unsigned char ch2 = static_cast <unsigned char >(peek ());
291+ unget ();
292+ const int ch16 = (bom == 0xfeff ) ? (ch<<8 | ch2) : (ch2<<8 | ch);
293+ ch = static_cast <unsigned char >(((ch16 >= 0x80 ) ? 0xff : ch16));
294+ }
295+
296+ // Handling of newlines..
297+ if (ch == ' \r ' )
298+ ch = ' \n ' ;
299+
300+ return ch;
301+ }
302+
303+ void ungetChar ()
304+ {
305+ unget ();
306+ if (bom == 0xfeff || bom == 0xfffe )
307+ unget ();
308+ }
309+
249310private:
311+ unsigned short getAndSkipBOM ()
312+ {
313+ const int ch1 = peek ();
314+
315+ // The UTF-16 BOM is 0xfffe or 0xfeff.
316+ if (ch1 >= 0xfe ) {
317+ unsigned short bom = (static_cast <unsigned char >(get ()) << 8 );
318+ if (peek () >= 0xfe )
319+ return bom | static_cast <unsigned char >(get ());
320+ unget ();
321+ return 0 ;
322+ }
323+
324+ // Skip UTF-8 BOM 0xefbbbf
325+ if (ch1 == 0xef ) {
326+ (void )get ();
327+ if (get () == 0xbb && peek () == 0xbf ) {
328+ (void )get ();
329+ } else {
330+ unget ();
331+ unget ();
332+ }
333+ }
334+
335+ return 0 ;
336+ }
337+
250338 std::istream &istr;
339+ unsigned short bom;
251340};
252341
253342simplecpp::TokenList::TokenList (std::vector<std::string> &filenames) : frontToken(nullptr ), backToken(nullptr ), files(filenames) {}
@@ -356,92 +445,6 @@ std::string simplecpp::TokenList::stringify() const
356445 return ret.str ();
357446}
358447
359- static unsigned char readChar (simplecpp::TokenList::Stream &istr, unsigned int bom)
360- {
361- unsigned char ch = static_cast <unsigned char >(istr.get ());
362-
363- // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
364- // character is non-ASCII character then replace it with 0xff
365- if (bom == 0xfeff || bom == 0xfffe ) {
366- const unsigned char ch2 = static_cast <unsigned char >(istr.get ());
367- const int ch16 = (bom == 0xfeff ) ? (ch<<8 | ch2) : (ch2<<8 | ch);
368- ch = static_cast <unsigned char >(((ch16 >= 0x80 ) ? 0xff : ch16));
369- }
370-
371- // Handling of newlines..
372- if (ch == ' \r ' ) {
373- ch = ' \n ' ;
374- if (bom == 0 && static_cast <char >(istr.peek ()) == ' \n ' )
375- (void )istr.get ();
376- else if (bom == 0xfeff || bom == 0xfffe ) {
377- int c1 = istr.get ();
378- int c2 = istr.get ();
379- int ch16 = (bom == 0xfeff ) ? (c1<<8 | c2) : (c2<<8 | c1);
380- if (ch16 != ' \n ' ) {
381- istr.unget ();
382- istr.unget ();
383- }
384- }
385- }
386-
387- return ch;
388- }
389-
390- static unsigned char peekChar (simplecpp::TokenList::Stream &istr, unsigned int bom)
391- {
392- unsigned char ch = static_cast <unsigned char >(istr.peek ());
393-
394- // For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
395- // character is non-ASCII character then replace it with 0xff
396- if (bom == 0xfeff || bom == 0xfffe ) {
397- (void )istr.get ();
398- const unsigned char ch2 = static_cast <unsigned char >(istr.peek ());
399- istr.unget ();
400- const int ch16 = (bom == 0xfeff ) ? (ch<<8 | ch2) : (ch2<<8 | ch);
401- ch = static_cast <unsigned char >(((ch16 >= 0x80 ) ? 0xff : ch16));
402- }
403-
404- // Handling of newlines..
405- if (ch == ' \r ' )
406- ch = ' \n ' ;
407-
408- return ch;
409- }
410-
411- static void ungetChar (simplecpp::TokenList::Stream &istr, unsigned int bom)
412- {
413- istr.unget ();
414- if (bom == 0xfeff || bom == 0xfffe )
415- istr.unget ();
416- }
417-
418- static unsigned short getAndSkipBOM (simplecpp::TokenList::Stream &istr)
419- {
420- const int ch1 = istr.peek ();
421-
422- // The UTF-16 BOM is 0xfffe or 0xfeff.
423- if (ch1 >= 0xfe ) {
424- unsigned short bom = (static_cast <unsigned char >(istr.get ()) << 8 );
425- if (istr.peek () >= 0xfe )
426- return bom | static_cast <unsigned char >(istr.get ());
427- istr.unget ();
428- return 0 ;
429- }
430-
431- // Skip UTF-8 BOM 0xefbbbf
432- if (ch1 == 0xef ) {
433- (void )istr.get ();
434- if (istr.get () == 0xbb && istr.peek () == 0xbf ) {
435- (void )istr.get ();
436- } else {
437- istr.unget ();
438- istr.unget ();
439- }
440- }
441-
442- return 0 ;
443- }
444-
445448static bool isNameChar (unsigned char ch)
446449{
447450 return std::isalnum (ch) || ch == ' _' || ch == ' $' ;
@@ -497,23 +500,21 @@ void simplecpp::TokenList::lineDirective(unsigned int fileIndex, unsigned int li
497500
498501static const std::string COMMENT_END (" */" );
499502
500- void simplecpp::TokenList::readfile (Stream &istr , const std::string &filename, OutputList *outputList)
503+ void simplecpp::TokenList::readfile (Stream &stream , const std::string &filename, OutputList *outputList)
501504{
502505 std::stack<simplecpp::Location> loc;
503506
504507 unsigned int multiline = 0U ;
505508
506509 const Token *oldLastToken = nullptr ;
507510
508- const unsigned short bom = getAndSkipBOM (istr);
509-
510511 Location location (files);
511512 location.fileIndex = fileIndex (filename);
512513 location.line = 1U ;
513514 location.col = 1U ;
514- while (istr .good ()) {
515- unsigned char ch = readChar (istr,bom );
516- if (!istr .good ())
515+ while (stream .good ()) {
516+ unsigned char ch = stream. readChar ();
517+ if (!stream .good ())
517518 break ;
518519 if (ch < ' ' && ch != ' \t ' && ch != ' \n ' && ch != ' \r ' )
519520 ch = ' ' ;
@@ -591,12 +592,12 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
591592
592593 if (cback () && cback ()->location .line == location.line && cback ()->previous && cback ()->previous ->op == ' #' && (lastLine () == " # error" || lastLine () == " # warning" )) {
593594 char prev = ' ' ;
594- while (istr .good () && (prev == ' \\ ' || (ch != ' \r ' && ch != ' \n ' ))) {
595+ while (stream .good () && (prev == ' \\ ' || (ch != ' \r ' && ch != ' \n ' ))) {
595596 currentToken += ch;
596597 prev = ch;
597- ch = readChar (istr, bom );
598+ ch = stream. readChar ();
598599 }
599- ungetChar (istr, bom );
600+ stream. ungetChar ();
600601 push_back (new Token (currentToken, location));
601602 location.adjust (currentToken);
602603 continue ;
@@ -605,21 +606,21 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
605606 // number or name
606607 if (isNameChar (ch)) {
607608 const bool num = std::isdigit (ch);
608- while (istr .good () && isNameChar (ch)) {
609+ while (stream .good () && isNameChar (ch)) {
609610 currentToken += ch;
610- ch = readChar (istr,bom );
611- if (num && ch==' \' ' && isNameChar (peekChar (istr,bom )))
612- ch = readChar (istr,bom );
611+ ch = stream. readChar ();
612+ if (num && ch==' \' ' && isNameChar (stream. peekChar ()))
613+ ch = stream. readChar ();
613614 }
614615
615- ungetChar (istr,bom );
616+ stream. ungetChar ();
616617 }
617618
618619 // comment
619- else if (ch == ' /' && peekChar (istr,bom ) == ' /' ) {
620- while (istr .good () && ch != ' \r ' && ch != ' \n ' ) {
620+ else if (ch == ' /' && stream. peekChar () == ' /' ) {
621+ while (stream .good () && ch != ' \r ' && ch != ' \n ' ) {
621622 currentToken += ch;
622- ch = readChar (istr, bom );
623+ ch = stream. readChar ();
623624 }
624625 const std::string::size_type pos = currentToken.find_last_not_of (" \t " );
625626 if (pos < currentToken.size () - 1U && currentToken[pos] == ' \\ ' )
@@ -628,20 +629,20 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
628629 ++multiline;
629630 currentToken.erase (currentToken.size () - 1U );
630631 } else {
631- ungetChar (istr, bom );
632+ stream. ungetChar ();
632633 }
633634 }
634635
635636 // comment
636- else if (ch == ' /' && peekChar (istr,bom ) == ' *' ) {
637+ else if (ch == ' /' && stream. peekChar () == ' *' ) {
637638 currentToken = " /*" ;
638- (void )readChar (istr,bom );
639- ch = readChar (istr,bom );
640- while (istr .good ()) {
639+ (void )stream. readChar ();
640+ ch = stream. readChar ();
641+ while (stream .good ()) {
641642 currentToken += ch;
642643 if (currentToken.size () >= 4U && endsWith (currentToken, COMMENT_END))
643644 break ;
644- ch = readChar (istr,bom );
645+ ch = stream. readChar ();
645646 }
646647 // multiline..
647648
@@ -672,12 +673,12 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
672673 std::string delim;
673674 currentToken = ch;
674675 prefix.resize (prefix.size () - 1 );
675- ch = readChar (istr,bom );
676- while (istr .good () && ch != ' (' && ch != ' \n ' ) {
676+ ch = stream. readChar ();
677+ while (stream .good () && ch != ' (' && ch != ' \n ' ) {
677678 delim += ch;
678- ch = readChar (istr,bom );
679+ ch = stream. readChar ();
679680 }
680- if (!istr .good () || ch == ' \n ' ) {
681+ if (!stream .good () || ch == ' \n ' ) {
681682 if (outputList) {
682683 Output err (files);
683684 err.type = Output::SYNTAX_ERROR;
@@ -688,8 +689,8 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
688689 return ;
689690 }
690691 const std::string endOfRawString (' )' + delim + currentToken);
691- while (istr .good () && !(endsWith (currentToken, endOfRawString) && currentToken.size () > 1 ))
692- currentToken += readChar (istr,bom );
692+ while (stream .good () && !(endsWith (currentToken, endOfRawString) && currentToken.size () > 1 ))
693+ currentToken += stream. readChar ();
693694 if (!endsWith (currentToken, endOfRawString)) {
694695 if (outputList) {
695696 Output err (files);
@@ -713,7 +714,7 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
713714 continue ;
714715 }
715716
716- currentToken = readUntil (istr ,location,ch,ch,outputList,bom );
717+ currentToken = readUntil (stream ,location,ch,ch,outputList);
717718 if (currentToken.size () < 2U )
718719 // Error is reported by readUntil()
719720 return ;
@@ -745,7 +746,7 @@ void simplecpp::TokenList::readfile(Stream &istr, const std::string &filename, O
745746 }
746747
747748 if (currentToken == " <" && lastLine () == " # include" ) {
748- currentToken = readUntil (istr , location, ' <' , ' >' , outputList, bom );
749+ currentToken = readUntil (stream , location, ' <' , ' >' , outputList);
749750 if (currentToken.size () < 2U )
750751 return ;
751752 }
@@ -1190,15 +1191,15 @@ void simplecpp::TokenList::removeComments()
11901191 }
11911192}
11921193
1193- std::string simplecpp::TokenList::readUntil (Stream &istr , const Location &location, const char start, const char end, OutputList *outputList, unsigned int bom )
1194+ std::string simplecpp::TokenList::readUntil (Stream &stream , const Location &location, const char start, const char end, OutputList *outputList)
11941195{
11951196 std::string ret;
11961197 ret += start;
11971198
11981199 bool backslash = false ;
11991200 char ch = 0 ;
1200- while (ch != end && ch != ' \r ' && ch != ' \n ' && istr .good ()) {
1201- ch = readChar (istr, bom );
1201+ while (ch != end && ch != ' \r ' && ch != ' \n ' && stream .good ()) {
1202+ ch = stream. readChar ();
12021203 if (backslash && ch == ' \n ' ) {
12031204 ch = 0 ;
12041205 backslash = false ;
@@ -1210,7 +1211,7 @@ std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &locati
12101211 bool update_ch = false ;
12111212 char next = 0 ;
12121213 do {
1213- next = readChar (istr, bom );
1214+ next = stream. readChar ();
12141215 if (next == ' \r ' || next == ' \n ' ) {
12151216 ret.erase (ret.size ()-1U );
12161217 backslash = (next == ' \r ' );
@@ -1224,7 +1225,7 @@ std::string simplecpp::TokenList::readUntil(Stream &istr, const Location &locati
12241225 }
12251226 }
12261227
1227- if (!istr .good () || ch != end) {
1228+ if (!stream .good () || ch != end) {
12281229 clear ();
12291230 if (outputList) {
12301231 Output err (files);
0 commit comments