Skip to content

Commit 1a8970d

Browse files
committed
simplified UTF-16 checks
1 parent 2808213 commit 1a8970d

1 file changed

Lines changed: 8 additions & 6 deletions

File tree

simplecpp.cpp

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -231,8 +231,9 @@ class simplecpp::TokenList::Stream {
231231
public:
232232
Stream(std::istream &istr)
233233
: istr(istr)
234+
, bom(getAndSkipBOM())
235+
, isUtf16(bom == 0xfeff || bom == 0xfffe)
234236
{
235-
bom = getAndSkipBOM();
236237
}
237238

238239
int get() {
@@ -254,7 +255,7 @@ class simplecpp::TokenList::Stream {
254255

255256
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
256257
// character is non-ASCII character then replace it with 0xff
257-
if (bom == 0xfeff || bom == 0xfffe) {
258+
if (isUtf16) {
258259
const unsigned char ch2 = static_cast<unsigned char>(get());
259260
const int ch16 = (bom == 0xfeff) ? (ch<<8 | ch2) : (ch2<<8 | ch);
260261
ch = static_cast<unsigned char>(((ch16 >= 0x80) ? 0xff : ch16));
@@ -265,7 +266,7 @@ class simplecpp::TokenList::Stream {
265266
ch = '\n';
266267
if (bom == 0 && static_cast<char>(peek()) == '\n')
267268
(void)get();
268-
else if (bom == 0xfeff || bom == 0xfffe) {
269+
else if (isUtf16) {
269270
int c1 = get();
270271
int c2 = get();
271272
int ch16 = (bom == 0xfeff) ? (c1<<8 | c2) : (c2<<8 | c1);
@@ -285,7 +286,7 @@ class simplecpp::TokenList::Stream {
285286

286287
// For UTF-16 encoded files the BOM is 0xfeff/0xfffe. If the
287288
// character is non-ASCII character then replace it with 0xff
288-
if (bom == 0xfeff || bom == 0xfffe) {
289+
if (isUtf16) {
289290
(void)get();
290291
const unsigned char ch2 = static_cast<unsigned char>(peek());
291292
unget();
@@ -303,7 +304,7 @@ class simplecpp::TokenList::Stream {
303304
void ungetChar()
304305
{
305306
unget();
306-
if (bom == 0xfeff || bom == 0xfffe)
307+
if (isUtf16)
307308
unget();
308309
}
309310

@@ -336,7 +337,8 @@ class simplecpp::TokenList::Stream {
336337
}
337338

338339
std::istream &istr;
339-
unsigned short bom;
340+
const unsigned short bom;
341+
const bool isUtf16;
340342
};
341343

342344
simplecpp::TokenList::TokenList(std::vector<std::string> &filenames) : frontToken(nullptr), backToken(nullptr), files(filenames) {}

0 commit comments

Comments
 (0)