rapidjson: filter out the BOM encoding mark when necessary.

Previously, when a JSON file began with a BOM (Byte Order Mark),
the BOM was not skipped and the file was not recognized as normal data.

Our StringStream now filters out the BOM when one is present.

See: https://en.wikipedia.org/wiki/Byte_order_mark
This commit is contained in:
Hermet Park 2019-06-25 16:21:33 +09:00
parent a5b6a23bd5
commit 7339baf3e5

View File

@ -188,7 +188,9 @@ template <typename Encoding>
struct GenericInsituStringStream {
typedef typename Encoding::Ch Ch;
// Binds the stream to the caller's buffer: src_ and head_ both start at src, dst_ starts null.
GenericInsituStringStream(Ch *src) : src_(src), dst_(0), head_(src) {}
// Binds the stream to the caller's buffer. The read cursor and the recorded
// head both start at `src`; the write cursor starts null.
GenericInsituStringStream(Ch *src)
    : src_(src),
      dst_(0),
      head_(src)
{
    // Step over a leading byte order mark, if any, before anything is read.
    SkipBOM();
}
// Read
// Returns the unit at the read cursor without advancing it.
Ch Peek() {
    return *src_;
}
@ -205,6 +207,31 @@ struct GenericInsituStringStream {
// Reserves `count` units at the write cursor: advances dst_ by `count` and
// returns the position it had before the advance.
Ch* Push(size_t count) {
    Ch* const reserved = dst_;
    dst_ = reserved + count;
    return reserved;
}
// Moves the write cursor back by `count` units.
void Pop(size_t count) {
    dst_ = dst_ - count;
}
/*
Detect encoding type with BOM or RFC 4627
BOM (Byte Order Mark):
00 00 FE FF UTF-32BE
FF FE 00 00 UTF-32LE
FE FF UTF-16BE
FF FE UTF-16LE
EF BB BF UTF-8
*/
void SkipBOM() {
unsigned char *c = reinterpret_cast<unsigned char *>(src_);
if (!c) return;
unsigned bom = (c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24));
if (bom == 0xFFFE0000) { Take(); Take(); Take(); Take(); }
else if (bom == 0x0000FEFF) { Take(); Take(); Take(); Take(); }
else if ((bom & 0xFFFF) == 0xFFFE) { Take(); Take(); }
else if ((bom & 0xFFFF) == 0xFEFF) { Take(); Take(); }
else if ((bom & 0xFFFFFF) == 0xBFBBEF) { Take(); Take(); Take(); }
//It might need to clarify this file is a type of RFC 4627?
}
Ch* src_;   // read cursor: Peek() returns *src_
Ch* dst_;   // write cursor: moved by Push()/Pop(); initialized to null
Ch* head_;  // buffer start as passed to the constructor