Commit 4eaca771e5d61546ff32008da54e86c419f9d02f
1 parent
bf961235
Add isValidUtf8()
Showing
2 changed files
with
60 additions
and
0 deletions
utils.cpp
| ... | ... | @@ -48,3 +48,61 @@ bool topicsMatch(const std::string &subscribeTopic, const std::string &publishTo |
| 48 | 48 | result = subscribe_itr == subscribeParts.end() && publish_itr == publishParts.end(); |
| 49 | 49 | return result; |
| 50 | 50 | } |
| 51 | + | |
/**
 * @brief Checks whether a string is well-formed UTF-8 that is acceptable as an MQTT string.
 *
 * The input is rejected when it contains:
 *  - a NUL byte, or any encoding of U+0000;
 *  - a malformed sequence (stray continuation byte, invalid lead byte, truncated sequence);
 *  - an overlong encoding (a code point encoded with more bytes than necessary, see RFC 3629);
 *  - a code point above U+10FFFF;
 *  - a UTF-16 surrogate, U+D800..U+DFFF [MQTT-1.5.3-1];
 *  - a control character, U+0001..U+001F or U+007F..U+009F;
 *  - the non-character U+FFFF.
 *
 * @param s The raw bytes to validate.
 * @return true when every byte of s belongs to an accepted code point, false otherwise.
 *         An empty string is valid.
 */
bool isValidUtf8(const std::string &s)
{
    int multibyte_remain = 0; // Continuation bytes still expected for the current code point.
    int cur_code_point = 0;   // Code point assembled so far.
    int min_code_point = 0;   // Smallest code point that legitimately needs the current sequence length.

    for (const char c : s)
    {
        // Classify bytes as unsigned so the result does not depend on the signedness of plain char.
        const unsigned char x = static_cast<unsigned char>(c);

        if (x == 0)
            return false;

        if (multibyte_remain == 0)
        {
            if ((x & 0b10000000) == 0) // 1 byte char (ASCII)
            {
                cur_code_point = x;
                min_code_point = 0;
            }
            else if ((x & 0b11100000) == 0b11000000) // 2 byte char
            {
                multibyte_remain = 1;
                cur_code_point = (x & 0b00011111) << 6;
                min_code_point = 0x80;
            }
            else if ((x & 0b11110000) == 0b11100000) // 3 byte char
            {
                multibyte_remain = 2;
                cur_code_point = (x & 0b00001111) << 12;
                min_code_point = 0x800;
            }
            else if ((x & 0b11111000) == 0b11110000) // 4 byte char
            {
                multibyte_remain = 3;
                cur_code_point = (x & 0b00000111) << 18;
                min_code_point = 0x10000;
            }
            else // Stray continuation byte (10xxxxxx) or invalid lead byte (11111xxx).
            {
                return false;
            }
        }
        else // All remaining bytes of this code point need to start with 10.
        {
            if ((x & 0b11000000) != 0b10000000)
                return false;
            multibyte_remain--;
            cur_code_point |= (x & 0b00111111) << (6 * multibyte_remain);
        }

        if (multibyte_remain == 0)
        {
            // Overlong encoding: the code point would have fit in a shorter sequence.
            // This also catches U+0000 smuggled in as 0xC0 0x80.
            if (cur_code_point < min_code_point)
                return false;

            // Beyond the Unicode code space.
            if (cur_code_point > 0x10FFFF)
                return false;

            // Invalid range for MQTT. [MQTT-1.5.3-1]
            if (cur_code_point >= 0xD800 && cur_code_point <= 0xDFFF) // Dec 55296-57343
                return false;
            if (cur_code_point >= 0x0001 && cur_code_point <= 0x001F)
                return false;
            if (cur_code_point >= 0x007F && cur_code_point <= 0x009F)
                return false;
            if (cur_code_point == 0xFFFF)
                return false;
        }
    }

    // A string ending in the middle of a multi-byte sequence is malformed.
    return multibyte_remain == 0;
}
utils.h
| ... | ... | @@ -23,4 +23,6 @@ std::list<std::string> split(const std::string &input, const char sep, size_t ma |
| 23 | 23 | |
| 24 | 24 | bool topicsMatch(const std::string &subscribeTopic, const std::string &publishTopic); |
| 25 | 25 | |
| 26 | +bool isValidUtf8(const std::string &s); | |
| 27 | + | |
| 26 | 28 | #endif // UTILS_H | ... | ... |