Commit 4eaca771e5d61546ff32008da54e86c419f9d02f
1 parent
bf961235
Add isValidUtf8()
Showing
2 changed files
with
60 additions
and
0 deletions
utils.cpp
| @@ -48,3 +48,61 @@ bool topicsMatch(const std::string &subscribeTopic, const std::string &publishTo | @@ -48,3 +48,61 @@ bool topicsMatch(const std::string &subscribeTopic, const std::string &publishTo | ||
| 48 | result = subscribe_itr == subscribeParts.end() && publish_itr == publishParts.end(); | 48 | result = subscribe_itr == subscribeParts.end() && publish_itr == publishParts.end(); |
| 49 | return result; | 49 | return result; |
| 50 | } | 50 | } |
| 51 | + | ||
/**
 * @brief Check whether a string is well-formed UTF-8 that is acceptable as an MQTT string.
 *
 * The string is decoded byte by byte. It is rejected when it:
 *  - contains a zero byte;
 *  - contains a lead byte that is not a valid 1-4 byte sequence start (stray continuation
 *    bytes, 0xF8..0xFF);
 *  - contains a continuation byte that does not have the form 10xxxxxx;
 *  - ends in the middle of a multi-byte sequence;
 *  - uses an overlong encoding (more bytes than the code point needs, e.g. C0 80 for U+0000);
 *  - encodes a code point above U+10FFFF;
 *  - encodes a surrogate (U+D800..U+DFFF), a control character (U+0001..U+001F,
 *    U+007F..U+009F) or U+FFFF.
 *
 * @param s The bytes to validate.
 * @return true when every byte of s belongs to an accepted code point (an empty string is
 *         valid), false otherwise.
 */
bool isValidUtf8(const std::string &s)
{
    int multibyte_remain = 0; // Continuation bytes still expected for the current code point.
    int cur_code_point = 0;   // Code point assembled so far.
    int min_code_point = 0;   // Smallest code point that legitimately needs the current sequence length.

    for (const char x : s)
    {
        if (x == 0)
            return false;

        if (!multibyte_remain)
        {
            // Lead byte: its high bits tell how many continuation bytes follow. The masks
            // only look at the low 8 bits, so a signed char works here too.
            if ((x & 0b10000000) == 0) // 1 byte char (ASCII)
            {
                cur_code_point = (x & 0b01111111);
                min_code_point = 0;
            }
            else if ((x & 0b11100000) == 0b11000000) // 2 byte char
            {
                multibyte_remain = 1;
                cur_code_point = ((x & 0b00011111) << 6);
                min_code_point = 0x80;
            }
            else if ((x & 0b11110000) == 0b11100000) // 3 byte char
            {
                multibyte_remain = 2;
                cur_code_point = ((x & 0b00001111) << 12);
                min_code_point = 0x800;
            }
            else if ((x & 0b11111000) == 0b11110000) // 4 byte char
            {
                multibyte_remain = 3;
                cur_code_point = ((x & 0b00000111) << 18);
                min_code_point = 0x10000;
            }
            else // Stray continuation byte, or a lead byte announcing more than 4 bytes.
                return false;
        }
        else // All remaining bytes of this code point need to start with 10.
        {
            if ((x & 0b11000000) != 0b10000000)
                return false;
            multibyte_remain--;
            cur_code_point += ((x & 0b00111111) << (6 * multibyte_remain));
        }

        if (multibyte_remain == 0)
        {
            // Overlong encoding: the code point would have fit in a shorter sequence. Accepting
            // these would let forbidden characters (like U+0000 as C0 80) slip past the checks.
            if (cur_code_point < min_code_point)
                return false;
            // Beyond the last Unicode code point.
            if (cur_code_point > 0x10FFFF)
                return false;

            // Invalid range for MQTT. [MQTT-1.5.3-1]
            if (cur_code_point >= 0xD800 && cur_code_point <= 0xDFFF) // Dec 55296-57343
                return false;
            if (cur_code_point >= 0x0001 && cur_code_point <= 0x001F)
                return false;
            if (cur_code_point >= 0x007F && cur_code_point <= 0x009F)
                return false;
            if (cur_code_point == 0xFFFF)
                return false;
            cur_code_point = 0;
        }
    }

    // A string that stops halfway through a multi-byte sequence is invalid.
    return multibyte_remain == 0;
}
utils.h
| @@ -23,4 +23,6 @@ std::list<std::string> split(const std::string &input, const char sep, size_t ma | @@ -23,4 +23,6 @@ std::list<std::string> split(const std::string &input, const char sep, size_t ma | ||
| 23 | 23 | ||
| 24 | bool topicsMatch(const std::string &subscribeTopic, const std::string &publishTopic); | 24 | bool topicsMatch(const std::string &subscribeTopic, const std::string &publishTopic); |
| 25 | 25 | ||
| 26 | +bool isValidUtf8(const std::string &s); | ||
| 27 | + | ||
| 26 | #endif // UTILS_H | 28 | #endif // UTILS_H |