Commit 4eaca771e5d61546ff32008da54e86c419f9d02f

Authored by Wiebe Cazemier
1 parent bf961235

Add isValidUtf8()

Showing 2 changed files with 60 additions and 0 deletions
utils.cpp
@@ -48,3 +48,61 @@ bool topicsMatch(const std::string &subscribeTopic, const std::string &publishTo @@ -48,3 +48,61 @@ bool topicsMatch(const std::string &subscribeTopic, const std::string &publishTo
48 result = subscribe_itr == subscribeParts.end() && publish_itr == publishParts.end(); 48 result = subscribe_itr == subscribeParts.end() && publish_itr == publishParts.end();
49 return result; 49 return result;
50 } 50 }
  51 +
  52 +bool isValidUtf8(const std::string &s)
  53 +{
  54 + int multibyte_remain = 0;
  55 + int cur_code_point = 0;
  56 + for(const char &x : s)
  57 + {
  58 + if (x == 0)
  59 + return false;
  60 +
  61 + if(!multibyte_remain)
  62 + {
  63 + cur_code_point = 0;
  64 +
  65 + if((x & 0b11100000) == 0b11000000) // 2 byte char
  66 + {
  67 + multibyte_remain = 1;
  68 + cur_code_point += ((x & 0b00011111) << 6);
  69 + }
  70 + else if((x & 0b11110000) == 0b11100000) // 3 byte char
  71 + {
  72 + multibyte_remain = 2;
  73 + cur_code_point += ((x & 0b00001111) << 12);
  74 + }
  75 + else if((x & 0b11111000) == 0b11110000) // 4 byte char
  76 + {
  77 + multibyte_remain = 3;
  78 + cur_code_point += ((x & 0b00000111) << 18);
  79 + }
  80 + else if((x & 0b10000000) != 0)
  81 + return false;
  82 + else
  83 + cur_code_point += (x & 0b01111111);
  84 + }
  85 + else // All remainer bytes of this code point needs to start with 10
  86 + {
  87 + if((x & 0b11000000) != 0b10000000)
  88 + return false;
  89 + multibyte_remain--;
  90 + cur_code_point += ((x & 0b00111111) << (6*multibyte_remain));
  91 + }
  92 +
  93 + if (multibyte_remain == 0)
  94 + {
  95 + // Invalid range for MQTT. [MQTT-1.5.3-1]
  96 + if (cur_code_point >= 0xD800 && cur_code_point <= 0xDFFF) // Dec 55296-57343
  97 + return false;
  98 + if (cur_code_point >= 0x0001 && cur_code_point <= 0x001F)
  99 + return false;
  100 + if (cur_code_point >= 0x007F && cur_code_point <= 0x009F)
  101 + return false;
  102 + if (cur_code_point == 0xFFFF)
  103 + return false;
  104 + cur_code_point = 0;
  105 + }
  106 + }
  107 + return multibyte_remain == 0;
  108 +}
@@ -23,4 +23,6 @@ std::list<std::string> split(const std::string &input, const char sep, size_t ma @@ -23,4 +23,6 @@ std::list<std::string> split(const std::string &input, const char sep, size_t ma
23 23
24 bool topicsMatch(const std::string &subscribeTopic, const std::string &publishTopic); 24 bool topicsMatch(const std::string &subscribeTopic, const std::string &publishTopic);
25 25
  26 +bool isValidUtf8(const std::string &s);
  27 +
26 #endif // UTILS_H 28 #endif // UTILS_H