Commit 6473b379392aa3f4ef182a57f59a627bda9b18d5

Authored by Wiebe Cazemier
1 parent 1bca4cbb

Check all UTF8 strings in packet parsing for validity

Also fix some bugs:

- The password can be binary data.
- The will topic is now also checked.
mqttpacket.cpp
@@ -404,14 +404,12 @@ void MqttPacket::handleConnect() @@ -404,14 +404,12 @@ void MqttPacket::handleConnect()
404 break; 404 break;
405 case Mqtt5Properties::AuthenticationMethod: 405 case Mqtt5Properties::AuthenticationMethod:
406 { 406 {
407 - const uint16_t len = readTwoBytesToUInt16();  
408 - readBytes(len); 407 + readBytesToString();
409 break; 408 break;
410 } 409 }
411 case Mqtt5Properties::AuthenticationData: 410 case Mqtt5Properties::AuthenticationData:
412 { 411 {
413 - const uint16_t len = readTwoBytesToUInt16();  
414 - readBytes(len); 412 + readBytesToString(false);
415 break; 413 break;
416 } 414 }
417 default: 415 default:
@@ -420,8 +418,7 @@ void MqttPacket::handleConnect() @@ -420,8 +418,7 @@ void MqttPacket::handleConnect()
420 } 418 }
421 } 419 }
422 420
423 - uint16_t client_id_length = readTwoBytesToUInt16();  
424 - std::string client_id(readBytes(client_id_length), client_id_length); 421 + std::string client_id = readBytesToString();
425 422
426 std::string username; 423 std::string username;
427 std::string password; 424 std::string password;
@@ -453,15 +450,13 @@ void MqttPacket::handleConnect() @@ -453,15 +450,13 @@ void MqttPacket::handleConnect()
453 break; 450 break;
454 case Mqtt5Properties::ContentType: 451 case Mqtt5Properties::ContentType:
455 { 452 {
456 - const uint16_t len = readTwoBytesToUInt16();  
457 - const std::string contentType(readBytes(len), len); 453 + const std::string contentType = readBytesToString();
458 willpublish.propertyBuilder->writeContentType(contentType); 454 willpublish.propertyBuilder->writeContentType(contentType);
459 break; 455 break;
460 } 456 }
461 case Mqtt5Properties::ResponseTopic: 457 case Mqtt5Properties::ResponseTopic:
462 { 458 {
463 - const uint16_t len = readTwoBytesToUInt16();  
464 - const std::string responseTopic(readBytes(len), len); 459 + const std::string responseTopic = readBytesToString();
465 willpublish.propertyBuilder->writeResponseTopic(responseTopic); 460 willpublish.propertyBuilder->writeResponseTopic(responseTopic);
466 break; 461 break;
467 } 462 }
@@ -473,18 +468,13 @@ void MqttPacket::handleConnect() @@ -473,18 +468,13 @@ void MqttPacket::handleConnect()
473 } 468 }
474 case Mqtt5Properties::CorrelationData: 469 case Mqtt5Properties::CorrelationData:
475 { 470 {
476 - const uint16_t len = readTwoBytesToUInt16();  
477 - const std::string correlationData(readBytes(len), len); 471 + const std::string correlationData = readBytesToString(false);
478 willpublish.propertyBuilder->writeCorrelationData(correlationData); 472 willpublish.propertyBuilder->writeCorrelationData(correlationData);
479 break; 473 break;
480 } 474 }
481 case Mqtt5Properties::UserProperty: 475 case Mqtt5Properties::UserProperty:
482 { 476 {
483 - const uint16_t lenKey = readTwoBytesToUInt16();  
484 - std::string userPropKey(readBytes(lenKey), lenKey);  
485 - const uint16_t lenVal = readTwoBytesToUInt16();  
486 - std::string userPropVal(readBytes(lenVal), lenVal);  
487 - willpublish.propertyBuilder->writeUserProperty(std::move(userPropKey), std::move(userPropVal)); 477 + readUserProperty();
488 break; 478 break;
489 } 479 }
490 default: 480 default:
@@ -493,16 +483,14 @@ void MqttPacket::handleConnect() @@ -493,16 +483,14 @@ void MqttPacket::handleConnect()
493 } 483 }
494 } 484 }
495 485
496 - uint16_t will_topic_length = readTwoBytesToUInt16();  
497 - willpublish.topic = std::string(readBytes(will_topic_length), will_topic_length); 486 + willpublish.topic = readBytesToString(true, true);
498 487
499 uint16_t will_payload_length = readTwoBytesToUInt16(); 488 uint16_t will_payload_length = readTwoBytesToUInt16();
500 willpublish.payload = std::string(readBytes(will_payload_length), will_payload_length); 489 willpublish.payload = std::string(readBytes(will_payload_length), will_payload_length);
501 } 490 }
502 if (user_name_flag) 491 if (user_name_flag)
503 { 492 {
504 - uint16_t user_name_length = readTwoBytesToUInt16();  
505 - username = std::string(readBytes(user_name_length), user_name_length); 493 + username = readBytesToString(false);
506 494
507 if (username.empty()) 495 if (username.empty())
508 throw ProtocolError("Username flagged as present, but it's 0 bytes.", ReasonCodes::MalformedPacket); 496 throw ProtocolError("Username flagged as present, but it's 0 bytes.", ReasonCodes::MalformedPacket);
@@ -514,21 +502,21 @@ void MqttPacket::handleConnect() @@ -514,21 +502,21 @@ void MqttPacket::handleConnect()
514 throw ProtocolError("MQTT 3.1.1: If the User Name Flag is set to 0, the Password Flag MUST be set to 0."); 502 throw ProtocolError("MQTT 3.1.1: If the User Name Flag is set to 0, the Password Flag MUST be set to 0.");
515 } 503 }
516 504
517 - uint16_t password_length = readTwoBytesToUInt16();  
518 - password = std::string(readBytes(password_length), password_length); 505 + password = readBytesToString(false);
519 506
520 if (password.empty()) 507 if (password.empty())
521 throw ProtocolError("Password flagged as present, but it's 0 bytes.", ReasonCodes::MalformedPacket); 508 throw ProtocolError("Password flagged as present, but it's 0 bytes.", ReasonCodes::MalformedPacket);
522 } 509 }
523 510
524 - // The specs don't really say what to do when client id not UTF8, so including here.  
525 - if (!isValidUtf8(client_id) || !isValidUtf8(username) || !isValidUtf8(password) || !isValidUtf8(willpublish.topic)) 511 + // I deferred the initial UTF8 check on username to be able to give an appropriate connack here, but to me, the specs
  512 + // are actually vague whether 'BadUserNameOrPassword' should be given on invalid UTF8.
  513 + if (!isValidUtf8(username))
526 { 514 {
527 ConnAck connAck(protocolVersion, ReasonCodes::BadUserNameOrPassword); 515 ConnAck connAck(protocolVersion, ReasonCodes::BadUserNameOrPassword);
528 MqttPacket response(connAck); 516 MqttPacket response(connAck);
529 sender->setReadyForDisconnect(); 517 sender->setReadyForDisconnect();
530 sender->writeMqttPacket(response); 518 sender->writeMqttPacket(response);
531 - logger->logf(LOG_ERR, "Client ID, username, password or will topic has invalid UTF8: ", client_id.c_str()); 519 + logger->logf(LOG_ERR, "Username has invalid UTF8: %s", username.c_str());
532 return; 520 return;
533 } 521 }
534 522
@@ -679,14 +667,12 @@ void MqttPacket::handleDisconnect() @@ -679,14 +667,12 @@ void MqttPacket::handleDisconnect()
679 } 667 }
680 case Mqtt5Properties::ReasonString: 668 case Mqtt5Properties::ReasonString:
681 { 669 {
682 - const uint16_t len = readTwoBytesToUInt16();  
683 - reasonString = std::string(readBytes(len), len); 670 + reasonString = readBytesToString();
684 break; 671 break;
685 } 672 }
686 case Mqtt5Properties::ServerReference: 673 case Mqtt5Properties::ServerReference:
687 { 674 {
688 - const uint16_t len = readTwoBytesToUInt16();  
689 - readBytes(len); 675 + readBytesToString();
690 break; 676 break;
691 } 677 }
692 case Mqtt5Properties::UserProperty: 678 case Mqtt5Properties::UserProperty:
@@ -754,11 +740,10 @@ void MqttPacket::handleSubscribe() @@ -754,11 +740,10 @@ void MqttPacket::handleSubscribe()
754 std::list<ReasonCodes> subs_reponse_codes; 740 std::list<ReasonCodes> subs_reponse_codes;
755 while (remainingAfterPos() > 0) 741 while (remainingAfterPos() > 0)
756 { 742 {
757 - uint16_t topicLength = readTwoBytesToUInt16();  
758 - std::string topic(readBytes(topicLength), topicLength); 743 + std::string topic = readBytesToString(true);
759 744
760 - if (topic.empty() || !isValidUtf8(topic))  
761 - throw ProtocolError("Subscribe topic not valid UTF-8.", ReasonCodes::MalformedPacket); 745 + if (topic.empty())
  746 + throw ProtocolError("Subscribe topic is empty.", ReasonCodes::MalformedPacket);
762 747
763 if (!isValidSubscribePath(topic)) 748 if (!isValidSubscribePath(topic))
764 throw ProtocolError(formatString("Invalid subscribe path: %s", topic.c_str()), ReasonCodes::MalformedPacket); 749 throw ProtocolError(formatString("Invalid subscribe path: %s", topic.c_str()), ReasonCodes::MalformedPacket);
@@ -837,11 +822,10 @@ void MqttPacket::handleUnsubscribe() @@ -837,11 +822,10 @@ void MqttPacket::handleUnsubscribe()
837 { 822 {
838 numberOfUnsubs++; 823 numberOfUnsubs++;
839 824
840 - uint16_t topicLength = readTwoBytesToUInt16();  
841 - std::string topic(readBytes(topicLength), topicLength); 825 + std::string topic = readBytesToString();
842 826
843 - if (topic.empty() || !isValidUtf8(topic))  
844 - throw ProtocolError("Subscribe topic not valid UTF-8.", ReasonCodes::MalformedPacket); 827 + if (topic.empty())
  828 + throw ProtocolError("Subscribe topic is empty.", ReasonCodes::MalformedPacket);
845 829
846 sender->getThreadData()->getSubscriptionStore()->removeSubscription(sender, topic); 830 sender->getThreadData()->getSubscriptionStore()->removeSubscription(sender, topic);
847 logger->logf(LOG_UNSUBSCRIBE, "Client '%s' unsubscribed from '%s'", sender->repr().c_str(), topic.c_str()); 831 logger->logf(LOG_UNSUBSCRIBE, "Client '%s' unsubscribed from '%s'", sender->repr().c_str(), topic.c_str());
@@ -860,8 +844,6 @@ void MqttPacket::handleUnsubscribe() @@ -860,8 +844,6 @@ void MqttPacket::handleUnsubscribe()
860 844
861 void MqttPacket::parsePublishData() 845 void MqttPacket::parsePublishData()
862 { 846 {
863 - const uint16_t variable_header_length = readTwoBytesToUInt16();  
864 -  
865 publishData.retain = (first_byte & 0b00000001); 847 publishData.retain = (first_byte & 0b00000001);
866 bool dup = !!(first_byte & 0b00001000); 848 bool dup = !!(first_byte & 0b00001000);
867 publishData.qos = (first_byte & 0b00000110) >> 1; 849 publishData.qos = (first_byte & 0b00000110) >> 1;
@@ -872,7 +854,7 @@ void MqttPacket::parsePublishData() @@ -872,7 +854,7 @@ void MqttPacket::parsePublishData()
872 if (publishData.qos == 0 && dup) 854 if (publishData.qos == 0 && dup)
873 throw ProtocolError("Duplicate flag is set for QoS 0 packet. This is illegal.", ReasonCodes::MalformedPacket); 855 throw ProtocolError("Duplicate flag is set for QoS 0 packet. This is illegal.", ReasonCodes::MalformedPacket);
874 856
875 - publishData.topic = std::string(readBytes(variable_header_length), variable_header_length); 857 + publishData.topic = readBytesToString(true, true);
876 858
877 if (publishData.qos) 859 if (publishData.qos)
878 { 860 {
@@ -928,27 +910,20 @@ void MqttPacket::parsePublishData() @@ -928,27 +910,20 @@ void MqttPacket::parsePublishData()
928 case Mqtt5Properties::ResponseTopic: 910 case Mqtt5Properties::ResponseTopic:
929 { 911 {
930 publishData.constructPropertyBuilder(); 912 publishData.constructPropertyBuilder();
931 - const uint16_t len = readTwoBytesToUInt16();  
932 - const std::string responseTopic(readBytes(len), len); 913 + const std::string responseTopic = readBytesToString();
933 publishData.propertyBuilder->writeResponseTopic(responseTopic); 914 publishData.propertyBuilder->writeResponseTopic(responseTopic);
934 break; 915 break;
935 } 916 }
936 case Mqtt5Properties::CorrelationData: 917 case Mqtt5Properties::CorrelationData:
937 { 918 {
938 publishData.constructPropertyBuilder(); 919 publishData.constructPropertyBuilder();
939 - const uint16_t len = readTwoBytesToUInt16();  
940 - const std::string correlationData(readBytes(len), len); 920 + const std::string correlationData = readBytesToString(false);
941 publishData.propertyBuilder->writeCorrelationData(correlationData); 921 publishData.propertyBuilder->writeCorrelationData(correlationData);
942 break; 922 break;
943 } 923 }
944 case Mqtt5Properties::UserProperty: 924 case Mqtt5Properties::UserProperty:
945 { 925 {
946 - publishData.constructPropertyBuilder();  
947 - const uint16_t lenKey = readTwoBytesToUInt16();  
948 - std::string userPropKey(readBytes(lenKey), lenKey);  
949 - const uint16_t lenVal = readTwoBytesToUInt16();  
950 - std::string userPropVal(readBytes(lenVal), lenVal);  
951 - publishData.propertyBuilder->writeUserProperty(std::move(userPropKey), std::move(userPropVal)); 926 + readUserProperty();
952 break; 927 break;
953 } 928 }
954 case Mqtt5Properties::SubscriptionIdentifier: 929 case Mqtt5Properties::SubscriptionIdentifier:
@@ -981,13 +956,6 @@ void MqttPacket::handlePublish() @@ -981,13 +956,6 @@ void MqttPacket::handlePublish()
981 { 956 {
982 parsePublishData(); 957 parsePublishData();
983 958
984 - if (!isValidUtf8(publishData.topic, true))  
985 - {  
986 - const std::string err = formatString("Client '%s' published a message with invalid UTF8 or $/+/# in it. Dropping.", sender->repr().c_str());  
987 - logger->logf(LOG_WARNING, err.c_str());  
988 - throw ProtocolError(err, ReasonCodes::ProtocolError);  
989 - }  
990 -  
991 #ifndef NDEBUG 959 #ifndef NDEBUG
992 logger->logf(LOG_DEBUG, "Publish received, topic '%s'. QoS=%d. Retain=%d, dup=%d", publishData.topic.c_str(), publishData.qos, publishData.retain, dup); 960 logger->logf(LOG_DEBUG, "Publish received, topic '%s'. QoS=%d. Retain=%d, dup=%d", publishData.topic.c_str(), publishData.qos, publishData.retain, dup);
993 #endif 961 #endif
@@ -1350,14 +1318,29 @@ void MqttPacket::readUserProperty() @@ -1350,14 +1318,29 @@ void MqttPacket::readUserProperty()
1350 { 1318 {
1351 this->publishData.constructPropertyBuilder(); 1319 this->publishData.constructPropertyBuilder();
1352 1320
1353 - const uint16_t len = readTwoBytesToUInt16();  
1354 - std::string key(readBytes(len), len);  
1355 - const uint16_t len2 = readTwoBytesToUInt16();  
1356 - std::string value(readBytes(len2), len2); 1321 + std::string key = readBytesToString();
  1322 + std::string value = readBytesToString();
1357 1323
1358 this->publishData.propertyBuilder->writeUserProperty(std::move(key), std::move(value)); 1324 this->publishData.propertyBuilder->writeUserProperty(std::move(key), std::move(value));
1359 } 1325 }
1360 1326
  1327 +std::string MqttPacket::readBytesToString(bool validateUtf8, bool alsoCheckInvalidPublishChars)
  1328 +{
  1329 + const uint16_t len = readTwoBytesToUInt16();
  1330 + std::string result(readBytes(len), len);
  1331 +
  1332 + if (validateUtf8)
  1333 + {
  1334 + if (!isValidUtf8(result, alsoCheckInvalidPublishChars))
  1335 + {
  1336 + logger->logf(LOG_DEBUG, "Data of invalid UTF-8 string or publish topic: %s", result.c_str());
  1337 + throw ProtocolError("Invalid UTF8 string detected, or invalid publish characters.", ReasonCodes::MalformedPacket);
  1338 + }
  1339 + }
  1340 +
  1341 + return result;
  1342 +}
  1343 +
1361 const std::vector<std::pair<std::string, std::string>> *MqttPacket::getUserProperties() const 1344 const std::vector<std::pair<std::string, std::string>> *MqttPacket::getUserProperties() const
1362 { 1345 {
1363 if (this->publishData.propertyBuilder) 1346 if (this->publishData.propertyBuilder)
mqttpacket.h
@@ -82,6 +82,7 @@ class MqttPacket @@ -82,6 +82,7 @@ class MqttPacket
82 size_t remainingAfterPos(); 82 size_t remainingAfterPos();
83 size_t decodeVariableByteIntAtPos(); 83 size_t decodeVariableByteIntAtPos();
84 void readUserProperty(); 84 void readUserProperty();
  85 + std::string readBytesToString(bool validateUtf8 = true, bool alsoCheckInvalidPublishChars = false);
85 86
86 void calculateRemainingLength(); 87 void calculateRemainingLength();
87 88
threadlocalutils.cpp
@@ -51,6 +51,12 @@ std::vector&lt;std::string&gt; *SimdUtils::splitTopic(const std::string &amp;topic, std::v @@ -51,6 +51,12 @@ std::vector&lt;std::string&gt; *SimdUtils::splitTopic(const std::string &amp;topic, std::v
51 return &output; 51 return &output;
52 } 52 }
53 53
  54 +/**
  55 + * @brief SimdUtils::isValidUtf8 checks UTF-8 validity 16 bytes at a time, using SSE 4.2.
  56 + * @param s
  57 + * @param alsoCheckInvalidPublishChars is for checking the presence of '#' and '+' which is not allowed in publishes.
  58 + * @return
  59 + */
54 bool SimdUtils::isValidUtf8(const std::string &s, bool alsoCheckInvalidPublishChars) 60 bool SimdUtils::isValidUtf8(const std::string &s, bool alsoCheckInvalidPublishChars)
55 { 61 {
56 const int len = s.size(); 62 const int len = s.size();