Commit 6473b379392aa3f4ef182a57f59a627bda9b18d5

Authored by Wiebe Cazemier
1 parent 1bca4cbb

Check all UTF8 strings in packet parsing for validity

Also fix some bugs:

- The password can be binary data.
- The will topic is now also checked.
mqttpacket.cpp
... ... @@ -404,14 +404,12 @@ void MqttPacket::handleConnect()
404 404 break;
405 405 case Mqtt5Properties::AuthenticationMethod:
406 406 {
407   - const uint16_t len = readTwoBytesToUInt16();
408   - readBytes(len);
  407 + readBytesToString();
409 408 break;
410 409 }
411 410 case Mqtt5Properties::AuthenticationData:
412 411 {
413   - const uint16_t len = readTwoBytesToUInt16();
414   - readBytes(len);
  412 + readBytesToString(false);
415 413 break;
416 414 }
417 415 default:
... ... @@ -420,8 +418,7 @@ void MqttPacket::handleConnect()
420 418 }
421 419 }
422 420  
423   - uint16_t client_id_length = readTwoBytesToUInt16();
424   - std::string client_id(readBytes(client_id_length), client_id_length);
  421 + std::string client_id = readBytesToString();
425 422  
426 423 std::string username;
427 424 std::string password;
... ... @@ -453,15 +450,13 @@ void MqttPacket::handleConnect()
453 450 break;
454 451 case Mqtt5Properties::ContentType:
455 452 {
456   - const uint16_t len = readTwoBytesToUInt16();
457   - const std::string contentType(readBytes(len), len);
  453 + const std::string contentType = readBytesToString();
458 454 willpublish.propertyBuilder->writeContentType(contentType);
459 455 break;
460 456 }
461 457 case Mqtt5Properties::ResponseTopic:
462 458 {
463   - const uint16_t len = readTwoBytesToUInt16();
464   - const std::string responseTopic(readBytes(len), len);
  459 + const std::string responseTopic = readBytesToString();
465 460 willpublish.propertyBuilder->writeResponseTopic(responseTopic);
466 461 break;
467 462 }
... ... @@ -473,18 +468,13 @@ void MqttPacket::handleConnect()
473 468 }
474 469 case Mqtt5Properties::CorrelationData:
475 470 {
476   - const uint16_t len = readTwoBytesToUInt16();
477   - const std::string correlationData(readBytes(len), len);
  471 + const std::string correlationData = readBytesToString(false);
478 472 willpublish.propertyBuilder->writeCorrelationData(correlationData);
479 473 break;
480 474 }
481 475 case Mqtt5Properties::UserProperty:
482 476 {
483   - const uint16_t lenKey = readTwoBytesToUInt16();
484   - std::string userPropKey(readBytes(lenKey), lenKey);
485   - const uint16_t lenVal = readTwoBytesToUInt16();
486   - std::string userPropVal(readBytes(lenVal), lenVal);
487   - willpublish.propertyBuilder->writeUserProperty(std::move(userPropKey), std::move(userPropVal));
  477 + readUserProperty();
488 478 break;
489 479 }
490 480 default:
... ... @@ -493,16 +483,14 @@ void MqttPacket::handleConnect()
493 483 }
494 484 }
495 485  
496   - uint16_t will_topic_length = readTwoBytesToUInt16();
497   - willpublish.topic = std::string(readBytes(will_topic_length), will_topic_length);
  486 + willpublish.topic = readBytesToString(true, true);
498 487  
499 488 uint16_t will_payload_length = readTwoBytesToUInt16();
500 489 willpublish.payload = std::string(readBytes(will_payload_length), will_payload_length);
501 490 }
502 491 if (user_name_flag)
503 492 {
504   - uint16_t user_name_length = readTwoBytesToUInt16();
505   - username = std::string(readBytes(user_name_length), user_name_length);
  493 + username = readBytesToString(false);
506 494  
507 495 if (username.empty())
508 496 throw ProtocolError("Username flagged as present, but it's 0 bytes.", ReasonCodes::MalformedPacket);
... ... @@ -514,21 +502,21 @@ void MqttPacket::handleConnect()
514 502 throw ProtocolError("MQTT 3.1.1: If the User Name Flag is set to 0, the Password Flag MUST be set to 0.");
515 503 }
516 504  
517   - uint16_t password_length = readTwoBytesToUInt16();
518   - password = std::string(readBytes(password_length), password_length);
  505 + password = readBytesToString(false);
519 506  
520 507 if (password.empty())
521 508 throw ProtocolError("Password flagged as present, but it's 0 bytes.", ReasonCodes::MalformedPacket);
522 509 }
523 510  
524   - // The specs don't really say what to do when client id not UTF8, so including here.
525   - if (!isValidUtf8(client_id) || !isValidUtf8(username) || !isValidUtf8(password) || !isValidUtf8(willpublish.topic))
  511 + // I deferred the initial UTF8 check on username to be able to give an appropriate connack here, but to me, the specs
  512 + // are actually vague whether 'BadUserNameOrPassword' should be given on invalid UTF8.
  513 + if (!isValidUtf8(username))
526 514 {
527 515 ConnAck connAck(protocolVersion, ReasonCodes::BadUserNameOrPassword);
528 516 MqttPacket response(connAck);
529 517 sender->setReadyForDisconnect();
530 518 sender->writeMqttPacket(response);
531   - logger->logf(LOG_ERR, "Client ID, username, password or will topic has invalid UTF8: ", client_id.c_str());
  519 + logger->logf(LOG_ERR, "Username has invalid UTF8: %s", username.c_str());
532 520 return;
533 521 }
534 522  
... ... @@ -679,14 +667,12 @@ void MqttPacket::handleDisconnect()
679 667 }
680 668 case Mqtt5Properties::ReasonString:
681 669 {
682   - const uint16_t len = readTwoBytesToUInt16();
683   - reasonString = std::string(readBytes(len), len);
  670 + reasonString = readBytesToString();
684 671 break;
685 672 }
686 673 case Mqtt5Properties::ServerReference:
687 674 {
688   - const uint16_t len = readTwoBytesToUInt16();
689   - readBytes(len);
  675 + readBytesToString();
690 676 break;
691 677 }
692 678 case Mqtt5Properties::UserProperty:
... ... @@ -754,11 +740,10 @@ void MqttPacket::handleSubscribe()
754 740 std::list<ReasonCodes> subs_reponse_codes;
755 741 while (remainingAfterPos() > 0)
756 742 {
757   - uint16_t topicLength = readTwoBytesToUInt16();
758   - std::string topic(readBytes(topicLength), topicLength);
  743 + std::string topic = readBytesToString(true);
759 744  
760   - if (topic.empty() || !isValidUtf8(topic))
761   - throw ProtocolError("Subscribe topic not valid UTF-8.", ReasonCodes::MalformedPacket);
  745 + if (topic.empty())
  746 + throw ProtocolError("Subscribe topic is empty.", ReasonCodes::MalformedPacket);
762 747  
763 748 if (!isValidSubscribePath(topic))
764 749 throw ProtocolError(formatString("Invalid subscribe path: %s", topic.c_str()), ReasonCodes::MalformedPacket);
... ... @@ -837,11 +822,10 @@ void MqttPacket::handleUnsubscribe()
837 822 {
838 823 numberOfUnsubs++;
839 824  
840   - uint16_t topicLength = readTwoBytesToUInt16();
841   - std::string topic(readBytes(topicLength), topicLength);
  825 + std::string topic = readBytesToString();
842 826  
843   - if (topic.empty() || !isValidUtf8(topic))
844   - throw ProtocolError("Subscribe topic not valid UTF-8.", ReasonCodes::MalformedPacket);
  827 + if (topic.empty())
  828 + throw ProtocolError("Subscribe topic is empty.", ReasonCodes::MalformedPacket);
845 829  
846 830 sender->getThreadData()->getSubscriptionStore()->removeSubscription(sender, topic);
847 831 logger->logf(LOG_UNSUBSCRIBE, "Client '%s' unsubscribed from '%s'", sender->repr().c_str(), topic.c_str());
... ... @@ -860,8 +844,6 @@ void MqttPacket::handleUnsubscribe()
860 844  
861 845 void MqttPacket::parsePublishData()
862 846 {
863   - const uint16_t variable_header_length = readTwoBytesToUInt16();
864   -
865 847 publishData.retain = (first_byte & 0b00000001);
866 848 bool dup = !!(first_byte & 0b00001000);
867 849 publishData.qos = (first_byte & 0b00000110) >> 1;
... ... @@ -872,7 +854,7 @@ void MqttPacket::parsePublishData()
872 854 if (publishData.qos == 0 && dup)
873 855 throw ProtocolError("Duplicate flag is set for QoS 0 packet. This is illegal.", ReasonCodes::MalformedPacket);
874 856  
875   - publishData.topic = std::string(readBytes(variable_header_length), variable_header_length);
  857 + publishData.topic = readBytesToString(true, true);
876 858  
877 859 if (publishData.qos)
878 860 {
... ... @@ -928,27 +910,20 @@ void MqttPacket::parsePublishData()
928 910 case Mqtt5Properties::ResponseTopic:
929 911 {
930 912 publishData.constructPropertyBuilder();
931   - const uint16_t len = readTwoBytesToUInt16();
932   - const std::string responseTopic(readBytes(len), len);
  913 + const std::string responseTopic = readBytesToString();
933 914 publishData.propertyBuilder->writeResponseTopic(responseTopic);
934 915 break;
935 916 }
936 917 case Mqtt5Properties::CorrelationData:
937 918 {
938 919 publishData.constructPropertyBuilder();
939   - const uint16_t len = readTwoBytesToUInt16();
940   - const std::string correlationData(readBytes(len), len);
  920 + const std::string correlationData = readBytesToString(false);
941 921 publishData.propertyBuilder->writeCorrelationData(correlationData);
942 922 break;
943 923 }
944 924 case Mqtt5Properties::UserProperty:
945 925 {
946   - publishData.constructPropertyBuilder();
947   - const uint16_t lenKey = readTwoBytesToUInt16();
948   - std::string userPropKey(readBytes(lenKey), lenKey);
949   - const uint16_t lenVal = readTwoBytesToUInt16();
950   - std::string userPropVal(readBytes(lenVal), lenVal);
951   - publishData.propertyBuilder->writeUserProperty(std::move(userPropKey), std::move(userPropVal));
  926 + readUserProperty();
952 927 break;
953 928 }
954 929 case Mqtt5Properties::SubscriptionIdentifier:
... ... @@ -981,13 +956,6 @@ void MqttPacket::handlePublish()
981 956 {
982 957 parsePublishData();
983 958  
984   - if (!isValidUtf8(publishData.topic, true))
985   - {
986   - const std::string err = formatString("Client '%s' published a message with invalid UTF8 or $/+/# in it. Dropping.", sender->repr().c_str());
987   - logger->logf(LOG_WARNING, err.c_str());
988   - throw ProtocolError(err, ReasonCodes::ProtocolError);
989   - }
990   -
991 959 #ifndef NDEBUG
992 960 logger->logf(LOG_DEBUG, "Publish received, topic '%s'. QoS=%d. Retain=%d, dup=%d", publishData.topic.c_str(), publishData.qos, publishData.retain, dup);
993 961 #endif
... ... @@ -1350,14 +1318,29 @@ void MqttPacket::readUserProperty()
1350 1318 {
1351 1319 this->publishData.constructPropertyBuilder();
1352 1320  
1353   - const uint16_t len = readTwoBytesToUInt16();
1354   - std::string key(readBytes(len), len);
1355   - const uint16_t len2 = readTwoBytesToUInt16();
1356   - std::string value(readBytes(len2), len2);
  1321 + std::string key = readBytesToString();
  1322 + std::string value = readBytesToString();
1357 1323  
1358 1324 this->publishData.propertyBuilder->writeUserProperty(std::move(key), std::move(value));
1359 1325 }
1360 1326  
  1327 +std::string MqttPacket::readBytesToString(bool validateUtf8, bool alsoCheckInvalidPublishChars)
  1328 +{
  1329 + const uint16_t len = readTwoBytesToUInt16();
  1330 + std::string result(readBytes(len), len);
  1331 +
  1332 + if (validateUtf8)
  1333 + {
  1334 + if (!isValidUtf8(result, alsoCheckInvalidPublishChars))
  1335 + {
  1336 + logger->logf(LOG_DEBUG, "Data of invalid UTF-8 string or publish topic: %s", result.c_str());
  1337 + throw ProtocolError("Invalid UTF8 string detected, or invalid publish characters.", ReasonCodes::MalformedPacket);
  1338 + }
  1339 + }
  1340 +
  1341 + return result;
  1342 +}
  1343 +
1361 1344 const std::vector<std::pair<std::string, std::string>> *MqttPacket::getUserProperties() const
1362 1345 {
1363 1346 if (this->publishData.propertyBuilder)
... ...
mqttpacket.h
... ... @@ -82,6 +82,7 @@ class MqttPacket
82 82 size_t remainingAfterPos();
83 83 size_t decodeVariableByteIntAtPos();
84 84 void readUserProperty();
  85 + std::string readBytesToString(bool validateUtf8 = true, bool alsoCheckInvalidPublishChars = false);
85 86  
86 87 void calculateRemainingLength();
87 88  
... ...
threadlocalutils.cpp
... ... @@ -51,6 +51,12 @@ std::vector&lt;std::string&gt; *SimdUtils::splitTopic(const std::string &amp;topic, std::v
51 51 return &output;
52 52 }
53 53  
  54 +/**
  55 + * @brief SimdUtils::isValidUtf8 checks UTF-8 validity 16 bytes at a time, using SSE 4.2.
  56 + * @param s
  57 + * @param alsoCheckInvalidPublishChars is for checking the presence of '#' and '+' which is not allowed in publishes.
  58 + * @return
  59 + */
54 60 bool SimdUtils::isValidUtf8(const std::string &s, bool alsoCheckInvalidPublishChars)
55 61 {
56 62 const int len = s.size();
... ...