Commit 24c2575c11ce04a8493aa39a155ca2702420772f
1 parent
74f75339
Minor modifications to code dealing with base64 content decode for CMIS, intended to catch possible issues with unencoded data and decoders which may not accept content without the padding bits.
Committed by: Paul Barrett
Showing
2 changed files
with
36 additions
and
5 deletions
lib/api/ktcmis/util/CMISUtil.inc.php
| ... | ... | @@ -397,6 +397,8 @@ class CMISUtil { |
| 397 | 397 | return $temp; |
| 398 | 398 | } |
| 399 | 399 | |
| 400 | + // TODO more robust base64 encoding detection, if possible | |
| 401 | + | |
| 400 | 402 | /** |
| 401 | 403 | * Checks the contentStream and ensures that it is a correct base64 string; |
| 402 | 404 | * This is purely for clients such as CMISSpaces breaking the content into |
| ... | ... | @@ -415,11 +417,34 @@ class CMISUtil { |
| 415 | 417 | */ |
| 416 | 418 | static public function decodeChunkedContentStream($contentStream) |
| 417 | 419 | { |
| 420 | + // always trim content, just in case, as the AtomPub specification says content may be padded with whitespace at the start and end. | |
| 421 | + $contentStream = trim($contentStream); | |
| 422 | + | |
| 423 | + // check whether the content is encoded first, return as is if not | |
| 424 | + // A–Z, a–z, 0–9, +, / | |
| 425 | + // NOTE this makes the (fairly reasonable) assumption that text content contains at least one space or punctuation character. | |
| 426 | + // of course this may fail should something be sent in plain text such as a passwords file containing sha1 or md5 hashes only. | |
| 427 | + if (preg_match('/[^\w\/\+=\n]+/', $content)) return $contentStream; | |
| 428 | + | |
| 418 | 429 | $decoded = ''; |
| 419 | 430 | |
| 420 | 431 | // split the content stream on ={1,2} |
| 421 | - $parts = preg_split('/={1,2}/', $contentStream, null, PREG_SPLIT_NO_EMPTY); | |
| 422 | - foreach($parts as $part) { | |
| 432 | + $parts = preg_split('/(={1,2})/', $contentStream, null, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE); | |
| 433 | + foreach($parts as $part) | |
| 434 | + { | |
| 435 | + if (preg_match('/={1,2}/', $part)) { | |
| 436 | + continue; | |
| 437 | + } | |
| 438 | + | |
| 439 | + // lookahead for delimiter, because we may need it back. | |
| 440 | + // NOTE that decoding appears to work fine without this, so this is just an "in case". | |
| 441 | + // NOTE that even with this it seems the present function works faster than the alternative below. | |
| 442 | + if (isset($parts[$key+1])) { | |
| 443 | + if (preg_match('/={1,2}/', $parts[$key+1])) { | |
| 444 | + $part .= $parts[$key+1]; | |
| 445 | + } | |
| 446 | + } | |
| 447 | + | |
| 423 | 448 | // decode, append to output to be re-encoded |
| 424 | 449 | $decoded .= base64_decode($part); |
| 425 | 450 | } |
| ... | ... | @@ -443,6 +468,15 @@ class CMISUtil { |
| 443 | 468 | */ |
| 444 | 469 | static public function decodeChunkedContentStreamLong($contentStream) |
| 445 | 470 | { |
| 471 | + // always trim content, just in case, as the AtomPub specification says content may be padded with whitespace at the start and end. | |
| 472 | + $contentStream = trim($contentStream); | |
| 473 | + | |
| 474 | + // check whether the content is encoded first, return as is if not | |
| 475 | + // A–Z, a–z, 0–9, +, / | |
| 476 | + // NOTE this makes the (fairly reasonable) assumption that text content contains at least one space or punctuation character. | |
| 477 | + // of course this may fail should something be sent in plain text such as a passwords file containing sha1 or md5 hashes only. | |
| 478 | + if (preg_match('/[^\w\/\+=\n]+/', $content)) return $contentStream; | |
| 479 | + | |
| 446 | 480 | // check the content stream for any lines of unusual length (except the last line, which can be any length) |
| 447 | 481 | $count = -1; |
| 448 | 482 | $length = 0; | ... | ... |
webservice/atompub/cmis/KT_cmis_atom_server.services.inc.php
| ... | ... | @@ -160,9 +160,6 @@ class KT_cmis_atom_service_folder extends KT_atom_service { |
| 160 | 160 | // now check for content stream |
| 161 | 161 | $content = KT_cmis_atom_service_helper::getAtomValues($this->parsedXMLContent['@children'], 'content'); |
| 162 | 162 | |
| 163 | - // check content for weird chars - don't think this serves a purpose any longer, should probably be removed. | |
| 164 | - // was meant to check for any non-base64 characters in the content string. | |
| 165 | - // preg_match('/[^\w\d\/\+=\n]*/', $content); | |
| 166 | 163 | // TODO this will possibly need to change somewhat once Relationship Objects come into play. |
| 167 | 164 | if ((($action == 'create') && (is_null($content))) || ($typeId == 'Folder')) { |
| 168 | 165 | $type = 'folder'; | ... | ... |