From 24c2575c11ce04a8493aa39a155ca2702420772f Mon Sep 17 00:00:00 2001 From: Paul Barrett Date: Fri, 24 Jul 2009 17:17:54 +0200 Subject: [PATCH] Minor modifications to code dealing with base64 content decode for CMIS, intended to catch possible issues with unencoded data and decoders which may not accept content without the padding bits --- lib/api/ktcmis/util/CMISUtil.inc.php | 38 ++++++++++++++++++++++++++++++++++++-- webservice/atompub/cmis/KT_cmis_atom_server.services.inc.php | 3 --- 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/lib/api/ktcmis/util/CMISUtil.inc.php b/lib/api/ktcmis/util/CMISUtil.inc.php index f5991ce..ae786d4 100644 --- a/lib/api/ktcmis/util/CMISUtil.inc.php +++ b/lib/api/ktcmis/util/CMISUtil.inc.php @@ -397,6 +397,8 @@ class CMISUtil { return $temp; } + // TODO more robust base64 encoding detection, if possible + /** * Checks the contentStream and ensures that it is a correct base64 string; * This is purely for clients such as CMISSpaces breaking the content into @@ -415,11 +417,34 @@ class CMISUtil { */ static public function decodeChunkedContentStream($contentStream) { + // always trim content, just in case, as the AtomPub specification says content may be padded with whitespace at the start and end. + $contentStream = trim($contentStream); + + // check whether the content is encoded first, return as is if not + // A–Z, a–z, 0–9, +, / + // NOTE this makes the (fairly reasonable) assumption that text content contains at least one space or punctuation character. + // of course this may fail should something be sent in plain text such as a passwords file containing sha1 or md5 hashes only. + if (preg_match('/[^\w\/\+=\n]+/', $content)) return $contentStream; + $decoded = ''; // split the content stream on ={1,2} - $parts = preg_split('/={1,2}/', $contentStream, null, PREG_SPLIT_NO_EMPTY); - foreach($parts as $part) { + $parts = preg_split('/(={1,2})/', $contentStream, null, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE); + foreach($parts as $part) + { + if (preg_match('/={1,2}/', $part)) { + continue; + } + + // lookahead for delimiter, because we may need it back. + // NOTE that decoding appears to work fine without this, so this is just an "in case". + // NOTE that even with this it seems the present function works faster than the alternative below. + if (isset($parts[$key+1])) { + if (preg_match('/={1,2}/', $parts[$key+1])) { + $part .= $parts[$key+1]; + } + } + // decode, append to output to be re-encoded $decoded .= base64_decode($part); } @@ -443,6 +468,15 @@ class CMISUtil { */ static public function decodeChunkedContentStreamLong($contentStream) { + // always trim content, just in case, as the AtomPub specification says content may be padded with whitespace at the start and end. + $contentStream = trim($contentStream); + + // check whether the content is encoded first, return as is if not + // A–Z, a–z, 0–9, +, / + // NOTE this makes the (fairly reasonable) assumption that text content contains at least one space or punctuation character. + // of course this may fail should something be sent in plain text such as a passwords file containing sha1 or md5 hashes only. + if (preg_match('/[^\w\/\+=\n]+/', $content)) return $contentStream; + // check the content stream for any lines of unusual length (except the last line, which can be any length) $count = -1; $length = 0; diff --git a/webservice/atompub/cmis/KT_cmis_atom_server.services.inc.php b/webservice/atompub/cmis/KT_cmis_atom_server.services.inc.php index c565dbe..6e7b87c 100644 --- a/webservice/atompub/cmis/KT_cmis_atom_server.services.inc.php +++ b/webservice/atompub/cmis/KT_cmis_atom_server.services.inc.php @@ -160,9 +160,6 @@ class KT_cmis_atom_service_folder extends KT_atom_service { // now check for content stream $content = KT_cmis_atom_service_helper::getAtomValues($this->parsedXMLContent['@children'], 'content'); - // check content for weird chars - don't think this serves a purpose any longer, should probably be removed. - // was meant to check for any non-base64 characters in the content string. - // preg_match('/[^\w\d\/\+=\n]*/', $content); // TODO this will possibly need to change somewhat once Relationship Objects come into play. if ((($action == 'create') && (is_null($content))) || ($typeId == 'Folder')) { $type = 'folder'; -- libgit2 0.21.4