From 24c2575c11ce04a8493aa39a155ca2702420772f Mon Sep 17 00:00:00 2001
From: Paul Barrett <paul@knowledgetree.com>
Date: Fri, 24 Jul 2009 17:17:54 +0200
Subject: [PATCH] Minor modifications to code dealing with base64 content decode for CMIS, intended to catch possible issues with unencoded data and decoders which may not accept content without the padding bits

---
 lib/api/ktcmis/util/CMISUtil.inc.php                         | 38 ++++++++++++++++++++++++++++++++++++--
 webservice/atompub/cmis/KT_cmis_atom_server.services.inc.php |  3 ---
 2 files changed, 36 insertions(+), 5 deletions(-)

diff --git a/lib/api/ktcmis/util/CMISUtil.inc.php b/lib/api/ktcmis/util/CMISUtil.inc.php
index f5991ce..ae786d4 100644
--- a/lib/api/ktcmis/util/CMISUtil.inc.php
+++ b/lib/api/ktcmis/util/CMISUtil.inc.php
@@ -397,6 +397,8 @@ class CMISUtil {
         return $temp;
     }
     
+    // TODO more robust base64 encoding detection, if possible
+    
     /**
      * Checks the contentStream and ensures that it is a correct base64 string;
      * This is purely for clients such as CMISSpaces breaking the content into 
@@ -415,11 +417,34 @@ class CMISUtil {
      */
     static public function decodeChunkedContentStream($contentStream)
     {
+        // always trim content, just in case, as the AtomPub specification says content may be padded with whitespace at the start and end.
+        $contentStream = trim($contentStream);
+        
+        // check whether the content is encoded first, return it as is if not.
+        // Accepted characters: A–Z, a–z, 0–9, +, / plus '=' padding and newlines (NOTE \w also lets '_' through).
+        // NOTE this makes the (fairly reasonable) assumption that text content contains at least one space or punctuation character.
+        //      of course this may fail should plain text without either be sent, such as a password file containing only sha1 or md5 hashes.
+        if (preg_match('/[^\w\/\+=\n]+/', $contentStream)) return $contentStream;
+        
         $decoded = '';
         
         // split the content stream on ={1,2}
-        $parts = preg_split('/={1,2}/', $contentStream, null, PREG_SPLIT_NO_EMPTY);
-        foreach($parts as $part) {
+        $parts = preg_split('/(={1,2})/', $contentStream, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
+        foreach($parts as $key => $part)
+        {
+            if (preg_match('/={1,2}/', $part)) {
+                continue;
+            }
+        
+            // look ahead for the captured '=' / '==' delimiter which followed this chunk and re-attach it as padding.
+            // NOTE that decoding appears to work fine without this, so this is just an "in case" for decoders which reject unpadded input.
+            // NOTE that even with this it seems the present function works faster than the alternative below.
+            if (isset($parts[$key+1])) {
+                if (preg_match('/={1,2}/', $parts[$key+1])) {
+                    $part .= $parts[$key+1];
+                }
+            }
+            
             // decode, append to output to be re-encoded
             $decoded .= base64_decode($part);
         }
@@ -443,6 +468,15 @@ class CMISUtil {
      */
     static public function decodeChunkedContentStreamLong($contentStream)
     {
+        // always trim content, just in case, as the AtomPub specification says content may be padded with whitespace at the start and end.
+        $contentStream = trim($contentStream);
+        
+        // check whether the content is encoded first, return it as is if not.
+        // Accepted characters: A–Z, a–z, 0–9, +, / plus '=' padding and newlines (NOTE \w also lets '_' through).
+        // NOTE this makes the (fairly reasonable) assumption that text content contains at least one space or punctuation character.
+        //      of course this may fail should plain text without either be sent, such as a password file containing only sha1 or md5 hashes.
+        if (preg_match('/[^\w\/\+=\n]+/', $contentStream)) return $contentStream;
+        
         // check the content stream for any lines of unusual length (except the last line, which can be any length)
         $count = -1;
         $length = 0;
diff --git a/webservice/atompub/cmis/KT_cmis_atom_server.services.inc.php b/webservice/atompub/cmis/KT_cmis_atom_server.services.inc.php
index c565dbe..6e7b87c 100644
--- a/webservice/atompub/cmis/KT_cmis_atom_server.services.inc.php
+++ b/webservice/atompub/cmis/KT_cmis_atom_server.services.inc.php
@@ -160,9 +160,6 @@ class KT_cmis_atom_service_folder extends KT_atom_service {
         // now check for content stream
         $content = KT_cmis_atom_service_helper::getAtomValues($this->parsedXMLContent['@children'], 'content');        
         
-        // check content for weird chars - don't think this serves a purpose any longer, should probably be removed.
-        // was meant to check for any non-base64 characters in the content string.
-        // preg_match('/[^\w\d\/\+=\n]*/', $content);
         // TODO this will possibly need to change somewhat once Relationship Objects come into play.
         if ((($action == 'create') && (is_null($content))) || ($typeId == 'Folder')) {
             $type = 'folder';
--
libgit2 0.21.4