diff --git a/search2/ajax/ajax.inc.php b/search2/ajax/ajax.inc.php
new file mode 100755
index 0000000..6d8c008
--- /dev/null
+++ b/search2/ajax/ajax.inc.php
@@ -0,0 +1,218 @@
+$status);
+        if (isset($message))
+        {
+            $resp['message'] = $message;
+        }
+        if (isset($rsName))
+        {
+            $resp[$rsName] = $rs;
+        }
+        print json_encode($resp);
+        exit;
+    }
+
+    public static function parseQuery($txtQuery, $exitOnSuccess=true)
+    {
+        try
+        {
+            $expr = parseExpression($txtQuery);
+            if ($exitOnSuccess)
+            {
+                AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS );
+            }
+            return $expr;
+        }
+        catch(Exception $e)
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_PARSE_PROBLEM , $e->getMessage());
+        }
+    }
+
+    /**
+     * Replaces the stored expression of a saved search and emits the response.
+     *
+     * Users who are not system administrators may only update rows whose
+     * user_id matches their own id. Responds with STATUS_INTERNAL when the
+     * query fails and STATUS_SUCCESS otherwise.
+     *
+     * @param int $iSavedId Id of the search_saved row to update.
+     * @param string $txtQuery The new search expression.
+     * @param int $userID Id of the user performing the update.
+     */
+    public static function updateQuery($iSavedId,$txtQuery, $userID)
+    {
+        $txtQuery = sanitizeForSQL($txtQuery);
+        // Both ids are interpolated into the statement without quotes, so
+        // quote-escaping alone cannot prevent injection; force them to integers.
+        $iSavedId = (int) $iSavedId;
+        $iUserId = (int) $userID;
+
+        $sql = "UPDATE search_saved SET expression='$txtQuery' WHERE id=$iSavedId";
+        if (!Permission::userIsSystemAdministrator($userID))
+        {
+            $sql .= " AND user_id = $iUserId";
+        }
+        $result = DBUtil::runQuery($sql);
+        if (PEAR::isError($result))
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL );
+        }
+        AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS );
+    }
+
+
+    public static function saveQuery($txtName,$txtQuery, $userID)
+    {
+        $lookup = sanitizeForSQL($txtName);
+        $sql = "select 1 from search_saved where name='$lookup'";
+        $result = DBUtil::getResultArray($sql);
+        if (PEAR::isError($result))
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL );
+        }
+        if (count($result) > 0)
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SAVED_SEARCH_EXISTS, _kt('Search with this name already exists') );
+        }
+
+        // autoInsert does escaping...
+ $values = array( + 'name'=>$txtName, + 'expression'=>$txtQuery, + 'type'=>'S', + 'shared'=>0, + 'user_id' => $userID + ); + + $result = DBUtil::autoInsert('search_saved', $values); + + if (PEAR::isError($result)) + { + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL ); + } + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS ); + } + + public static function getSavedSearches($userID) + { + $rs = SearchHelper::getSavedSearches($userID); + if (PEAR::isError($rs)) + { + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL ); + } + + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null, 'searches', $rs); + } + + + + public static function getDocumentTypes() + { + $rs = SearchHelper::getDocumentTypes(); + if (PEAR::isError($rs)) + { + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL, $rs->getMessage() ); + } + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null, 'documenttypes', $rs); + } + + public static function getDocumentTypeFieldsets($documentTypeID) + { + $rs = SearchHelper::getDocumentTypeFieldsets($documentTypeID); + + if (PEAR::isError($rs)) + { + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL, $rs->getMessage() ); + } + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null, 'fieldsets', $rs); + } + + + public static function getFieldsets() + { + $rs = SearchHelper::getFieldsets(); + if (PEAR::isError($rs)) + { + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL, $rs->getMessage() ); + } + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null, 'fieldsets', $rs); + } + + public static function getFields($fieldsetID) + { + $result = SearchHelper::getFields($fieldsetID); + + if (PEAR::isError($result)) + { + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL, $result->getMessage() ); + } + + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null, 
'fields', $result); + } + + + public static function getFolder($folderID) + { + $userid = AjaxSearchHelper::getSessionUser(); + + $folders = SearchHelper::getFolder($folderID, $userid); + if (PEAR::isError($folders)) + { + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_MISSING_FOLDER, $folders->getMessage() ); + } + + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null, 'folders', $folders); + + } + + public static function getSearchFields() + { + $results = SearchHelper::getSearchFields(); + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null, 'fields', $results); + } + +} + +?> \ No newline at end of file diff --git a/search2/ajax/metadata.php b/search2/ajax/metadata.php new file mode 100755 index 0000000..26a5ae8 --- /dev/null +++ b/search2/ajax/metadata.php @@ -0,0 +1,24 @@ + \ No newline at end of file diff --git a/search2/ajax/parseExpr.php b/search2/ajax/parseExpr.php new file mode 100755 index 0000000..f469b33 --- /dev/null +++ b/search2/ajax/parseExpr.php @@ -0,0 +1,9 @@ + \ No newline at end of file diff --git a/search2/ajax/saveExpr.php b/search2/ajax/saveExpr.php new file mode 100755 index 0000000..39d63d9 --- /dev/null +++ b/search2/ajax/saveExpr.php @@ -0,0 +1,40 @@ + \ No newline at end of file diff --git a/search2/ajax/savedSearches.php b/search2/ajax/savedSearches.php new file mode 100755 index 0000000..1c0a010 --- /dev/null +++ b/search2/ajax/savedSearches.php @@ -0,0 +1,9 @@ + \ No newline at end of file diff --git a/search2/ajax/searchFields.php b/search2/ajax/searchFields.php new file mode 100755 index 0000000..c4c22ed --- /dev/null +++ b/search2/ajax/searchFields.php @@ -0,0 +1,8 @@ + \ No newline at end of file diff --git a/search2/ajax/treeNodes.php b/search2/ajax/treeNodes.php new file mode 100755 index 0000000..d8ec8a5 --- /dev/null +++ b/search2/ajax/treeNodes.php @@ -0,0 +1,8 @@ + \ No newline at end of file diff --git a/search2/images/kn.png b/search2/images/kn.png new file mode 
100755 index 0000000..9773621 --- /dev/null +++ b/search2/images/kn.png diff --git a/search2/images/o-red.png b/search2/images/o-red.png new file mode 100755 index 0000000..15b4571 --- /dev/null +++ b/search2/images/o-red.png diff --git a/search2/images/o-yellow.png b/search2/images/o-yellow.png new file mode 100755 index 0000000..44d1a81 --- /dev/null +++ b/search2/images/o-yellow.png diff --git a/search2/images/wledgetree.png b/search2/images/wledgetree.png new file mode 100755 index 0000000..6854deb --- /dev/null +++ b/search2/images/wledgetree.png diff --git a/search2/indexing/bin/cronIndexer.php b/search2/indexing/bin/cronIndexer.php new file mode 100755 index 0000000..0370a8e --- /dev/null +++ b/search2/indexing/bin/cronIndexer.php @@ -0,0 +1,9 @@ +indexDocuments(); + +?> \ No newline at end of file diff --git a/search2/indexing/bin/diagnose.php b/search2/indexing/bin/diagnose.php new file mode 100755 index 0000000..d44105c --- /dev/null +++ b/search2/indexing/bin/diagnose.php @@ -0,0 +1,11 @@ +diagnose(); + +var_dump($diagnoses); + +?> \ No newline at end of file diff --git a/search2/indexing/bin/optimise.php b/search2/indexing/bin/optimise.php new file mode 100755 index 0000000..8c98457 --- /dev/null +++ b/search2/indexing/bin/optimise.php @@ -0,0 +1,9 @@ +optimise(); + +?> \ No newline at end of file diff --git a/search2/indexing/bin/recreateIndex.php b/search2/indexing/bin/recreateIndex.php new file mode 100755 index 0000000..2e5bb32 --- /dev/null +++ b/search2/indexing/bin/recreateIndex.php @@ -0,0 +1,20 @@ + \ No newline at end of file diff --git a/search2/indexing/bin/registerTypes.php b/search2/indexing/bin/registerTypes.php new file mode 100755 index 0000000..fda3fe6 --- /dev/null +++ b/search2/indexing/bin/registerTypes.php @@ -0,0 +1,9 @@ +registerTypes(true); + +?> \ No newline at end of file diff --git a/search2/indexing/extractorCore.inc.php b/search2/indexing/extractorCore.inc.php new file mode 100755 index 0000000..61f7c41 --- /dev/null +++ 
b/search2/indexing/extractorCore.inc.php @@ -0,0 +1,631 @@ +needsIntermediate=false; + $this->extractionStatus = null; + $this->indexStatus = null; + } + + /** + * Sets the status of the indexing. + * + * @param unknown_type $status + */ + public function setIndexingStatus($status) + { + $this->indexStatus = $status; + } + /** + * Returns the indexing status. + * + * @return boolean + */ + public function getIndexingStatus() + { + return $this->indexStatus; + } + + /** + * Sets the extraction status. + * + * @param boolean $status + */ + public function setExtractionStatus($status) + { + $this->extractionStatus = $status; + } + /** + * Return the extraction status. + * + * @return boolean + */ + public function getExtractionStatus() + { + return $this->extractionStatus; + } + + /** + * This associates all the mime types associated with the extractor class. + * + */ + public function registerMimeTypes() + { + $types = $this->getSupportedMimeTypes(); + if (empty($types)) + { + return; + } + $classname=get_class($this); + + foreach($types as $type) + { + $sql = "update mime_types set extractor='$classname' where mimetypes='$type' and extractor is null"; + DBUtil::runQuery($sql); + } + } + + /** + * Indicates if an intermediate file is required. + * + * @param $value boolean Optional. If set, we set the value. + * @return boolean + */ + public function needsIntermediateSourceFile($value = null) + { + if (!is_null($value)) + { + $this->needsIntermediate = $value; + } + return $this->needsIntermediate; + } + + /** + * Sets the source filename for the document extractor. + * + * @param string $sourcefile + */ + public function setSourceFile($sourcefile) + { + $this->sourcefile=$sourcefile; + } + + /** + * Returns the source file name. + * + * @return string + */ + public function getSourceFile() { return $this->sourcefile; } + + /** + * Sets the source file's mime type. 
+ * + * @param string $mimetype + */ + public function setMimeType($mimetype) + { + $this->mimetype=$mimetype; + } + /** + * Returns the mime type for the source file. + * + * @return string + */ + public function getMimeType() { return $this->mimetype; } + + /** + * Indicates the extension for the source file. + * + * @param string $extension + */ + public function setExtension($extension) + { + $this->extension=$extension; + } + /** + * Returns the extension of the source file. + * + * @return string + */ + public function getExtension() { return $this->extension; } + + /** + * Sets the file name of the target text file. + * + * @param string $targetfile + */ + public function setTargetFile($targetfile) + { + $this->targetfile=$targetfile; + } + + /** + * Gets the file name of the target text file containing the extracted text. + * + * @return unknown + */ + public function getTargetFile() { return $this->targetfile; } + + /** + * Filter function that may be applied after extraction. This may be overridden. + * + * @param string $text + * @return string + */ + protected function filter($text) + { + return $text; + } + + /** + * Set the document that will be indexed. + * + * @param Document $document + */ + public function setDocument($document) + { + $this->document = $document; + } + + /** + * Returns a reference to the document. + * + * @return string + */ + public function getDocument() + { + return $this->document; + } + + /** + * Returns an array of supported mime types. + * e.g. return array('plain/text'); + * + * + * @return array + * + */ + public abstract function getSupportedMimeTypes(); + + /** + * Extracts the content from the source file. + * + * @return boolean + */ + public abstract function extractTextContent(); + + /** + * Returns a friendly name for the document text extractor. + * + * @return string + */ + public abstract function getDisplayName(); + + /** + * Attempts to diagnose any problems with the indexing process. 
+ * + * @return string + */ + public abstract function diagnose(); + +} + +/** + * This class extends the document extractor to execute some command line application. + * The getCommandLine() method needs to be overridden. + * + */ +abstract class ExternalDocumentExtractor extends DocumentExtractor +{ + /** + * Initialise the extractor. + * + */ + public function __construct() + { + parent::__construct(); + putenv('LANG=en_US.UTF-8'); + } + + /** + * Executes a command. Returns true if successful. + * + * @param string $cmd A command line instruction. + * @return boolean + */ + protected function exec($cmd) + { + $aRet = KTUtil::pexec($cmd); + return $aRet['ret'] == 0; + } + + /** + * Returns the command line string to be executed. + * The command returned should include the target filename. + * + * @return string + */ + protected function getCommandLine() + { + throw new Exception('getCommandLine is not implemented'); + } + + /** + * Executes the command that executes the command. + * Returns true if success. + * + * @return boolean + */ + public function extractTextContent() + { + global $default; + + $cmdline = $this->getCommandLine(); + + $class = get_class($this); + $default->log->debug("$class: " . $cmdline); + + return $this->exec($cmdline); + } + +} + +/** + * An extension to the extenal document extractor. A derived class simply needs + * to implement a constructor and getSupportedMimeTypes(). + * + */ +abstract class ApplicationExtractor extends ExternalDocumentExtractor +{ + /** + * The full path to the application that will be run. This will be resolved from + * the path or using the config file. + * + * @var string + */ + private $application; + /** + * The command name of the application that can be run. + * + * @var string + */ + private $command; + /** + * This is the friendly name for the extractor. + * + * @var string + */ + private $displayname; + /** + * The command line parameters for the application. 
+     * This may include {source} and {target} where substitutions will be done.
+     *
+     * @var string
+     */
+    private $params;
+
+    /**
+     * Initialise the extractor.
+     *
+     * @param string $section The section in the config file.
+     * @param string $appname The application name in the config file.
+     * @param string $command The command that can be run.
+     * @param string $displayname
+     * @param string $params
+     */
+    public function __construct($section, $appname, $command, $displayname, $params)
+    {
+        parent::__construct();
+
+        $this->application = KTUtil::findCommand("$section/$appname", $command);
+        $this->command = $command;
+        $this->displayname = $displayname;
+        $this->params = $params;
+    }
+
+    /**
+     * Return the display name.
+     *
+     * @return string
+     */
+    public function getDisplayName()
+    {
+        return _kt($this->displayname);
+    }
+
+    /**
+     * Returns the command line after performing substitutions.
+     *
+     * The {source} and {target} placeholders in the configured parameter
+     * template are replaced with the source and target file names, and the
+     * result is appended to the command name.
+     *
+     * NOTE(review): the file names are substituted without shell escaping -
+     * confirm whether the parameter templates already quote the placeholders.
+     *
+     * @return string
+     */
+    protected function getCommandLine()
+    {
+        $placeholders = array('{source}','{target}');
+        $filenames = array($this->sourcefile, $this->targetfile);
+
+        // The template is held in the $params property; the previous code read
+        // an undefined local variable, so no arguments were ever appended.
+        $arguments = str_replace($placeholders, $filenames, $this->params);
+
+        return $this->command . ' ' . $arguments;
+    }
+
+    /**
+     * Identifies if there are any circumstances why the command can not run that could result in the text extraction process
+     * failing.
+     *
+     * @return mixed Returns string if there is a problem, null otherwise.
+     */
+    public function diagnose()
+    {
+        if (false === $this->application)
+        {
+            return _kt("Cannot locate binary for $this->displayname ($this->command).");
+        }
+
+        return null;
+    }
+}
+
+abstract class TextExtractor extends DocumentExtractor
+{
+    /**
+     * This extracts the text from the document.
+     *
+     * @return boolean
+     */
+    public function extractTextContent()
+    {
+        $content = file_get_contents($this->sourcefile);
+        if (false === $content)
+        {
+            return false;
+        }
+
+        $result = file_put_contents($this->targetfile, $this->filter($content));
+
+        return false !== $result;
+    }
+
+    /**
+     * There are no external dependencies to diagnose.
+     *
+     * @return null
+     */
+    public function diagnose()
+    {
+        return null;
+    }
+
+}
+
+/**
+ * The composite extractor implies that a conversion is done to an intermediate form before another extractor is run.
+ *
+ */
+abstract class CompositeExtractor extends DocumentExtractor
+{
+    /**
+     * The initial extractor
+     *
+     * @var DocumentExtractor
+     */
+    private $sourceExtractor;
+    /**
+     * The text extractor
+     *
+     * @var DocumentExtractor
+     */
+    private $targetExtractor;
+    /**
+     * The extension for the initial extraction
+     *
+     * @var string
+     */
+    private $targetExtension;
+    /**
+     * The mime type of the initial extraction.
+     *
+     * @var string
+     */
+    private $targetMimeType;
+
+    /**
+     * Wires together the two extraction stages.
+     *
+     * @param DocumentExtractor $sourceExtractor Converts the source file to the intermediate form.
+     * @param string $targetExtension Extension given to the intermediate file.
+     * @param string $targetMimeType Mime type reported for the intermediate file.
+     * @param DocumentExtractor $targetExtractor Extracts the text from the intermediate file.
+     * @param boolean $needsIntermediate Passed on to needsIntermediateSourceFile().
+     */
+    public function __construct($sourceExtractor, $targetExtension, $targetMimeType, $targetExtractor, $needsIntermediate)
+    {
+        $this->sourceExtractor = $sourceExtractor;
+        $this->targetExtractor = $targetExtractor;
+        $this->targetExtension = $targetExtension;
+        $this->targetMimeType = $targetMimeType;
+        $this->needsIntermediateSourceFile($needsIntermediate);
+    }
+
+    /**
+     * Extracts the content of the document in two stages: the source extractor
+     * writes an intermediate file next to the target file, then the target
+     * extractor turns that intermediate file into the final text file.
+     * The intermediate file is removed afterwards.
+     *
+     * @return boolean True when both stages succeed.
+     */
+    public function extractTextContent()
+    {
+        $intermediateFile = $this->targetfile . '.' . $this->targetExtension;
+
+        $this->sourceExtractor->setSourceFile($this->sourcefile);
+        $this->sourceExtractor->setTargetFile($intermediateFile);
+        $this->sourceExtractor->setMimeType($this->mimetype);
+        $this->sourceExtractor->setExtension($this->extension);
+        // extractTextContent() reports success with true, so the second stage
+        // must only be skipped when the first stage FAILED.
+        if (!$this->sourceExtractor->extractTextContent())
+        {
+            $this->removeIntermediateFile($intermediateFile);
+            return false;
+        }
+
+        $this->targetExtractor->setSourceFile($intermediateFile);
+        $this->targetExtractor->setTargetFile($this->targetfile);
+        $this->targetExtractor->setMimeType($this->targetMimeType);
+        $this->targetExtractor->setExtension($this->targetExtension);
+        $result = $this->targetExtractor->extractTextContent();
+
+        $this->removeIntermediateFile($intermediateFile);
+
+        return $result;
+    }
+
+    /**
+     * Deletes the intermediate file if a stage actually produced one.
+     *
+     * @param string $intermediateFile
+     */
+    private function removeIntermediateFile($intermediateFile)
+    {
+        if (is_file($intermediateFile))
+        {
+            unlink($intermediateFile);
+        }
+    }
+
+    /**
+     * Diagnose the extractors
+     *
+     * @return mixed The first problem reported by either extractor, null when there is none.
+     */
+    public function diagnose()
+    {
+        $diagnosis = $this->sourceExtractor->diagnose();
+        if (!empty($diagnosis))
+        {
+            return $diagnosis;
+        }
+
+        $diagnosis = $this->targetExtractor->diagnose();
+        if (!empty($diagnosis))
+        {
+            return $diagnosis;
+        }
+
+        return null;
+    }
+}
+
+
+/**
+ * An extractor hook supplies callbacks that are invoked before and after
+ * the extraction and indexing phases.
+ *
+ */
+abstract class ExtractorHook
+{
+    /**
+     * Returns an array of supported mime types.
+     * e.g. return array('plain/text');
+     *
+     *
+     * @return array
+     *
+     */
+    public abstract function getSupportedMimeTypes();
+
+    /**
+     * Returns the friendly name for the hook.
+     *
+     * @return string
+     */
+    public abstract function getDisplayName();
+
+    /**
+     * This does a basic diagnosis on the hook.
+     *
+     * @return string
+     */
+    public function diagnose()
+    {
+        return null;
+    }
+
+    /**
+     * Perform any pre extraction activities.
+     *
+     * @param DocumentExtractor $extractor
+     */
+    public function pre_extract($extractor)
+    {
+    }
+
+    /**
+     * Perform any post extraction activities.
+     *
+     * @param DocumentExtractor $extractor
+     */
+    public function post_extract($extractor)
+    {
+
+    }
+
+    /**
+     * Perform any pre indexing activities.
+ * + * @param DocumentExtractor $extractor + */ + public function pre_index($extractor) + { + + } + + /** + * Perform any post indexing activities. + * + * @param DocumentExtractor $extractor + */ + public function post_index($extractor) + { + + } +} + +?> \ No newline at end of file diff --git a/search2/indexing/extractors/MailMimeExtractor.inc.php b/search2/indexing/extractors/MailMimeExtractor.inc.php new file mode 100755 index 0000000..9dee5df --- /dev/null +++ b/search2/indexing/extractors/MailMimeExtractor.inc.php @@ -0,0 +1,17 @@ + \ No newline at end of file diff --git a/search2/indexing/extractors/OOPDFTextExtractor.inc.php b/search2/indexing/extractors/OOPDFTextExtractor.inc.php new file mode 100755 index 0000000..442601f --- /dev/null +++ b/search2/indexing/extractors/OOPDFTextExtractor.inc.php @@ -0,0 +1,101 @@ +pdf2txt = new PDFExtractor(); + $this->text2pdf = new OOTextExtractor(); + } + + public function needsIntermediateSourceFile() + { + // we need the intermediate file because it + // has the correct extension. jodconverter uses the extension to determine mimetype + return true; + } + + public function getDisplayName() + { + throw new Exception('This should be overriden'); + } + + public function getSupportedMimeTypes() + { + return array(); + } + + public function extractTextContent() + { + $pdffile = $this->targetfile . 
'.pdf'; + + $this->text2pdf->setSourceFile($this->sourcefile); + $this->text2pdf->setTargetFile($pdffile); + $this->text2pdf->setMimeType($this->mimetype); + $this->text2pdf->setExtension($this->extension); + if ($this->extractTextContent()) + { + return false; + } + + $this->pdf2txt->setSourceFile($pdffile); + $this->pdf2txt->setTargetFile($this->targetfile); + $this->pdf2txt->setMimeType('application/pdf'); + $this->pdf2txt->setExtension('pdf'); + $result = $this->pdf2txt->extractTextContent(); + + unlink(@$pdffile); + + return $result; + } + + public function diagnose() + { + $diagnosis = $this->pdf2txt->diagnose(); + if (!empty($diagnosis)) + { + return $diagnosis; + } + + $diagnosis = $this->text2pdf->diagnose(); + if (!empty($diagnosis)) + { + return $diagnosis; + } + + return null; + } +} */ + +?> \ No newline at end of file diff --git a/search2/indexing/extractors/OOPresentationExtractor.inc.php b/search2/indexing/extractors/OOPresentationExtractor.inc.php new file mode 100755 index 0000000..e832cc9 --- /dev/null +++ b/search2/indexing/extractors/OOPresentationExtractor.inc.php @@ -0,0 +1,21 @@ + \ No newline at end of file diff --git a/search2/indexing/extractors/OOSpreadsheetExtractor.inc.php b/search2/indexing/extractors/OOSpreadsheetExtractor.inc.php new file mode 100755 index 0000000..67d6039 --- /dev/null +++ b/search2/indexing/extractors/OOSpreadsheetExtractor.inc.php @@ -0,0 +1,25 @@ + \ No newline at end of file diff --git a/search2/indexing/extractors/OOTextExtractor.inc.php b/search2/indexing/extractors/OOTextExtractor.inc.php new file mode 100755 index 0000000..dbdbfd0 --- /dev/null +++ b/search2/indexing/extractors/OOTextExtractor.inc.php @@ -0,0 +1,81 @@ +converter = KTUtil::findCommand('extractors/jodconverter', 'jodconverter'); + $this->javaPath = KTUtil::findCommand('extractors/java', 'java'); + $this->ooHost = $config->get('openoffice/host', 'localhost'); + $this->ooPort = $config->get('openoffice/port', 8100); + $this->targetMimeType = 
$targetMimeType; + } + + public function getDisplayName() + { + return _kt('OpenOffice Text Extractor'); + } + + public function getSupportedMimeTypes() + { + return array( + 'text/rtf', + 'application/vnd.oasis.opendocument.text', + 'application/vnd.oasis.opendocument.text-template', + 'application/vnd.oasis.opendocument.text-web', + 'application/vnd.oasis.opendocument.text-master', + 'application/vnd.sun.xml.writer', + 'application/vnd.sun.xml.writer.template', + 'application/vnd.sun.xml.writer.global', + ); + } + + public function needsIntermediateSourceFile() + { + // we need the intermediate file because it + // has the correct extension. jodconverter uses the extension to determine mimetype + return true; + } + + protected function getCommandLine() + { + $cmdline = "$this->javaPath -jar $this->converter $this->sourcefile $this->mimetype $this->targetfile $this->targetMimeType $this->ooHost $this->ooPort"; + return $cmdline; + } + + public function diagnose() + { + if (false === $this->converter) + { + return _kt('Cannot locate jodconverter'); + } + + if (false === $this->javaPath) + { + return _kt('Cannot locate java'); + } + + + + $connection = @fsockopen($this->ooHost, $this->ooPort,$errno, $errstr,5 ); + if (false === $connection) + { + return _kt('Cannot connect to openoffice host'); + } + fclose($connection); + + + return null; + } +} + +?> \ No newline at end of file diff --git a/search2/indexing/extractors/PDFExtractor.inc.php b/search2/indexing/extractors/PDFExtractor.inc.php new file mode 100755 index 0000000..a504071 --- /dev/null +++ b/search2/indexing/extractors/PDFExtractor.inc.php @@ -0,0 +1,16 @@ + \ No newline at end of file diff --git a/search2/indexing/extractors/PSExtractor.inc.php b/search2/indexing/extractors/PSExtractor.inc.php new file mode 100755 index 0000000..b9c5aa7 --- /dev/null +++ b/search2/indexing/extractors/PSExtractor.inc.php @@ -0,0 +1,16 @@ + \ No newline at end of file diff --git 
a/search2/indexing/extractors/PlainTextExtractor.inc.php b/search2/indexing/extractors/PlainTextExtractor.inc.php new file mode 100755 index 0000000..7ce4ea9 --- /dev/null +++ b/search2/indexing/extractors/PlainTextExtractor.inc.php @@ -0,0 +1,17 @@ + \ No newline at end of file diff --git a/search2/indexing/extractors/ScriptExtractor.inc.php b/search2/indexing/extractors/ScriptExtractor.inc.php new file mode 100755 index 0000000..09305d9 --- /dev/null +++ b/search2/indexing/extractors/ScriptExtractor.inc.php @@ -0,0 +1,17 @@ + \ No newline at end of file diff --git a/search2/indexing/extractors/XMLExtractor.inc.php b/search2/indexing/extractors/XMLExtractor.inc.php new file mode 100755 index 0000000..2d7a2fd --- /dev/null +++ b/search2/indexing/extractors/XMLExtractor.inc.php @@ -0,0 +1,21 @@ +]*>)+@", " ", $text); + } +} + +?> \ No newline at end of file diff --git a/search2/indexing/indexerCore.inc.php b/search2/indexing/indexerCore.inc.php new file mode 100755 index 0000000..e600e10 --- /dev/null +++ b/search2/indexing/indexerCore.inc.php @@ -0,0 +1,942 @@ +document_id=$document_id; + $this->rank= $rank; + $this->title=$title; + $this->text = $text; + $this->loadDocumentInfo(); + } + + protected function __isset($property) + { + switch($property) + { + case 'DocumentID': return isset($this->document_id); + case 'Rank': return isset($this->rank); + case 'Text': return isset($this->text); + case 'Title': return isset($this->title); + case null: break; + default: + throw new Exception("Unknown property '$property' to get on MatchResult"); + } + } + + private function loadDocumentInfo() + { + $sql = "SELECT + f.full_path, f.name, dcv.size as filesize, dcv.major_version, + dcv.minor_version, dcv.filename, cou.name as checkoutuser, w.human_name as workflow, ws.human_name as workflowstate + + FROM + documents d + INNER JOIN document_metadata_version dmv ON d.metadata_version_id = dmv.id + INNER JOIN document_content_version dcv ON dmv.content_version_id = dcv.id + 
LEFT JOIN folders f ON f.id=d.folder_id + LEFT JOIN users cou ON d.checked_out_user_id=cou.id + LEFT JOIN workflows w ON dmv.workflow_id=w.id + LEFT JOIN workflow_states ws ON dmv.workflow_state_id = ws.id + WHERE + d.id=$this->document_id"; + + $result = DBUtil::getOneResult($sql); + + if (PEAR::isError($result) || empty($result)) + { + $this->live = false; + return; + } + + $this->live = true; + if (is_null($result['name'])) + { + $this->fullpath = '(orphaned)'; + } + else + { + $this->fullpath = $result['full_path'] . '/' . $result['name']; + if (substr($this->fullpath,0,1) == '/') $this->fullpath = substr($this->fullpath,1); + } + + + $this->filesize = $result['filesize'] + 0; + + if ($this->filesize > 1024 * 1024 * 1024) + { + $this->filesize = floor($this->filesize / (1024 * 1024 * 1024)) . 'g'; + } + elseif ($this->filesize > 1024 * 1024) + { + $this->filesize = floor($this->filesize / (1024 * 1024)) . 'm'; + } + elseif ($this->filesize > 1024) + { + $this->filesize = floor($this->filesize / (1024)) . 'k'; + } + else + { + $this->filesize .= 'b'; + } + + $this->version = $result['major_version'] . '.' . 
$result['minor_version']; + $this->filename=$result['filename']; + $this->checkoutuser = $result['checkoutuser']; + $this->workflow = $result['workflow']; + $this->workflowstate = $result['workflowstate']; + + } + + + + protected function __get($property) + { + switch($property) + { + case 'DocumentID': return $this->document_id; + case 'Rank': return $this->rank; + case 'Text': return $this->text; + case 'Title': return $this->title; + case 'FullPath': return $this->fullpath; + case 'IsLive': return $this->live; + case 'Filesize': return $this->filesize; + case 'Version': return $this->version; + case 'Filename': return $this->filename; + case 'Document': + if (is_null($this->document)) + $this->document = Document::get($this->document_id); + return $this->document; + case 'IsAvailable': + return $this->Document->isLive(); + + case 'CheckedOutUser': + return $this->checkoutuser; + case 'Workflow': + if (is_null($this->workflow)) + { + return ''; + } + return "$this->workflow - $this->workflowstate"; + case null: break; + default: + throw new Exception("Unknown property '$property' to get on MatchResult"); + } + } + + protected function __set($property, $value) + { + switch($property) + { + case 'Rank': $this->rank = number_format($value,2,'.',','); break; + case 'Text': $this->text = $value; break; + default: + throw new Exception("Unknown property '$property' to set on MatchResult"); + } + } +} + +function MatchResultCompare($a, $b) +{ + if ($a->Rank == $b->Rank) { + return 0; + } + return ($a->Rank < $b->Rank) ? 
-1 : 1;
+}
+
+class QueryResultItem extends MatchResult
+{
+    protected $discussion;
+
+    public function __construct($document_id, $rank, $title, $text, $discussion)
+    {
+        parent::__construct($document_id, $rank, $title, $text);
+        $this->discussion=$discussion;
+    }
+
+    protected function __isset($property)
+    {
+        switch($property)
+        {
+            case 'Discussion': return isset($this->discussion);
+            default: return parent::__isset($property);
+        }
+    }
+
+    protected function __get($property)
+    {
+        switch($property)
+        {
+            case 'Discussion': return $this->discussion;
+            default: return parent::__get($property);
+        }
+    }
+}
+
+abstract class Indexer
+{
+    /**
+     * Cache of extractors
+     *
+     * @var array
+     */
+    private $extractorCache;
+
+    /**
+     * Indicates if the indexer will do logging.
+     *
+     * @var boolean
+     */
+    private $debug;
+    /**
+     * Cache on mime related hooks
+     *
+     * @var array
+     */
+    private $mimeHookCache;
+    /**
+     * Cache on general hooks.
+     *
+     * @var array
+     */
+    private $generalHookCache;
+
+    /**
+     * This is a path to the extractors.
+     *
+     * @var string
+     */
+    private $extractorPath;
+    /**
+     * This is a path to the hooks.
+     *
+     * @var string
+     */
+    private $hookPath;
+
+    /**
+     * Initialise the indexer
+     *
+     */
+    protected function __construct()
+    {
+        $this->extractorCache=array();
+        $this->debug=true;
+        // Initialise the declared property; the former "hookCache" name created
+        // an undeclared property and left $mimeHookCache as null.
+        $this->mimeHookCache = array();
+        $this->generalHookCache = array();
+
+        $config = KTConfig::getSingleton();
+
+        $this->extractorPath = $config->get('indexer/extractorPath', 'extractors');
+        $this->hookPath = $config->get('indexer/extractorHookPath','extractorHooks');
+    }
+
+    /**
+     * Returns a reference to the main class
+     *
+     * The concrete class name is read from the 'indexer/coreClass' config
+     * entry and its file is loaded from indexing/indexers/.
+     *
+     * @return Indexer
+     * @throws Exception When the configured class does not exist after loading its file.
+     */
+    public static function get()
+    {
+        static $singleton = null;
+
+        if (is_null($singleton))
+        {
+            $config = KTConfig::getSingleton();
+            $classname = $config->get('indexer/coreClass');
+
+            require_once('indexing/indexers/' . $classname . '.inc.php');
+
+            if (!class_exists($classname))
+            {
+                throw new Exception("Class '$classname' does not exist.");
+            }
+
+            $singleton = new $classname;
+        }
+
+        return $singleton;
+    }
+
+    public abstract function deleteDocument($docid);
+
+    /**
+     * Remove the association of all extractors to mime types on the database.
+     *
+     */
+    public function clearExtractors()
+    {
+        global $default;
+        $sql = "update mime_types set extractor=null";
+        DBUtil::runQuery($sql);
+
+        $default->log->debug('clearExtractors');
+    }
+
+    /**
+     * lookup the name of the extractor class based on the mime type.
+     *
+     * NOTE(review): the lookup filters on the "filetypes" column while
+     * DocumentExtractor::registerMimeTypes() writes by "mimetypes" - confirm
+     * which column callers are expected to pass.
+     * NOTE(review): $type is interpolated into the statement unescaped - confirm
+     * that callers never pass untrusted input.
+     *
+     * @param string $type
+     * @return mixed The extractor class name, or the PEAR error returned by the lookup.
+     */
+    public static function resolveExtractor($type)
+    {
+        global $default;
+        $sql = "select extractor from mime_types where filetypes='$type'";
+        $class = DBUtil::getOneResultKey($sql,'extractor');
+        if (PEAR::isError($class))
+        {
+            $default->log->error("resolveExtractor: cannot resolve $type");
+            return $class;
+        }
+        // This method is static, so there is no $this whose debug flag could be
+        // consulted (doing so is a fatal error); log at debug level instead.
+        $default->log->debug("resolveExtractor: Resolved '$class' from mime type '$type'.");
+        return $class;
+    }
+
+    /**
+     * Return all the discussion text.
+     *
+     * Concatenates the subject and body of every comment found for the
+     * document, each followed by a newline.
+     *
+     * @param int $docid
+     * @return string Empty when the lookup fails or there are no comments.
+     */
+    public static function getDiscussionText($docid)
+    {
+        // The id is interpolated unquoted, so make sure it really is an integer.
+        $docid = (int) $docid;
+        $sql = "SELECT
+                    dc.subject, dc.body
+                FROM
+                    discussion_threads dt
+                    INNER JOIN discussion_comments dc ON dc.thread_id=dt.id AND dc.id BETWEEN dt.first_comment_id AND dt.last_comment_id
+                WHERE
+                    dt.document_id=$docid";
+        $result = DBUtil::getResultArray($sql);
+        $text = '';
+
+        // A failed query yields an error object, not rows; do not iterate it.
+        if (PEAR::isError($result) || empty($result))
+        {
+            return $text;
+        }
+
+        foreach($result as $record)
+        {
+            $text .= $record['subject'] . "\n" . $record['body'] . "\n";
+        }
+
+        return $text;
+    }
+
+    /**
+     * Schedule the indexing of a document.
/**
 * Check if a document is scheduled to be indexed.
 *
 * @param mixed $document This may be a document or document id
 * @return boolean True when at least one index_files row exists for the
 *                 document. False when the argument is neither numeric nor
 *                 a Document, or when the lookup fails.
 */
public static function isDocumentScheduled($document)
{
    global $default;

    if (is_numeric($document))
    {
        $docid = $document;
    }
    else if ($document instanceof Document)
    {
        $docid = $document->getId();
    }
    else
    {
        return false;
    }

    $sql = "SELECT 1 FROM index_files WHERE document_id=$docid";
    $result = DBUtil::getResultArray($sql);

    // A failed query yields a PEAR error object, not a row list; counting
    // it would wrongly report the document as scheduled.
    if (PEAR::isError($result))
    {
        $default->log->error("isDocumentScheduled: could not query the index queue for document $docid");
        return false;
    }

    return count($result) > 0;
}
/**
 * Load hooks for the text extraction process.
 *
 * Scans the hook directory for files ending in 'Hook.inc.php', includes
 * each one and instantiates the class named after the file (file name
 * minus '.inc.php'). Hooks whose registerMimeTypes() returns null are
 * stored as general hooks; all others are stored per mime type.
 */
private function loadExtractorHooks()
{
    global $default;

    $this->generalHookCache = array();
    $this->mimeHookCache = array();

    $dir = opendir($this->hookPath);
    if ($dir === false)
    {
        // Nothing to load; leave both caches empty.
        $default->log->error("loadExtractorHooks: cannot open hook path '{$this->hookPath}'.");
        return;
    }

    while (($file = readdir($dir)) !== false)
    {
        if (substr($file,-12) != 'Hook.inc.php')
        {
            continue;
        }

        require_once($this->hookPath . '/' . $file);
        $class = substr($file, 0, -8);

        if (!class_exists($class))
        {
            continue;
        }

        $hook = new $class;
        // Test the instance: $class is a string, so the previous
        // "$class instanceof" check rejected every hook.
        if (!($hook instanceof ExtractorHook))
        {
            continue;
        }

        // Store the object handle by value. Storing "& $hook" made every
        // cache slot alias the loop variable, so all entries ended up
        // pointing at the last hook instantiated.
        $mimeTypes = $hook->registerMimeTypes();
        if (is_null($mimeTypes))
        {
            $this->generalHookCache[] = $hook;
        }
        else
        {
            foreach($mimeTypes as $type)
            {
                $this->mimeHookCache[$type][] = $hook;
            }
        }
    }
    closedir($dir);
}
/**
 * The main function that may be called repeatedly to index documents.
 *
 * Loads the extractor hooks, purges queue entries via clearoutDeleted(),
 * selects up to $max unprocessed queue entries, stamps them with a
 * processdate so a follow-up run does not pick them up, and then for each
 * entry either extracts and indexes the content (what = 'A' or 'C',
 * including the discussion for 'A') or indexes only the discussion.
 *
 * @param int $max Maximum number of documents to process. When null, the
 *                 'indexer/batchDocuments' setting is used (default 20).
 */
public function indexDocuments($max=null)
{
    global $default;

    $config = KTConfig::getSingleton();

    if (is_null($max))
    {
        $max = $config->get('indexer/batchDocuments',20);
    }
    // The value is interpolated into the LIMIT clause below.
    $max = (int) $max;

    $this->loadExtractorHooks();

    Indexer::clearoutDeleted();

    // identify the indexers that must run
    // mysql specific limit!
    $sql = "SELECT
                iff.document_id, mt.filetypes, mt.mimetypes, mt.extractor, iff.what
            FROM
                index_files iff
                INNER JOIN documents d ON iff.document_id=d.id
                INNER JOIN document_metadata_version dmv ON d.metadata_version_id=dmv.id
                INNER JOIN document_content_version dcv ON dmv.content_version_id=dcv.id
                INNER JOIN mime_types mt ON dcv.mime_id=mt.id
            WHERE
                iff.processdate IS NULL AND dmv.status_id=1
            ORDER BY indexdate
            LIMIT $max";
    $result = DBUtil::getResultArray($sql);
    if (PEAR::isError($result))
    {
        return;
    }

    // bail if no work to do
    if (count($result) == 0)
    {
        return;
    }

    // identify any documents that need indexing and mark them
    // so they are not taken in a followup run
    $ids = array();
    foreach($result as $docinfo)
    {
        $ids[] = $docinfo['document_id'];
    }

    // mark the documents as being processed
    // 'i' is minutes; the previous 'j' wrote the day of the month there.
    $date = date('Y-m-d H:i:s');
    $ids=implode(',',$ids);
    $sql = "UPDATE index_files SET processdate='$date' WHERE document_id in ($ids)";
    DBUtil::runQuery($sql);

    $extractorCache = array();
    $storageManager = KTStorageManagerUtil::getSingleton();

    $tempPath = $config->get("urls/tmpDirectory");

    foreach($result as $docinfo)
    {
        $docId=$docinfo['document_id'];
        $extension=$docinfo['filetypes'];
        $mimeType=$docinfo['mimetypes'];
        $extractorClass=$docinfo['extractor'];
        $indexDocument = in_array($docinfo['what'], array('A','C'));
        $indexDiscussion = in_array($docinfo['what'], array('A','D'));

        if ($this->debug) $default->log->debug("Indexing docid: $docId extension: '$extension' mimetype: '$mimeType' extractor: '$extractorClass'");

        if (empty($extractorClass))
        {
            if ($this->debug) $default->log->debug("No extractor for docid: $docId");

            Indexer::unqueueDocument($docId);
            continue;
        }

        if ($this->debug) print "Processing document $docId.\n";
        if ($indexDocument)
        {
            if (array_key_exists($extractorClass, $extractorCache))
            {
                $extractor = $extractorCache[$extractorClass];
            }
            else
            {
                require_once('extractors/' . $extractorClass . '.inc.php');

                if (!class_exists($extractorClass))
                {
                    $default->log->error("indexDocuments: extractor '$extractorClass' does not exist.");
                    continue;
                }

                $extractor = $extractorCache[$extractorClass] = new $extractorClass();
            }

            if (is_null($extractor))
            {
                $default->log->error("indexDocuments: extractor '$extractorClass' not resolved - it is null.");
                continue;
            }

            if (!($extractor instanceof DocumentExtractor))
            {
                $default->log->error("indexDocuments: extractor '$extractorClass' is not a document extractor class.");
                continue;
            }

            $document = Document::get($docId);
            $sourceFile = $storageManager->temporaryFile($document);

            if (empty($sourceFile) || !is_file($sourceFile))
            {
                $default->log->error("indexDocuments: source file '$sourceFile' for document $docId does not exist.");
                Indexer::unqueueDocument($docId);
                continue;
            }

            if ($extractor->needsIntermediateSourceFile())
            {
                $intermediate = $tempPath . '/'. $document->getFileName();
                // Separate variable: $result still names the batch being iterated.
                $copied = @copy($sourceFile, $intermediate);
                if ($copied === false)
                {
                    $default->log->error("Could not create intermediate file from document $docId");
                    // problem. lets try again later. probably permission related. log the issue.
                    continue;
                }
                $sourceFile = $intermediate;
            }

            // tempnam() creates a placeholder file; remember its name so it
            // can be removed together with the '.txt' target.
            $tempBase = tempnam($tempPath, 'ktindexer');
            $targetFile = $tempBase . '.txt';

            $extractor->setSourceFile($sourceFile);
            $extractor->setMimeType($mimeType);
            $extractor->setExtension($extension);
            $extractor->setTargetFile($targetFile);
            $extractor->setDocument($document);
            $extractor->setIndexingStatus(null);
            $extractor->setExtractionStatus(null);
            if ($this->debug) $default->log->debug("Extra Info docid: $docId Source File: '$sourceFile' Target File: '$targetFile'");

            $this->executeHook($extractor, 'pre_extract');
            $this->executeHook($extractor, 'pre_extract', $mimeType);

            if ($extractor->extractTextContent())
            {
                $extractor->setExtractionStatus(true);
                $this->executeHook($extractor, 'pre_index');
                $this->executeHook($extractor, 'pre_index', $mimeType);

                $title = $document->getName();
                if ($indexDiscussion)
                {
                    $indexStatus = $this->indexDocumentAndDiscussion($docId, $targetFile, $title);

                    if (!$indexStatus) $default->log->error("Problem indexing document $docId");

                    $extractor->setIndexingStatus($indexStatus);
                }
                else
                {
                    if (!$this->filterText($targetFile))
                    {
                        $default->log->error("Problem filtering document $docId");
                    }
                    else
                    {
                        $indexStatus = $this->indexDocument($docId, $targetFile, $title);

                        if (!$indexStatus) $default->log->error("Problem indexing document $docId");

                        $extractor->setIndexingStatus($indexStatus);
                    }
                }

                $this->executeHook($extractor, 'post_index', $mimeType);
                $this->executeHook($extractor, 'post_index');
            }
            else
            {
                $extractor->setExtractionStatus(false);
                $default->log->error("Could not extract contents from document $docId");
            }

            $this->executeHook($extractor, 'post_extract', $mimeType);
            $this->executeHook($extractor, 'post_extract');

            if ($extractor->needsIntermediateSourceFile())
            {
                @unlink($sourceFile);
            }

            @unlink($targetFile);
            if ($tempBase !== false)
            {
                @unlink($tempBase);
            }
        }
        else
        {
            $this->indexDiscussion($docId);
        }

        Indexer::unqueueDocument($docId);
        if ($this->debug) $default->log->debug("Done indexing docid: $docId");

    }
    if ($this->debug) print "Done.\n";
}
/**
 * Register the extractor types.
 *
 * Includes every file in the extractor directory whose name ends in
 * 'Extractor.inc.php', instantiates the class named after the file (file
 * name minus '.inc.php') and calls registerMimeTypes() on it.
 *
 * @param boolean $clear Optional. Defaults to false. When true, existing
 *                       extractor associations are cleared first.
 */
public function registerTypes($clear=false)
{
    global $default;

    if ($clear)
    {
        $this->clearExtractors();
    }

    $dir = opendir($this->extractorPath);
    if ($dir === false)
    {
        $default->log->error("registerTypes: cannot open extractor path '{$this->extractorPath}'.");
        return;
    }

    while (($file = readdir($dir)) !== false)
    {
        if (substr($file,-17) != 'Extractor.inc.php')
        {
            continue;
        }

        require_once($this->extractorPath . '/' . $file);
        $class = substr($file, 0, -8);

        // Skip files that do not define the expected class. The previous
        // check was inverted: it skipped every class that existed and
        // tried to instantiate the ones that did not.
        if (!class_exists($class))
        {
            continue;
        }

        $extractor = new $class;
        // Test the instance, not the class-name string.
        if (!($extractor instanceof DocumentExtractor))
        {
            continue;
        }

        $extractor->registerMimeTypes();
    }
    closedir($dir);
}
/**
 * Encode an integer as a fixed-width, letters-only string.
 *
 * The value is left-padded with zeros to 14 characters (longer values are
 * left as they are) and each digit 0-9 is mapped to the letter a-j, so the
 * result compares in the same order as the number and can be reversed.
 *
 * @param int $int
 * @return string
 */
public static function longToString($int)
{
    $width = 14;

    // Left-pad with zeros; values already at or beyond the width are unchanged.
    $padded = str_pad("$int", $width, '0', STR_PAD_LEFT);

    // Map digits to letters in a single pass.
    return strtr($padded, '0123456789', 'abcdefghij');
}
/**
 * Build a Lucene document from the given parts and add it to the index.
 *
 * The document id is stored in its longToString() form; content,
 * discussion and title are stored as UTF-8 text fields.
 *
 * @param int $docid
 * @param string $content
 * @param string $discussion
 * @param string $title
 */
private function addDocument($docid, $content, $discussion, $title='')
{
    $luceneDoc = new Zend_Search_Lucene_Document();

    $encodedId = PHPLuceneIndexer::longToString($docid);
    $luceneDoc->addField(Zend_Search_Lucene_Field::Text('DocumentID', $encodedId));

    // Same field order as before: Content, Discussion, Title.
    $utf8Fields = array(
        'Content'    => $content,
        'Discussion' => $discussion,
        'Title'      => $title,
    );
    foreach ($utf8Fields as $fieldName => $fieldValue)
    {
        $luceneDoc->addField(Zend_Search_Lucene_Field::Text($fieldName, $fieldValue, 'UTF-8'));
    }

    $this->lucene->addDocument($luceneDoc);
}
/**
 * Removes a document from the index.
 *
 * Every hit for the document id is deleted. The stored field values of
 * the last hit are returned so callers can re-add the document with part
 * of it replaced.
 *
 * @param int $docid
 * @return array containing (content, discussion, title); empty strings when the document was not indexed
 */
public function deleteDocument($docid)
{
    $content = '';
    $discussion = '';
    // Initialised up front so the return value is defined when there are no hits.
    $title = '';

    $query = Zend_Search_Lucene_Search_QueryParser::parse('DocumentID:' . PHPLuceneIndexer::longToString($docid));
    $hits = $this->lucene->find($query);
    // there should only be one, but we'll loop for safety
    foreach ($hits as $hit)
    {
        $content = $hit->Content;
        $discussion = $hit->Discussion;
        // Keep the stored title. It used to be reset to '' right after
        // being read, so indexDiscussion() re-added the document with a
        // blank title.
        $title = $hit->Title;

        $this->lucene->delete($hit);
    }
    return array($content, $discussion, $title);
}
/**
 * Process one character while outside a quoted string (state 0).
 *
 * Characters accumulate in $this->value until whitespace or a delimiter
 * ends the word. The word is then emitted as a keyword token or as
 * TERMINAL, and a delimiter is pushed back so the next call turns it into
 * its own token. A double quote switches the lexer to string state.
 *
 * @throws Exception when '!' is not followed by '='
 */
private function processNormalChar()
{
    $append=true;
    $clear=false;
    $checkwords=false;
    $word='';

    // ':' is included so that field:"value" emits the field name before
    // COLON; previously the pending word was swallowed into the COLON token.
    $delimiters = array('=','(',')','[',']',',',':','!','<','>','"');

    // Compare against '' rather than using empty(), which treats the word
    // '0' as if nothing had been read.
    $hasPendingWord = ($this->value !== '');

    if ($hasPendingWord && in_array($this->char, $delimiters, true))
    {
        // Emit the pending word now and re-read the delimiter next time.
        $word=$this->value;
        $checkwords=true;
        $this->offset--;
        $append=false;
    }
    else
    {
        switch ($this->char)
        {
            case ' ':
            case "\t":
            case "\r":
            case "\n":
                if ($hasPendingWord)
                {
                    $word=$this->value;
                    $checkwords=true;
                }
                $append=false;
                $clear=true;
                break;
            case '=':
                $this->token=SearchCommandParser::IS;
                break;
            case '(':
                $this->token=SearchCommandParser::PAR_OPEN;
                break;
            case ')':
                $this->token=SearchCommandParser::PAR_CLOSE;
                break;
            case ',':
                $this->token=SearchCommandParser::COMMA;
                break;
            case ':':
                $this->token=SearchCommandParser::COLON;
                break;
            case '[':
                $this->token=SearchCommandParser::SQUARE_OPEN;
                break;
            case ']':
                $this->token=SearchCommandParser::SQUARE_CLOSE;
                break;
            case '!':
                if ($this->lookahead == '=')
                {
                    $this->zap();
                    $this->token=SearchCommandParser::IS_NOT;
                }
                else
                {
                    throw new Exception('Unexpected token: ' . $this->lookahead);
                }
                break;
            case '<':
            case '>':
                // Only "<>" means "is not"; ">>" used to be accepted as well.
                if ($this->char == '<' && $this->lookahead == '>')
                {
                    $this->zap();
                    $this->token=SearchCommandParser::IS_NOT;
                }
                elseif ($this->lookahead == '=')
                {
                    $this->zap();
                    $this->token=($this->char == '<')?(SearchCommandParser::LE):(SearchCommandParser::GE);
                }
                else
                {
                    $this->token=($this->char == '<')?(SearchCommandParser::LT):(SearchCommandParser::GT);
                }
                break;
            case '"':
                $clear=true;
                $this->state=1;
                break;
        }
    }

    if ($clear)
    {
        $this->char='';
        $this->value='';
        $this->token=null;
    }
    if ($append)
    {
        $this->value .= $this->char;
    }
    if (!is_null($this->token))
    {
        $this->exit=true;
    }
    if ($checkwords)
    {
        $this->exit=true;
        $this->value = $word;

        // NOTE(review): 'contain' maps to CONTAINS, while the grammar rule
        // "terminal DOES notop CONTAIN value" expects CONTAIN - confirm
        // which token was intended.
        $keywords = array(
            'not'      => SearchCommandParser::NOT,
            'with'     => SearchCommandParser::WITH,
            'like'     => SearchCommandParser::LIKE,
            'contains' => SearchCommandParser::CONTAINS,
            'contain'  => SearchCommandParser::CONTAINS,
            'starts'   => SearchCommandParser::START,
            'start'    => SearchCommandParser::START,
            'ends'     => SearchCommandParser::END,
            'end'      => SearchCommandParser::END,
            'does'     => SearchCommandParser::DOES,
            'is'       => SearchCommandParser::IS,
            'between'  => SearchCommandParser::BETWEEN,
            'or'       => SearchCommandParser::OPOR,
            'and'      => SearchCommandParser::OPAND,
        );

        $lowered = strtolower($word);
        $this->token = isset($keywords[$lowered])
            ? $keywords[$lowered]
            : SearchCommandParser::TERMINAL;
    }
}
/**
 * Return the token's text.
 *
 * The constructor stores the text in $this->string; the previous code
 * returned the non-existent $this->_string.
 *
 * @return string
 */
function __toString()
{
    return (string) $this->string;
}
/**
 * A single element of the parser's stack.
 *
 * Information stored includes:
 *
 * + The state number for the parser at this level of the stack.
 *
 * + The value of the token stored at this level of the stack.
 *   (In other words, the "major" token.)
 *
 * + The semantic value stored at this level of the stack. This is
 *   the information used by the action routines in the grammar.
 *   It is sometimes called the "minor" token.
 */
class SearchCommandParseryyStackEntry
{
    /** The state number. */
    public $stateno;
    /** The major token value: the code number for the token at this stack level. */
    public $major;
    /** The user-supplied minor token value: the value of the token. */
    public $minor;
};
/**
 * Report whether the parse result is still 'ok'.
 *
 * @return boolean
 */
public function isExprOk()
{
    $succeeded = ($this->parse_result == 'ok');

    return $succeeded;
}
Denotes unused +** slots in the yy_action[] table. +** +** The action table is constructed as a single large static array $yy_action. +** Given state S and lookahead X, the action is computed as +** +** self::$yy_action[self::$yy_shift_ofst[S] + X ] +** +** If the index value self::$yy_shift_ofst[S]+X is out of range or if the value +** self::$yy_lookahead[self::$yy_shift_ofst[S]+X] is not equal to X or if +** self::$yy_shift_ofst[S] is equal to self::YY_SHIFT_USE_DFLT, it means that +** the action is not in the table and that self::$yy_default[S] should be used instead. +** +** The formula above is for computing the action when the lookahead is +** a terminal symbol. If the lookahead is a non-terminal (as occurs after +** a reduce action) then the static $yy_reduce_ofst array is used in place of +** the static $yy_shift_ofst array and self::YY_REDUCE_USE_DFLT is used in place of +** self::YY_SHIFT_USE_DFLT. +** +** The following are the tables generated in this section: +** +** self::$yy_action A single table containing all actions. +** self::$yy_lookahead A table containing the lookahead for each entry in +** yy_action. Used to detect hash collisions. +** self::$yy_shift_ofst For each state, the offset into self::$yy_action for +** shifting terminals. +** self::$yy_reduce_ofst For each state, the offset into self::$yy_action for +** shifting non-terminals after a reduce. +** self::$yy_default Default action for each state. 
+*/ + const YY_SZ_ACTTAB = 70; +static public $yy_action = array( + /* 0 */ 52, 15, 8, 7, 4, 23, 22, 37, 34, 54, + /* 10 */ 33, 3, 5, 16, 9, 2, 21, 83, 1, 13, + /* 20 */ 50, 32, 36, 3, 5, 44, 17, 26, 47, 1, + /* 30 */ 19, 39, 1, 41, 14, 46, 20, 1, 45, 38, + /* 40 */ 1, 6, 35, 10, 42, 27, 31, 12, 5, 24, + /* 50 */ 18, 53, 28, 52, 63, 63, 63, 30, 63, 63, + /* 60 */ 63, 49, 48, 29, 40, 43, 51, 63, 11, 25, + ); + static public $yy_lookahead = array( + /* 0 */ 3, 4, 6, 7, 3, 8, 9, 10, 11, 12, + /* 10 */ 13, 1, 2, 16, 17, 14, 27, 28, 29, 18, + /* 20 */ 23, 20, 25, 1, 2, 15, 14, 27, 33, 29, + /* 30 */ 27, 24, 29, 21, 30, 27, 32, 29, 27, 24, + /* 40 */ 29, 2, 19, 18, 15, 21, 19, 5, 2, 33, + /* 50 */ 22, 31, 31, 3, 34, 34, 34, 31, 34, 34, + /* 60 */ 34, 31, 31, 31, 31, 31, 31, 34, 32, 32, +); + const YY_SHIFT_USE_DFLT = -5; + const YY_SHIFT_MAX = 31; + static public $yy_shift_ofst = array( + /* 0 */ 1, -3, 1, 1, 1, 1, 12, 12, 12, 12, + /* 10 */ 12, 12, 12, 12, 12, 50, 50, 24, 24, 10, + /* 20 */ -4, 22, 15, 7, 29, 42, 46, 28, 27, 39, + /* 30 */ 23, 25, +); + const YY_REDUCE_USE_DFLT = -12; + const YY_REDUCE_MAX = 18; + static public $yy_reduce_ofst = array( + /* 0 */ -11, 4, 3, 0, 8, 11, 31, 32, 33, 30, + /* 10 */ 26, 20, 35, 21, 34, 36, 37, 16, -5, +); + static public $yyExpectedTokens = array( + /* 0 */ array(3, 14, 18, 20, ), + /* 1 */ array(3, 4, 8, 9, 10, 11, 12, 13, 16, 17, 23, 25, ), + /* 2 */ array(3, 14, 18, 20, ), + /* 3 */ array(3, 14, 18, 20, ), + /* 4 */ array(3, 14, 18, 20, ), + /* 5 */ array(3, 14, 18, 20, ), + /* 6 */ array(14, 21, ), + /* 7 */ array(14, 21, ), + /* 8 */ array(14, 21, ), + /* 9 */ array(14, 21, ), + /* 10 */ array(14, 21, ), + /* 11 */ array(14, 21, ), + /* 12 */ array(14, 21, ), + /* 13 */ array(14, 21, ), + /* 14 */ array(14, 21, ), + /* 15 */ array(3, ), + /* 16 */ array(3, ), + /* 17 */ array(21, ), + /* 18 */ array(21, ), + /* 19 */ array(1, 2, 15, ), + /* 20 */ array(6, 7, ), + /* 21 */ array(1, 2, ), + /* 22 */ array(24, ), + /* 23 */ 
array(24, ), + /* 24 */ array(15, ), + /* 25 */ array(5, ), + /* 26 */ array(2, ), + /* 27 */ array(22, ), + /* 28 */ array(19, ), + /* 29 */ array(2, ), + /* 30 */ array(19, ), + /* 31 */ array(18, ), + /* 32 */ array(), + /* 33 */ array(), + /* 34 */ array(), + /* 35 */ array(), + /* 36 */ array(), + /* 37 */ array(), + /* 38 */ array(), + /* 39 */ array(), + /* 40 */ array(), + /* 41 */ array(), + /* 42 */ array(), + /* 43 */ array(), + /* 44 */ array(), + /* 45 */ array(), + /* 46 */ array(), + /* 47 */ array(), + /* 48 */ array(), + /* 49 */ array(), + /* 50 */ array(), + /* 51 */ array(), + /* 52 */ array(), + /* 53 */ array(), + /* 54 */ array(), +); + static public $yy_default = array( + /* 0 */ 82, 66, 82, 82, 82, 82, 82, 82, 82, 82, + /* 10 */ 82, 82, 82, 82, 82, 66, 66, 82, 82, 82, + /* 20 */ 82, 55, 82, 82, 82, 82, 57, 73, 82, 82, + /* 30 */ 82, 82, 69, 78, 77, 68, 81, 76, 80, 79, + /* 40 */ 62, 70, 71, 60, 59, 56, 58, 72, 61, 65, + /* 50 */ 74, 64, 67, 63, 75, +); +/* The next thing included is series of defines which control +** various aspects of the generated parser. +** self::YYNOCODE is a number which corresponds +** to no legal terminal or nonterminal number. This +** number is used to fill in empty slots of the hash +** table. +** self::YYFALLBACK If defined, this indicates that one or more tokens +** have fall-back values which should be used if the +** original value of the token will not parse. +** self::YYSTACKDEPTH is the maximum depth of the parser's stack. +** self::YYNSTATE the combined number of states. +** self::YYNRULE the number of rules in the grammar +** self::YYERRORSYMBOL is the code number of the error symbol. If not +** defined, then do no error processing. +*/ + const YYNOCODE = 35; + const YYSTACKDEPTH = 100; + const YYNSTATE = 55; + const YYNRULE = 27; + const YYERRORSYMBOL = 26; + const YYERRSYMDT = 'yy0'; + const YYFALLBACK = 0; + /** The next table maps tokens into fallback tokens. 
If a construct
+     * like the following:
+     *
+     *      %fallback ID X Y Z.
+     *
+     * appears in the grammar, then ID becomes a fallback token for X, Y,
+     * and Z.  Whenever one of the tokens X, Y, or Z is input to the parser
+     * but it does not parse, the type of the token is changed to ID and
+     * the parse is retried before an error is thrown.
+     */
+    static public $yyFallback = array(
+    );
+
+    /**
+     * Switch parser tracing on or off.
+     *
+     * Tracing needs both a stream and a prompt.  When the stream is
+     * missing the prompt is reset to 0; when only the prompt is missing
+     * the stream is reset to 0.  Either way tracing ends up disabled.
+     *
+     * @param resource stream that receives the trace output
+     * @param string   prefix written in front of every trace message
+     */
+    static function Trace($TraceFILE, $zTracePrompt)
+    {
+        $haveStream = (bool) $TraceFILE;
+        $havePrompt = (bool) $zTracePrompt;
+        if (!$haveStream) {
+            $zTracePrompt = 0;
+        }
+        if ($haveStream && !$havePrompt) {
+            $TraceFILE = 0;
+        }
+        self::$yyTraceFILE = $TraceFILE;
+        self::$yyTracePrompt = $zTracePrompt;
+    }
+
+    /**
+     * Send trace output to php://output with an empty prompt.
+     */
+    static function PrintTrace()
+    {
+        self::$yyTracePrompt = '';
+        self::$yyTraceFILE = fopen('php://output', 'w');
+    }
+
+    /**
+     * Stream that receives trace output (falsy when tracing is off)
+     * @var resource|0
+     */
+    static public $yyTraceFILE;
+    /**
+     * String to prepend to debug output
+     * @var string|0
+     */
+    static public $yyTracePrompt;
+    /**
+     * Index of top element in stack
+     * @var int
+     */
+    public $yyidx;
+    /**
+     * Shifts left before out of the error
+     * @var int
+     */
+    public $yyerrcnt;
+    /**
+     * The parser's stack
+     * @var array
+     */
+    public $yystack = array();
+
+    /**
+     * Names of all terminals and nonterminals, indexed by symbol number.
+     * Used when tracing shifts and by tokenName().
+     * @var array
+     */
+    static public $yyTokenName = array(
+  '$',             'OPOR',          'OPAND',         'NOT',
+  'IS',            'CONTAIN',       'LIKE',          'BETWEEN',
+  'START',         'END',           'GT',            'LE',
+  'LT',            'GE',            'PAR_OPEN',      'PAR_CLOSE',
+  'DOES',          'COLON',         'SQUARE_OPEN',   'SQUARE_CLOSE',
+  'TERMINAL',      'VALUE',         'COMMA',         'CONTAINS',
+  'WITH',          'IS_NOT',        'error',         'expr',
+  'cmdline',       'terminal',      'operator',      'value',
+  'notop',         'valuelist',
+    );
+
+    /**
+     * Text of every grammar rule, indexed by rule number.
+     * Used when tracing reduce actions.
+     * @var array
+     */
+    static public $yyRuleName = array(
+ /*   0 */ "cmdline ::= expr",
+ /*   1 */ "expr ::= expr OPAND expr",
+ /*   2 */ "expr ::= expr OPOR expr",
+ /*   3 */ "expr ::= NOT expr",
+ /*   4 */ "expr ::= PAR_OPEN expr PAR_CLOSE",
+ /*   5 */ "expr ::= terminal operator value",
+ /*   6 */ "expr ::= terminal notop BETWEEN value OPAND value",
+ /*   7 */ "expr ::= terminal notop LIKE value",
+ /*   8 */ "expr ::= terminal IS notop value",
+ /*   9 */ "expr ::= terminal DOES notop CONTAIN value",
+ /*  10 */ "expr ::= terminal COLON value",
+ /*  11 */ "notop ::=",
+ /*  12 */ "notop ::= NOT",
+ /*  13 */ "terminal ::= SQUARE_OPEN value SQUARE_CLOSE SQUARE_OPEN value SQUARE_CLOSE",
+ /*  14 */ "terminal ::= TERMINAL",
+ /*  15 */ "value ::= VALUE",
+ /*  16 */ "value ::= PAR_OPEN valuelist PAR_CLOSE",
+ /*  17 */ "valuelist ::= VALUE COMMA valuelist",
+ /*  18 */ "valuelist ::= VALUE",
+ /*  19 */ "operator ::= CONTAINS",
+ /*  20 */ "operator ::= LT",
+ /*  21 */ "operator ::= GT",
+ /*  22 */ "operator ::= LE",
+ /*  23 */ "operator ::= GE",
+ /*  24 */ "operator ::= START WITH",
+ /*  25 */ "operator ::= END WITH",
+ /*  26 */ "operator ::= IS_NOT",
+    );
+
+    /**
+     * This function returns the symbolic name associated with a token
+     * value.
+     * @param int
+     * @return string
+     */
+    function tokenName($tokenType)
+    {
+        // 0 is the end-of-input marker, not a regular entry of the name table
+        if ($tokenType === 0) {
+            return 'End of Input';
+        }
+        $isKnown = $tokenType > 0 && $tokenType < count(self::$yyTokenName);
+        return $isKnown ? self::$yyTokenName[$tokenType] : "Unknown";
+    }
+
+    /**
+     * Delete the value associated with a symbol (terminal or nonterminal).
+     *
+     * No destructor actions are defined here, so every symbol falls
+     * through to the empty default case.
+     * @param int the symbol code
+     * @param mixed the symbol's value
+     */
+    static function yy_destructor($yymajor, $yypminor)
+    {
+        switch ($yymajor) {
+        /* Destructor actions for individual symbols would be inserted
+        ** here; none are present in this file. */
+            default:
+                break;   /* If no destructor action specified: do nothing */
+        }
+    }
+
+    /**
+     * Pop the parser's stack once.
+     *
+     * The popped symbol is handed to yy_destructor() and the stack index
+     * is decremented.  Nothing happens when the stack is already empty.
+     *
+     * @return int|null major token number of the popped symbol
+     */
+    function yy_pop_parser_stack()
+    {
+        if (count($this->yystack) === 0) {
+            return;
+        }
+        $entry = array_pop($this->yystack);
+        $tracing = self::$yyTraceFILE && $this->yyidx >= 0;
+        if ($tracing) {
+            $line = self::$yyTracePrompt . 'Popping ' . self::$yyTokenName[$entry->major] . "\n";
+            fwrite(self::$yyTraceFILE, $line);
+        }
+        $poppedMajor = $entry->major;
+        self::yy_destructor($poppedMajor, $entry->minor);
+        $this->yyidx--;
+        return $poppedMajor;
+    }
+
+    /**
+     * Deallocate and destroy a parser.  Destructors are all called for
+     * all stack elements before shutting the parser down.
+     */
+    function __destruct()
+    {
+        while ($this->yyidx >= 0) {
+            $this->yy_pop_parser_stack();
+        }
+        if (is_resource(self::$yyTraceFILE)) {
+            fclose(self::$yyTraceFILE);
+        }
+    }
+
+    /**
+     * Based on the current state and parser stack, get a list of all
+     * possible lookahead tokens.
+     *
+     * When $token is not directly expected in the current state, the
+     * reductions the parser would perform are simulated on the real stack
+     * and the tokens expected by every state reached are collected.  The
+     * stack and stack index are restored from a snapshot on every exit.
+     *
+     * @param int the lookahead token
+     * @return array token numbers that would be accepted
+     */
+    function yy_get_expected_tokens($token)
+    {
+        $state = $this->yystack[$this->yyidx]->stateno;
+        $expected = self::$yyExpectedTokens[$state];
+        if (in_array($token, self::$yyExpectedTokens[$state], true)) {
+            return $expected;
+        }
+        // snapshot, so the simulation below can be undone
+        $stack = $this->yystack;
+        $yyidx = $this->yyidx;
+        do {
+            $yyact = $this->yy_find_shift_action($token);
+            if ($yyact >= self::YYNSTATE && $yyact < self::YYNSTATE + self::YYNRULE) {
+                // reduce action
+                $done = 0;
+                do {
+                    if ($done++ == 100) {
+                        $this->yyidx = $yyidx;
+                        $this->yystack = $stack;
+                        // too much recursion prevents proper detection
+                        // so give up
+                        return array_unique($expected);
+                    }
+                    $yyruleno = $yyact - self::YYNSTATE;
+                    $this->yyidx -= self::$yyRuleInfo[$yyruleno]['rhs'];
+                    $nextstate = $this->yy_find_reduce_action(
+                        $this->yystack[$this->yyidx]->stateno,
+                        self::$yyRuleInfo[$yyruleno]['lhs']);
+                    if (isset(self::$yyExpectedTokens[$nextstate])) {
+                        // array_merge appends; the "+" union would drop every
+                        // token whose numeric index already exists in $expected
+                        $expected = array_merge($expected, self::$yyExpectedTokens[$nextstate]);
+                        if (in_array($token,
+                              self::$yyExpectedTokens[$nextstate], true)) {
+                            $this->yyidx = $yyidx;
+                            $this->yystack = $stack;
+                            return array_unique($expected);
+                        }
+                    }
+                    if ($nextstate < self::YYNSTATE) {
+                        // we need to shift a non-terminal
+                        $this->yyidx++;
+                        $x = new SearchCommandParseryyStackEntry;
+                        $x->stateno = $nextstate;
+                        $x->major = self::$yyRuleInfo[$yyruleno]['lhs'];
+                        $this->yystack[$this->yyidx] = $x;
+                        continue 2;
+                    } elseif ($nextstate == self::YYNSTATE + self::YYNRULE + 1) {
+                        $this->yyidx = $yyidx;
+                        $this->yystack = $stack;
+                        // the last token was just ignored, we can't accept
+                        // by ignoring input, this is in essence ignoring a
+                        // syntax error!
+                        return array_unique($expected);
+                    } elseif ($nextstate === self::YY_NO_ACTION) {
+                        $this->yyidx = $yyidx;
+                        $this->yystack = $stack;
+                        // input accepted, but not shifted (I guess)
+                        return array_unique($expected);
+                    } else {
+                        $yyact = $nextstate;
+                    }
+                } while (true);
+            }
+            break;
+        } while (true);
+        // undo any non-terminals shifted during the simulation
+        $this->yyidx = $yyidx;
+        $this->yystack = $stack;
+        return array_unique($expected);
+    }
+
+    /**
+     * Based on the parser state and current parser stack, determine whether
+     * the lookahead token is possible.
+     *
+     * The parser will convert the token value to an error token if not.  This
+     * catches some unusual edge cases where the parser would fail.
+     * The stack is restored from a snapshot before every return.
+     * @param int
+     * @return bool
+     */
+    function yy_is_expected_token($token)
+    {
+        if ($token === 0) {
+            return true; // 0 is not part of this
+        }
+        $state = $this->yystack[$this->yyidx]->stateno;
+        if (in_array($token, self::$yyExpectedTokens[$state], true)) {
+            return true;
+        }
+        $stack = $this->yystack;
+        $yyidx = $this->yyidx;
+        do {
+            $yyact = $this->yy_find_shift_action($token);
+            if ($yyact >= self::YYNSTATE && $yyact < self::YYNSTATE + self::YYNRULE) {
+                // reduce action
+                $done = 0;
+                do {
+                    if ($done++ == 100) {
+                        $this->yyidx = $yyidx;
+                        $this->yystack = $stack;
+                        // too much recursion prevents proper detection
+                        // so give up
+                        return true;
+                    }
+                    $yyruleno = $yyact - self::YYNSTATE;
+                    $this->yyidx -= self::$yyRuleInfo[$yyruleno]['rhs'];
+                    $nextstate = $this->yy_find_reduce_action(
+                        $this->yystack[$this->yyidx]->stateno,
+                        self::$yyRuleInfo[$yyruleno]['lhs']);
+                    if (isset(self::$yyExpectedTokens[$nextstate]) &&
+                          in_array($token, self::$yyExpectedTokens[$nextstate], true)) {
+                        $this->yyidx = $yyidx;
+                        $this->yystack = $stack;
+                        return true;
+                    }
+                    if ($nextstate < self::YYNSTATE) {
+                        // we need to shift a non-terminal
+                        $this->yyidx++;
+                        $x = new SearchCommandParseryyStackEntry;
+                        $x->stateno = $nextstate;
+                        $x->major = self::$yyRuleInfo[$yyruleno]['lhs'];
+                        $this->yystack[$this->yyidx] = $x;
+                        continue 2;
+                    } elseif ($nextstate == self::YYNSTATE + self::YYNRULE + 1) {
+                        $this->yyidx = $yyidx;
+                        $this->yystack = $stack;
+                        if (!$token) {
+                            // end of input: this is valid
+                            return true;
+                        }
+                        // the last token was just ignored, we can't accept
+                        // by ignoring input, this is in essence ignoring a
+                        // syntax error!
+                        return false;
+                    } elseif ($nextstate === self::YY_NO_ACTION) {
+                        $this->yyidx = $yyidx;
+                        $this->yystack = $stack;
+                        // input accepted, but not shifted (I guess)
+                        return true;
+                    } else {
+                        $yyact = $nextstate;
+                    }
+                } while (true);
+            }
+            break;
+        } while (true);
+        $this->yyidx = $yyidx;
+        $this->yystack = $stack;
+        return true;
+    }
+
+    /**
+     * Find the appropriate action for a parser given the terminal
+     * look-ahead token iLookAhead.
+     *
+     * If the look-ahead token is YYNOCODE, then check to see if the action is
+     * independent of the look-ahead.  If it is, return the action, otherwise
+     * return YY_NO_ACTION.
+     * @param int The look-ahead token
+     * @return int the action number
+     */
+    function yy_find_shift_action($iLookAhead)
+    {
+        $stateno = $this->yystack[$this->yyidx]->stateno;
+
+        /* if ($this->yyidx < 0) return self::YY_NO_ACTION;  */
+        if (!isset(self::$yy_shift_ofst[$stateno])) {
+            // no shift actions
+            return self::$yy_default[$stateno];
+        }
+        $i = self::$yy_shift_ofst[$stateno];
+        if ($i === self::YY_SHIFT_USE_DFLT) {
+            return self::$yy_default[$stateno];
+        }
+        if ($iLookAhead == self::YYNOCODE) {
+            return self::YY_NO_ACTION;
+        }
+        $i += $iLookAhead;
+        if ($i < 0 || $i >= self::YY_SZ_ACTTAB ||
+              self::$yy_lookahead[$i] != $iLookAhead) {
+            // no table entry for this token: retry with its fallback
+            // token when one is defined, else use the state's default
+            if (count(self::$yyFallback) && $iLookAhead < count(self::$yyFallback)
+                   && ($iFallback = self::$yyFallback[$iLookAhead]) != 0) {
+                if (self::$yyTraceFILE) {
+                    fwrite(self::$yyTraceFILE, self::$yyTracePrompt . "FALLBACK " .
+                        self::$yyTokenName[$iLookAhead] . " => " .
+                        self::$yyTokenName[$iFallback] . "\n");
+                }
+                return $this->yy_find_shift_action($iFallback);
+            }
+            return self::$yy_default[$stateno];
+        } else {
+            return self::$yy_action[$i];
+        }
+    }
+
+    /**
+     * Find the appropriate action for a parser given the non-terminal
+     * look-ahead token $iLookAhead.
+     *
+     * If the look-ahead token is self::YYNOCODE, then check to see if the action is
+     * independent of the look-ahead.  If it is, return the action, otherwise
+     * return self::YY_NO_ACTION.
+     * @param int Current state number
+     * @param int The look-ahead token
+     * @return int the action number
+     */
+    function yy_find_reduce_action($stateno, $iLookAhead)
+    {
+        /* $stateno = $this->yystack[$this->yyidx]->stateno; */
+
+        if (!isset(self::$yy_reduce_ofst[$stateno])) {
+            return self::$yy_default[$stateno];
+        }
+        $i = self::$yy_reduce_ofst[$stateno];
+        if ($i == self::YY_REDUCE_USE_DFLT) {
+            return self::$yy_default[$stateno];
+        }
+        if ($iLookAhead == self::YYNOCODE) {
+            return self::YY_NO_ACTION;
+        }
+        $i += $iLookAhead;
+        if ($i < 0 || $i >= self::YY_SZ_ACTTAB ||
+              self::$yy_lookahead[$i] != $iLookAhead) {
+            return self::$yy_default[$stateno];
+        } else {
+            return self::$yy_action[$i];
+        }
+    }
+
+    /**
+     * Perform a shift action.
+     *
+     * When the stack would grow to YYSTACKDEPTH entries the whole stack
+     * is popped instead and nothing is shifted.
+     * @param int The new state to shift in
+     * @param int The major token to shift in
+     * @param mixed the minor token to shift in
+     */
+    function yy_shift($yyNewState, $yyMajor, $yypMinor)
+    {
+        $this->yyidx++;
+        if ($this->yyidx >= self::YYSTACKDEPTH) {
+            $this->yyidx--;
+            if (self::$yyTraceFILE) {
+                fprintf(self::$yyTraceFILE, "%sStack Overflow!\n", self::$yyTracePrompt);
+            }
+            while ($this->yyidx >= 0) {
+                $this->yy_pop_parser_stack();
+            }
+            /* Here code is inserted which will execute if the parser
+            ** stack ever overflows */
+            return;
+        }
+        $yytos = new SearchCommandParseryyStackEntry;
+        $yytos->stateno = $yyNewState;
+        $yytos->major = $yyMajor;
+        $yytos->minor = $yypMinor;
+        array_push($this->yystack, $yytos);
+        if (self::$yyTraceFILE && $this->yyidx > 0) {
+            fprintf(self::$yyTraceFILE, "%sShift %d\n", self::$yyTracePrompt,
+                $yyNewState);
+            fprintf(self::$yyTraceFILE, "%sStack:", self::$yyTracePrompt);
+            for($i = 1; $i <= $this->yyidx; $i++) {
+                fprintf(self::$yyTraceFILE, " %s",
+                    self::$yyTokenName[$this->yystack[$i]->major]);
+            }
+            fwrite(self::$yyTraceFILE,"\n");
+        }
+    }
+
+    /**
+     * The following table contains information about every rule that
+     * is used during the reduce.
+     *
+     *
+     * array(
+     *  array(
+     *   int 'lhs';        Symbol on the left-hand side of the rule
+     *   int 'rhs';        Number of right-hand side symbols in the rule
+     *  ),...
+     * );
+     * 
+     */
+    static public $yyRuleInfo = array(
+  array( 'lhs' => 28, 'rhs' => 1 ),
+  array( 'lhs' => 27, 'rhs' => 3 ),
+  array( 'lhs' => 27, 'rhs' => 3 ),
+  array( 'lhs' => 27, 'rhs' => 2 ),
+  array( 'lhs' => 27, 'rhs' => 3 ),
+  array( 'lhs' => 27, 'rhs' => 3 ),
+  array( 'lhs' => 27, 'rhs' => 6 ),
+  array( 'lhs' => 27, 'rhs' => 4 ),
+  array( 'lhs' => 27, 'rhs' => 4 ),
+  array( 'lhs' => 27, 'rhs' => 5 ),
+  array( 'lhs' => 27, 'rhs' => 3 ),
+  array( 'lhs' => 32, 'rhs' => 0 ),
+  array( 'lhs' => 32, 'rhs' => 1 ),
+  array( 'lhs' => 29, 'rhs' => 6 ),
+  array( 'lhs' => 29, 'rhs' => 1 ),
+  array( 'lhs' => 31, 'rhs' => 1 ),
+  array( 'lhs' => 31, 'rhs' => 3 ),
+  array( 'lhs' => 33, 'rhs' => 3 ),
+  array( 'lhs' => 33, 'rhs' => 1 ),
+  array( 'lhs' => 30, 'rhs' => 1 ),
+  array( 'lhs' => 30, 'rhs' => 1 ),
+  array( 'lhs' => 30, 'rhs' => 1 ),
+  array( 'lhs' => 30, 'rhs' => 1 ),
+  array( 'lhs' => 30, 'rhs' => 1 ),
+  array( 'lhs' => 30, 'rhs' => 2 ),
+  array( 'lhs' => 30, 'rhs' => 2 ),
+  array( 'lhs' => 30, 'rhs' => 1 ),
+    );
+
+    /**
+     * Maps a rule number to the number N of the yy_rN() method that
+     * handles its reduction (rule 16 shares the handler of rule 4).
+     *
+     * If a rule is not set, it has no handler.
+     */
+    static public $yyReduceMap = array(
+        0 => 0,
+        1 => 1,
+        2 => 2,
+        3 => 3,
+        4 => 4,
+        16 => 4,
+        5 => 5,
+        6 => 6,
+        7 => 7,
+        8 => 8,
+        9 => 9,
+        10 => 10,
+        11 => 11,
+        12 => 12,
+        13 => 13,
+        14 => 14,
+        15 => 15,
+        17 => 17,
+        18 => 18,
+        19 => 19,
+        20 => 20,
+        21 => 21,
+        22 => 22,
+        23 => 23,
+        24 => 24,
+        25 => 25,
+        26 => 26,
+    );
+    /* The reduction handlers follow.  Each yy_rN() reads the values of the
+    ** rule's right-hand side symbols relative to the top of the stack
+    ** ($this->yyidx is the last symbol) and leaves its result in
+    ** $this->_retvalue.  The #line markers point back to the grammar file.
+    */
+#line 53 "SearchCommandParser.y"
+    function yy_r0(){
+        // cmdline ::= expr
+        $top = $this->yystack[$this->yyidx];
+        $this->expr_result = $top->minor;
+    }
+#line 900 "SearchCommandParser.php"
+#line 58 "SearchCommandParser.y"
+    function yy_r1(){
+        // expr ::= expr OPAND expr
+        $i = $this->yyidx;
+        $left = $this->yystack[$i - 2]->minor;
+        $right = $this->yystack[$i]->minor;
+        $this->_retvalue = new OpExpr($left, ExprOp::OP_AND, $right);
+    }
+#line 905 "SearchCommandParser.php"
+#line 63 "SearchCommandParser.y"
+    function yy_r2(){
+        // expr ::= expr OPOR expr
+        $i = $this->yyidx;
+        $left = $this->yystack[$i - 2]->minor;
+        $right = $this->yystack[$i]->minor;
+        $this->_retvalue = new OpExpr($left, ExprOp::OP_OR, $right);
+    }
+#line 910 "SearchCommandParser.php"
+#line 68 "SearchCommandParser.y"
+    function yy_r3(){
+        // expr ::= NOT expr -- flip the negation flag of the operand
+        $operand = $this->yystack[$this->yyidx]->minor;
+        $operand->not(!$operand->not());
+        $this->_retvalue = $operand;
+    }
+#line 917 "SearchCommandParser.php"
+#line 75 "SearchCommandParser.y"
+    function yy_r4(){
+        // PAR_OPEN x PAR_CLOSE -- pass the middle symbol through
+        $inner = $this->yystack[$this->yyidx - 1];
+        $this->_retvalue = $inner->minor;
+    }
+#line 922 "SearchCommandParser.php"
+#line 80 "SearchCommandParser.y"
+    function yy_r5(){
+        // expr ::= terminal operator value
+        $i = $this->yyidx;
+        $op = $this->yystack[$i - 1]->minor;
+        $negate = ($op == ExprOp::IS_NOT);
+        if ($negate)
+        {
+            // IS_NOT is expressed as a negated IS
+            $op = ExprOp::IS;
+        }
+
+        $field = $this->yystack[$i - 2]->minor;
+        $value = $this->yystack[$i]->minor;
+        $fld = new OpExpr($field, $op, $value);
+        $fld->not($negate);
+        $this->_retvalue = $fld;
+    }
+#line 937 "SearchCommandParser.php"
+#line 95 "SearchCommandParser.y"
+    function yy_r6(){
+        // expr ::= terminal notop BETWEEN value OPAND value
+        $i = $this->yyidx;
+        $field = $this->yystack[$i - 5]->minor;
+        $low = $this->yystack[$i - 2]->minor;
+        $high = $this->yystack[$i]->minor;
+        $range = new BetweenValueExpr($low, $high);
+        $expr = new OpExpr($field, ExprOp::BETWEEN, $range);
+        $expr->not($this->yystack[$i - 4]->minor);
+        $this->_retvalue = $expr;
+    }
+#line 944 "SearchCommandParser.php"
+#line 102 "SearchCommandParser.y"
+    function yy_r7(){
+        // expr ::= terminal notop LIKE value
+        $i = $this->yyidx;
+        $field = $this->yystack[$i - 3]->minor;
+        $value = $this->yystack[$i]->minor;
+        $expr = new OpExpr($field, ExprOp::LIKE, $value);
+        $expr->not($this->yystack[$i - 2]->minor);
+        $this->_retvalue = $expr;
+    }
+#line 951 "SearchCommandParser.php"
+#line 109 "SearchCommandParser.y"
+    function yy_r8(){
+        // expr ::= terminal IS notop value
+        $i = $this->yyidx;
+        $field = $this->yystack[$i - 3]->minor;
+        $value = $this->yystack[$i]->minor;
+        $expr = new OpExpr($field, ExprOp::IS, $value);
+        $expr->not($this->yystack[$i - 1]->minor);
+        $this->_retvalue = $expr;
+    }
+#line 958 "SearchCommandParser.php"
+#line 116 "SearchCommandParser.y"
+    function yy_r9(){
+        // expr ::= terminal DOES notop CONTAIN value
+        $i = $this->yyidx;
+        $field = $this->yystack[$i - 4]->minor;
+        $value = $this->yystack[$i]->minor;
+        $expr = new OpExpr($field, ExprOp::CONTAINS, $value);
+        $expr->not($this->yystack[$i - 2]->minor);
+        $this->_retvalue = $expr;
+    }
+#line 965 "SearchCommandParser.php"
+#line 123 "SearchCommandParser.y"
+    function yy_r10(){
+        // expr ::= terminal COLON value
+        $i = $this->yyidx;
+        $field = $this->yystack[$i - 2]->minor;
+        $value = $this->yystack[$i]->minor;
+        $this->_retvalue = new OpExpr($field, ExprOp::CONTAINS, $value);
+    }
+#line 970 "SearchCommandParser.php"
+#line 129 "SearchCommandParser.y"
+    function yy_r11(){
+        // notop ::= (empty)
+        $this->_retvalue = false;
+    }
+#line 975 "SearchCommandParser.php"
+#line 134 "SearchCommandParser.y"
+    function yy_r12(){
+        // notop ::= NOT
+        $this->_retvalue = true;
+    }
+#line 980 "SearchCommandParser.php"
+#line 139 "SearchCommandParser.y"
+    function yy_r13(){
+        // terminal ::= SQUARE_OPEN value SQUARE_CLOSE SQUARE_OPEN value SQUARE_CLOSE
+        $i = $this->yyidx;
+        $registry = ExprFieldRegistry::getRegistry();
+        $first = $this->yystack[$i - 4]->minor;
+        $second = $this->yystack[$i - 1]->minor;
+        $this->_retvalue = $registry->resolveMetadataField($first, $second);
+    }
+#line 987 "SearchCommandParser.php"
+#line 146 "SearchCommandParser.y"
+    function yy_r14(){
+        // terminal ::= TERMINAL
+        $registry = ExprFieldRegistry::getRegistry();
+        $alias = $this->yystack[$this->yyidx]->minor;
+        $this->_retvalue = $registry->resolveAlias($alias);
+    }
+#line 994 "SearchCommandParser.php"
+#line 153 "SearchCommandParser.y"
+    function yy_r15(){
+        // value ::= VALUE
+        $top = $this->yystack[$this->yyidx];
+        $this->_retvalue = $top->minor;
+    }
+#line 999 "SearchCommandParser.php"
+#line 163 "SearchCommandParser.y"
+    function yy_r17(){
+        // valuelist ::= VALUE COMMA valuelist
+        $i = $this->yyidx;
+        $list = $this->yystack[$i]->minor;
+        $list->addValue($this->yystack[$i - 2]->minor);
+        $this->_retvalue = $this->yystack[$i]->minor;
+    }
+#line 1005 "SearchCommandParser.php"
+#line 169 "SearchCommandParser.y"
+    function yy_r18(){
+        // valuelist ::= VALUE
+        $first = $this->yystack[$this->yyidx]->minor;
+        $this->_retvalue = new ValueListExpr($first);
+    }
+#line 1010 "SearchCommandParser.php"
+#line 174 "SearchCommandParser.y"
+    function yy_r19(){
+        // operator ::= CONTAINS
+        $this->_retvalue = ExprOp::CONTAINS;
+    }
+#line 1015 "SearchCommandParser.php"
+#line 179 "SearchCommandParser.y"
+    function yy_r20(){
+        // operator ::= LT
+        $this->_retvalue = ExprOp::LESS_THAN;
+    }
+#line 1020 "SearchCommandParser.php"
+#line 184 "SearchCommandParser.y"
+    function yy_r21(){
+        // operator ::= GT
+        $this->_retvalue = ExprOp::GREATER_THAN;
+    }
+#line 1025 "SearchCommandParser.php"
+#line 189 "SearchCommandParser.y"
+    function yy_r22(){
+        // operator ::= LE
+        $this->_retvalue = ExprOp::LESS_THAN_EQUAL;
+    }
+#line 1030 "SearchCommandParser.php"
+#line 194 "SearchCommandParser.y"
+    function yy_r23(){
+        // operator ::= GE
+        $this->_retvalue = ExprOp::GREATER_THAN_EQUAL;
+    }
+#line 1035 "SearchCommandParser.php"
+#line 199 "SearchCommandParser.y"
+    function yy_r24(){
+        // operator ::= START WITH
+        $this->_retvalue = ExprOp::STARTS_WITH;
+    }
+#line 1040 "SearchCommandParser.php"
+#line 204 "SearchCommandParser.y"
+    function yy_r25(){
+        // operator ::= END WITH
+        $this->_retvalue = ExprOp::ENDS_WITH;
+    }
+#line 1045 "SearchCommandParser.php"
+#line 209 "SearchCommandParser.y"
+    function yy_r26(){
+        // operator ::= IS_NOT
+        $this->_retvalue = ExprOp::IS_NOT;
+    }
+#line 1050 "SearchCommandParser.php"
+
+    /**
+     * placeholder for the left hand side in a reduce operation.
+     *
+     * For a parser with a rule like this:
+     *
+     * rule(A) ::= B. { A = 1; }
+     * 
+     *
+     * The parser will translate to something like:
+     *
+     *
+     * function yy_r0(){$this->_retvalue = 1;}
+     *
+     */
+    private $_retvalue;
+
+    /**
+     * Reduce by one rule and perform the shift that must follow it.
+     *
+     * For a rule such as:
+     *
+     *
+     * A ::= B blah C. { dosomething(); }
+     *
+     *
+     * the rule's handler, if any, is called first ("dosomething();" in the
+     * example).  Then one stack entry per right-hand side symbol is
+     * popped (B, blah and C) and the handler's result is pushed back as
+     * the left-hand side symbol, in the state the reduce table gives for
+     * it.  If the table yields the accept action instead, yy_accept() runs.
+     * @param int Number of the rule by which to reduce
+     */
+    function yy_reduce($yyruleno)
+    {
+        $traceRule = self::$yyTraceFILE && $yyruleno >= 0
+            && $yyruleno < count(self::$yyRuleName);
+        if ($traceRule) {
+            fprintf(self::$yyTraceFILE, "%sReduce (%d) [%s].\n",
+                self::$yyTracePrompt, $yyruleno,
+                self::$yyRuleName[$yyruleno]);
+        }
+
+        // run the rule's handler, when it has one, and capture its result
+        $this->_retvalue = null;
+        $lhsValue = null;
+        if (array_key_exists($yyruleno, self::$yyReduceMap)) {
+            $handler = 'yy_r' . self::$yyReduceMap[$yyruleno];
+            $this->$handler();
+            $lhsValue = $this->_retvalue;
+        }
+
+        $rule = self::$yyRuleInfo[$yyruleno];
+        $lhsSymbol = $rule['lhs'];
+        $rhsCount = $rule['rhs'];
+
+        // drop every right-hand side symbol from the stack
+        $this->yyidx -= $rhsCount;
+        for ($popped = 0; $popped < $rhsCount; $popped++) {
+            array_pop($this->yystack);
+        }
+
+        $nextAction = $this->yy_find_reduce_action(
+            $this->yystack[$this->yyidx]->stateno, $lhsSymbol);
+        if ($nextAction == self::YYNSTATE + self::YYNRULE + 1) {
+            $this->yy_accept();
+            return;
+        }
+        if ($nextAction >= self::YYNSTATE) {
+            return;
+        }
+        /* When not tracing, and at least one element was popped, the new
+        ** element is pushed right here, which skips the stack overflow
+        ** test (and the trace output) of yy_shift(). */
+        if (self::$yyTraceFILE || !$rhsCount) {
+            $this->yy_shift($nextAction, $lhsSymbol, $lhsValue);
+            return;
+        }
+        $this->yyidx++;
+        $entry = new SearchCommandParseryyStackEntry;
+        $entry->stateno = $nextAction;
+        $entry->major = $lhsSymbol;
+        $entry->minor = $lhsValue;
+        $this->yystack[$this->yyidx] = $entry;
+    }
+
+    /**
+     * Runs when the parse fails: empties the stack and records the
+     * failure in $this->parse_result.
+     *
+     * Code from %parse_fail is inserted here
+     */
+    function yy_parse_failed()
+    {
+        if (self::$yyTraceFILE) {
+            fprintf(self::$yyTraceFILE, "%sFail!\n", self::$yyTracePrompt);
+        }
+        while ($this->yyidx >= 0) {
+            $this->yy_pop_parser_stack();
+        }
+        /* code from the grammar file follows */
+#line 46 "SearchCommandParser.y"
+
+    $this->parse_result = 'syntax';
+#line 1155 "SearchCommandParser.php"
+    }
+
+    /**
+     * Runs when a syntax error first occurs: marks the parse result as
+     * 'syntax' and resets the parse message to an empty string.
+     *
+     * %syntax_error code is inserted here
+     * @param int The major type of the error token
+     * @param mixed The minor type of the error token
+     */
+    function yy_syntax_error($yymajor, $TOKEN)
+    {
+#line 35 "SearchCommandParser.y"
+
+    $this->parse_message = "";
+    $this->parse_result = 'syntax';
+#line 1172 "SearchCommandParser.php"
+    }
+
+    /**
+     * Runs when the parser accepts: empties the stack and records
+     * success in $this->parse_result.
+     *
+     * %parse_accept code is inserted here
+     */
+    function yy_accept()
+    {
+        if (self::$yyTraceFILE) {
+            fprintf(self::$yyTraceFILE, "%sAccept!\n", self::$yyTracePrompt);
+        }
+        while ($this->yyidx >= 0) {
+            $this->yy_pop_parser_stack();
+        }
+        /* code from the grammar file follows */
+#line 41 "SearchCommandParser.y"
+
+    $this->parse_result = 'ok';
+#line 1194 "SearchCommandParser.php"
+    }
+
+    /**
+     * The main parser program.
+     *
+     * The first argument is the major token number.  The second is
+     * the token value string as scanned from the input.
+ * + * @param int the token number + * @param mixed the token value + * @param mixed any extra arguments that should be passed to handlers + */ + function doParse($yymajor, $yytokenvalue) + { + /* Feed one token to the parser: look up the action for the current state and then shift, reduce, run error recovery or accept, looping until the token is consumed or the stack is empty. NOTE(review): the signature accepts only $yymajor and $yytokenvalue; no extra handler arguments are taken. */ +// $yyact; /* The parser action. */ +// $yyendofinput; /* True if we are at the end of input */ + $yyerrorhit = 0; /* True if yymajor has invoked an error */ + + /* (re)initialize the parser, if necessary */ + if ($this->yyidx === null || $this->yyidx < 0) { + /* if ($yymajor == 0) return; // not sure why this was here... */ + $this->yyidx = 0; + $this->yyerrcnt = -1; + $x = new SearchCommandParseryyStackEntry; + $x->stateno = 0; + $x->major = 0; + $this->yystack = array(); + array_push($this->yystack, $x); + } + $yyendofinput = ($yymajor==0); + + if (self::$yyTraceFILE) { + fprintf(self::$yyTraceFILE, "%sInput %s\n", + self::$yyTracePrompt, self::$yyTokenName[$yymajor]); + } + + do { + $yyact = $this->yy_find_shift_action($yymajor); + if ($yymajor < self::YYERRORSYMBOL && + !$this->yy_is_expected_token($yymajor)) { + // force a syntax error + $yyact = self::YY_ERROR_ACTION; + } + /* an action below YYNSTATE is the state to shift to */ + if ($yyact < self::YYNSTATE) { + $this->yy_shift($yyact, $yymajor, $yytokenvalue); + $this->yyerrcnt--; + if ($yyendofinput && $this->yyidx >= 0) { + $yymajor = 0; + } else { + $yymajor = self::YYNOCODE; + } + } elseif ($yyact < self::YYNSTATE + self::YYNRULE) { + /* otherwise reduce by rule number $yyact - YYNSTATE */ + $this->yy_reduce($yyact - self::YYNSTATE); + } elseif ($yyact == self::YY_ERROR_ACTION) { + if (self::$yyTraceFILE) { + fprintf(self::$yyTraceFILE, "%sSyntax Error!\n", + self::$yyTracePrompt); + } + if (self::YYERRORSYMBOL) { + /* A syntax error has occurred. + ** The response to an error depends upon whether or not the + ** grammar defines an error token "ERROR". + ** + ** This is what we do if the grammar does define ERROR: + ** + ** * Call the %syntax_error function. + ** + ** * Begin popping the stack until we enter a state where + ** it is legal to shift the error symbol, then shift + ** the error symbol.
+ ** + ** * Set the error count to three. + ** + ** * Begin accepting and shifting new tokens. No new error + ** processing will occur until three tokens have been + ** shifted successfully. + ** + */ + if ($this->yyerrcnt < 0) { + $this->yy_syntax_error($yymajor, $yytokenvalue); + } + $yymx = $this->yystack[$this->yyidx]->major; + if ($yymx == self::YYERRORSYMBOL || $yyerrorhit ){ + if (self::$yyTraceFILE) { + fprintf(self::$yyTraceFILE, "%sDiscard input token %s\n", + self::$yyTracePrompt, self::$yyTokenName[$yymajor]); + } + $this->yy_destructor($yymajor, $yytokenvalue); + $yymajor = self::YYNOCODE; + } else { + while ($this->yyidx >= 0 && + $yymx != self::YYERRORSYMBOL && + ($yyact = $this->yy_find_shift_action(self::YYERRORSYMBOL)) >= self::YYNSTATE + ){ + $this->yy_pop_parser_stack(); + } + if ($this->yyidx < 0 || $yymajor==0) { + $this->yy_destructor($yymajor, $yytokenvalue); + $this->yy_parse_failed(); + $yymajor = self::YYNOCODE; + } elseif ($yymx != self::YYERRORSYMBOL) { + $u2 = 0; + $this->yy_shift($yyact, self::YYERRORSYMBOL, $u2); + } + } + $this->yyerrcnt = 3; + $yyerrorhit = 1; + } else { + /* YYERRORSYMBOL is not defined */ + /* This is what we do if the grammar does not define ERROR: + ** + ** * Report an error message, and throw away the input token. + ** + ** * If the input token is $, then fail the parse. + ** + ** As before, subsequent error messages are suppressed until + ** three input tokens have been successfully shifted.
+ */ + if ($this->yyerrcnt <= 0) { + $this->yy_syntax_error($yymajor, $yytokenvalue); + } + $this->yyerrcnt = 3; + $this->yy_destructor($yymajor, $yytokenvalue); + if ($yyendofinput) { + $this->yy_parse_failed(); + } + $yymajor = self::YYNOCODE; + } + } else { + $this->yy_accept(); + $yymajor = self::YYNOCODE; + } + } while ($yymajor != self::YYNOCODE && $this->yyidx >= 0); + } +} \ No newline at end of file diff --git a/search2/search/SearchCommandParser.y b/search2/search/SearchCommandParser.y new file mode 100755 index 0000000..8b1f4d7 --- /dev/null +++ b/search2/search/SearchCommandParser.y @@ -0,0 +1,211 @@ +%name SearchCommandParser +%declare_class {class SearchCommandParser} + +%include_class { + + private $expr_result; + private $parse_result; + + public function __construct() + { + $this->parse_result = 'ok'; + } + + public function getExprResult() + { + return $this->expr_result; + } + + public function isExprOk() + { + return $this->parse_result == 'ok'; + } + +} + +%type expr {Expr} + +%left OPOR. +%left OPAND. +%right NOT. +%left IS CONTAIN LIKE BETWEEN START END. +%left GT LE LT GE. + +%syntax_error +{ + $this->parse_result = 'syntax'; + $this->parse_message = ""; +} + +%parse_accept +{ + $this->parse_result = 'ok'; +} + +%parse_failure +{ + $this->parse_result = 'syntax'; +} + +%start_symbol cmdline + +cmdline ::= expr(A). +{ + $this->expr_result = A; +} + +expr(A) ::= expr(B) OPAND expr(C). +{ + A = new OpExpr(B, ExprOp::OP_AND, C); +} + +expr(A) ::= expr(B) OPOR expr(C). +{ + A = new OpExpr(B, ExprOp::OP_OR, C); +} + +expr(A) ::= NOT expr(B). +{ + $expr = B; + $expr->not(!$expr->not()); + A = $expr; +} + +expr(A) ::= PAR_OPEN expr(B) PAR_CLOSE. +{ + A = B; +} + +expr(A) ::= terminal(B) operator(C) value(D). +{ + $op = C; + $not = false; + if ($op == ExprOp::IS_NOT) + { + $op = ExprOp::IS; + $not = true; + } + + $fld = new OpExpr(B, $op, D); + $fld->not($not); + A = $fld; +} + +expr(A) ::= terminal(B) notop(C) BETWEEN value(D) OPAND value(E).
[BETWEEN] +{ + $expr = new OpExpr(B, ExprOp::BETWEEN, new BetweenValueExpr(D, E)); + $expr->not(C); + A=$expr; +} + +expr(A) ::= terminal(B) notop(C) LIKE value(D). +{ + $expr = new OpExpr(B, ExprOp::LIKE, D); + $expr->not(C); + A=$expr; +} + +expr(A) ::= terminal(B) IS notop(C) value(D). +{ + $expr = new OpExpr(B, ExprOp::IS, D); + $expr->not(C); + A=$expr; +} + +expr(A) ::= terminal(B) DOES notop(C) CONTAIN value(D). +{ + $expr = new OpExpr(B, ExprOp::CONTAINS, D); + $expr->not(C); + A=$expr; +} + +expr(A) ::= terminal(B) COLON value(C). +{ + A = new OpExpr(B, ExprOp::CONTAINS, C); +} + + +notop(A) ::= . +{ + A = false; +} + +notop(A) ::= NOT. +{ + A = true; +} + +terminal(A) ::= SQUARE_OPEN value(B) SQUARE_CLOSE SQUARE_OPEN value(C) SQUARE_CLOSE. +{ + $registry = ExprFieldRegistry::getRegistry(); + $field = $registry->resolveMetadataField(B, C); + A = $field; +} + +terminal(A) ::= TERMINAL(B). +{ + $registry = ExprFieldRegistry::getRegistry(); + $field=$registry->resolveAlias(B); + A = $field; +} + +value(A) ::= VALUE(B). +{ + A = B; +} + +value(A) ::= PAR_OPEN valuelist(B) PAR_CLOSE. +{ + A = B; +} + +valuelist(A) ::= VALUE(B) COMMA valuelist(C). +{ + C->addValue(B); + A = C; +} + +valuelist(A) ::= VALUE(B). +{ + A = new ValueListExpr(B); +} + +operator(A) ::= CONTAINS. +{ + A = ExprOp::CONTAINS; +} + +operator(A) ::= LT. +{ + A = ExprOp::LESS_THAN; +} + +operator(A) ::= GT. +{ + A = ExprOp::GREATER_THAN; +} + +operator(A) ::= LE. +{ + A = ExprOp::LESS_THAN_EQUAL; +} + +operator(A) ::= GE. +{ + A = ExprOp::GREATER_THAN_EQUAL; +} + +operator(A) ::= START WITH. +{ + A = ExprOp::STARTS_WITH; +} + +operator(A) ::= END WITH. +{ + A = ExprOp::ENDS_WITH; +} + +operator(A) ::= IS_NOT. 
+{ + A = ExprOp::IS_NOT; +} diff --git a/search2/search/bin/cronSavedSearch.php b/search2/search/bin/cronSavedSearch.php new file mode 100644 index 0000000..11a2420 --- /dev/null +++ b/search2/search/bin/cronSavedSearch.php @@ -0,0 +1,21 @@ + \ No newline at end of file diff --git a/search2/search/expr.inc.php b/search2/search/expr.inc.php new file mode 100755 index 0000000..60c69dd --- /dev/null +++ b/search2/search/expr.inc.php @@ -0,0 +1,2182 @@ +dbfields=array(); + $sql = "SELECT groupname, itemname, ranking, type FROM search_ranking"; + $rs = DBUtil::getResultArray($sql); + foreach($rs as $item) + { + switch ($item['type']) + { + case 'T': + $this->db[$item['groupname']][$item['itemname']] = $item['ranking']+0; + break; + case 'M': + $this->metadata[$item['groupname']][$item['itemname']] = $item['ranking']+0; + break; + case 'S': + switch($item['groupname']) + { + case 'Discussion': + $this->discussion = $item['ranking']+0; + break; + case 'DocumentText': + $this->text = $item['ranking']+0; + break; + } + break; + } + } + } + + /** + * Enter description here... 
+ * + * @return RankManager + */ + public static function get() + { + static $singleton = null; + if (is_null($singleton)) + { + $singleton = new RankManager(); + } + return $singleton; + } + + public function scoreField($groupname, $type='T', $itemname='') + { + switch($type) + { + case 'T': + return $this->db[$groupname][$itemname]; + case 'M': + return $this->metadata[$groupname][$itemname]; + case 'S': + switch($groupname) + { + case 'Discussion': + return $this->discussion; + case 'DocumentText': + return $this->text; + default: + return 0; + } + default: + return 0; + } + } +} + + +class Expr +{ + /** + * The parent expression + * + * @var Expr + */ + protected $parent; + + protected static $node_id = 0; + + protected $expr_id; + + public function __construct() + { + $this->expr_id = Expr::$node_id++; + } + + public function getExprId() + { + return $this->expr_id; + } + + /** + * Coverts the expression to a string + * + * @return string + */ + public function __toString() + { + throw new Exception('Not yet implemented in ' . 
get_class($this)); + } + + /** + * Reference to the parent expression + * + * @return Expr + */ + public function &getParent() + { + return $this->parent; + } + + /** + * Sets the parent expiression + * + * @param Expr $parent + */ + public function setParent(&$parent) + { + $this->parent = &$parent; + } + + /** + * Is the expression valid + * + * @return boolean + */ + public function is_valid() + { + return true; + } + + public function isExpr() + { + return $this instanceof OpExpr; + } + + public function isOpExpr() + { + return $this instanceof OpExpr; + } + public function isValueExpr() + { + return $this instanceof ValueExpr; + } + public function isValueListExpr() + { + return $this instanceof ValueListExpr; + } + + public function isDbExpr() + { + return $this instanceof DBFieldExpr; + } + + public function isFieldExpr() + { + return $this instanceof FieldExpr; + } + + public function isSearchableText() + { + return $this instanceof SearchableText ; + } + + public function isMetadataField() + { + return $this instanceof MetadataField; + } + + + + + + public function toViz(&$str, $phase) + { + throw new Exception('To be implemented' . 
get_class($this)); + } + + public function toVizGraph($options=array()) + { + $str = "digraph tree {\n"; + if (isset($options['left-to-right']) && $options['left-to-right']) + { + $str .= "rankdir=LR\n"; + } + + $this->toViz($str, 0); + $this->toViz($str, 1); + + $str .= "}\n"; + + if (isset($options['tofile'])) + { + $path=dirname($options['tofile']); + $filename=basename($options['tofile']); + $ext = pathinfo($filename, PATHINFO_EXTENSION); + $base = substr($filename, 0, -strlen($ext)-1); + + $dotfile="$path/$base.$ext"; + $jpgfile="$path/$base.jpg"; + $fp = fopen($dotfile,'wt'); + fwrite($fp, $str); + fclose($fp); + + system("dot -Tjpg -o$jpgfile $dotfile"); + + if (isset($options['view']) && $options['view']) + { + system("eog $jpgfile"); + } + } + + return $str; + } +} + +class FieldExpr extends Expr +{ + /** + * Name of the field + * + * @var string + */ + protected $field; + + protected $alias; + + protected $display; + + + /** + * Constructor for the field expression + * + * @param string $field + */ + public function __construct($field, $display=null) + { + parent::__construct(); + $this->field=$field; + if (is_null($display)) + { + $display=get_class($this); + } + $this->display = $display; + $this->setAlias(get_class($this)); + } + + public function setAlias($alias) + { + $this->alias=$alias; + } + + public function getDisplay() + { + return $this->display; + } + + public function getAlias() + { + return $this->alias; + } + + public function getFullName() + { + return $this->alias . '.' . 
$this->field; + } + + /** + * Returns the field + * + * @return string + */ + public function getField() + { + return $this->field; + } + + /** + * Coverts the expression to a string + * + * @return string + */ + public function __toString() + { + return $this->alias; + } + + public function toViz(&$str, $phase) + { + if ($phase == 0) + { + $expr_id = $this->getExprId(); + $str .= "struct$expr_id [style=rounded, label=\"$expr_id: FIELD[$this->alias]\"]\n"; + } + } + + public function rewrite(&$left, &$op, &$right, $not=false) + { + $input = $left->getInputRequirements(); + + if ($input['value']['type'] != FieldInputType::FULLTEXT) + { + return; + } + + + if ($right->isValueExpr()) + { + $value = $right->getValue(); + } + else + { + $value = $right; + } + + if (substr($value,0,1) != '\'' || substr($value,-1) != '\'') + { + OpExpr::rewriteString($left, $op, $right, $not); + } + else + { + $right = new ValueExpr(trim(substr($value,1,-1))); + } + } +} + +class DBFieldExpr extends FieldExpr +{ + /** + * The table the field is associated with + * + * @var string + */ + protected $table; + + protected $jointable; + protected $joinfield; + protected $matchfield; + protected $quotedvalue; + + + /** + * Constructor for the database field + * + * @param string $field + * @param string $table + */ + public function __construct($field, $table, $display=null) + { + if (is_null($display)) + { + $display = get_class($this); + } + + parent::__construct($field, $display); + + $this->table=$table; + $this->jointable = null; + $this->joinfield = null; + $this->matchfield = null; + $this->quotedvalue=true; + } + + /** + * Returns the table name + * + * @return string + */ + public function getTable() + { + return $this->table; + } + + public function joinTo($table, $field) + { + $this->jointable=$table; + $this->joinfield=$field; + } + public function matchField($field) + { + $this->matchfield = $field; + } + + public function modifyName($name) + { + return $name; + } + + public 
function modifyValue($value) + { + return $value; + } + + + public function getJoinTable() { return $this->jointable; } + public function getJoinField() { return $this->joinfield; } + public function getMatchingField() { return $this->matchfield; } + public function isValueQuoted($quotedvalue = null) + { + if (isset($quotedvalue)) + { + $this->quotedvalue = $quotedvalue; + } + return $this->quotedvalue; + } +} + +class MetadataField extends DBFieldExpr +{ + protected $fieldset; + protected $fieldid; + protected $fieldsetid; + + public function __construct($fieldset, $field, $fieldsetid, $fieldid) + { + parent::__construct($field, 'document_fields_link'); + $this->fieldset=$fieldset; + $this->fieldid=$fieldid; + $this->fieldsetid=$fieldsetid; + } + + public function getFieldSet() + { + return $this->fieldset; + } + + public function getFieldId() + { + return $this->fieldid; + } + + public function getFieldSetId() + { + return $this->fieldsetid; + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::TEXT)); + } + + /** + * Coverts the expression to a string + * + * @return string + */ + public function __toString() + { + return "METADATA[$this->fieldset][$this->field]"; + } + +} + +class SearchableText extends FieldExpr +{ +} + +class ValueExpr extends Expr +{ + /** + * The value + * + * @var mixed + */ + protected $value; + + /** + * Constructor for the value expression + * + * @param mixed $value + */ + public function __construct($value) + { + parent::__construct(); + $this->value=$value; + } + + public function getValue() + { + return $this->value; + } + + /** + * Converts the value to a string + * + * @return unknown + */ + public function __toString() + { + return (string) "\"$this->value\""; + } + + public function toViz(&$str, $phase) + { + if ($phase == 0) + { + $expr_id = $this->getExprId(); + $value = addslashes($this->value); + $str .= "struct$expr_id [style=ellipse, label=\"$expr_id: \\\"$value\\\"\"]\n"; 
+ } + } + + public function getSQL($field, $fieldname, $op, $not=false) + { + $val = $field->modifyValue($this->getValue()); + $quote = ''; + if ($field->isValueQuoted()) + { + $val = addslashes($val); + $quote = '\''; + } + + switch($op) + { + case ExprOp::CONTAINS: + $sql = "$fieldname LIKE '%$val%'"; + break; + case ExprOp::STARTS_WITH: + $sql = "$fieldname LIKE '$val%'"; + break; + case ExprOp::ENDS_WITH: + $sql = "$fieldname LIKE '%$val'"; + break; + case ExprOp::IS: + $sql = "$fieldname = $quote$val$quote"; + break; + case ExprOp::GREATER_THAN : + $sql = "$fieldname > $quote$val$quote"; + break; + case ExprOp::GREATER_THAN_EQUAL : + $sql = "$fieldname >= $quote$val$quote"; + break; + case ExprOp::LESS_THAN : + $sql = "$fieldname < $quote$val$quote"; + break; + case ExprOp::LESS_THAN_EQUAL : + $sql = "$fieldname <= $quote$val$quote"; + break; + default: + throw new Exception('Unknown op: ' . $op); + } + + if ($not) + { + $sql = "not ($sql)"; + } + + return $sql; + } + +} + +class ValueListExpr extends Expr +{ + /** + * The value + * + * @var mixed + */ + protected $values; + + /** + * Constructor for the value expression + * + * @param mixed $value + */ + public function __construct($value) + { + parent::__construct($value); + $this->values=array($value); + } + + public function addValue($value) + { + $this->values[] = $value; + } + + + public function getValue($param=null) + { + if (!empty($param)) + { + return $this->values[$param]; + } + $str = ''; + + foreach($this->values as $value) + { + if ($str != '') $str .= ','; + $str .= "\"$value\""; + } + + return $str; + } + + /** + * Converts the value to a string + * + * @return unknown + */ + public function __toString() + { + return $this->getValue(); + } + + public function toViz(&$str, $phase) + { + if ($phase == 0) + { + $expr_id = $this->getExprId(); + + $str .= "struct$expr_id [style=ellipse, label=\"$expr_id: "; + $i=0; + foreach($this->values as $value) + { + if ($i++>0) $str .= ','; + $value = 
addslashes($value); + $str .= "\\\"$value\\\""; + } + $str .= "\"]\n"; + } + } + + + + public function rewrite(&$left, &$op, &$right, &$not) + { + if (count($this->values) == 1) + { + $right = new ValueExpr($this->values[0]); + return; + } + $newops = array(); + foreach($this->values as $value) + { + $classname = get_class($left); + $class = new $classname; + $newop = new OpExpr($class, $op, $value); + $newops[] = $newop; + } + + $result = $newops[0]; + for($i=1;$ileft(); + $op = $result->op(); + $right = $result->right(); + } + +} + + +class BetweenValueExpr extends ValueExpr +{ + protected $endvalue; + + public function __construct($start, $end) + { + parent::__construct($start); + $this->endvalue = $end; + } + + public function getStart() + { + return $this->getValue(); + } + + public function getEnd() + { + return $this->endvalue; + } + + /** + * Converts the value to a string + * + * @return unknown + */ + public function __toString() + { + return (string) $this->value . ' AND ' . $this->endvalue; + } + + public function toViz(&$str, $phase) + { + if ($phase == 0) + { + $value = addslashes($this->value); + $value2 = addslashes($this->endvalue); + + $expr_id = $this->getExprId(); + $str .= "struct$expr_id [style=rounded, label=\"$expr_id: $value AND $value2\"]\n"; + } + } + + public function getSQL($field, $fieldname, $op, $not=false) + { + if ($op != ExprOp::BETWEEN) + { + throw new Exception('Unexpected operator: ' . $op); + } + + $quote = ''; + + $start = $field->modifyValue($this->getStart()); + $end = $field->modifyValue($this->getEnd()); + + if ($field->isValueQuoted()) + { + $start = addslashes($start); + $end = addslashes($end); + $quote = '\''; + } + + + $not = $not?' 
NOT ':''; + return "$not ($fieldname $op $quote$start$quote AND $quote$end$quote) "; + } +} + +interface QueryBuilder +{ + function buildComplexQuery($expr); + + function buildSimpleQuery($op, $group); + + function getRanking($result); + + function getResultText($result); + +} + +class TextQueryBuilder implements QueryBuilder +{ + private $text; + private $query; + + public function buildComplexQuery($expr) + { + $left = $expr->left(); + $right = $expr->right(); + if (DefaultOpCollection::isBoolean($expr)) + { + $query = '(' . $this->buildComplexQuery($left) . ' ' . $expr->op() . ' ' . $this->buildComplexQuery($right) . ')'; + + if ($expr->not()) + { + $query = "NOT $query"; + } + } + else + { + $fieldname = $left->getField(); + $value = addslashes($right->getValue()); + + $not = $expr->not()?' NOT ':''; + + $query = "$not$fieldname: \"$value\""; + } + + return $query; + } + + public function buildSimpleQuery($op, $group) + { + $query = ''; + foreach($group as $expr) + { + if (!empty($query)) + { + $query .= " $op "; + } + + $left = $expr->left(); + $right = $expr->right(); + + $fieldname = $left->getField(); + $value = addslashes($right->getValue()); + + $not = $expr->not()?' 
NOT ':''; + + $query .= "$not$fieldname: \"$value\""; + } + + return $query; + } + + public function getRanking($result) + { + $init = $result->Rank; + $score=0; + $ranker = RankManager::get(); + $discussion = $result->Discussion; + if (!empty($discussion)) + { + $score += $init *$ranker->scoreField('Discussion', 'S'); + } + else + { + $score += $init *$ranker->scoreField('DocumentText', 'S'); + + } + return $score; + } + + public function setQuery($query) + { + $this->query = $query; + } + + private function extractText($word, $maxwords=40, $maxlen=512) + { + $offset=stripos($this->text, $word); + + if ($offset == false) + { + return array(false, false); + } + + $text = substr($this->text, 0 , $offset); + + $lastsentence = strrpos($text, '.'); + if (!$lastsentence) $lastsentence=0; + + if ($offset - $lastsentence > $maxlen) + { + $lastsentence = $offset - $maxlen; + } + + $text = substr($this->text, $lastsentence, $offset - $lastsentence); + + $wordoffset= strlen($text)-1; + $words = $maxwords; + while ($words > 0) + { + $text = substr($text, 0, $wordoffset); + $foundoffset = strrpos($text, ' '); + if ($foundoffset === false) + { + break; + } + $wordoffset = $foundoffset; + $words--; + } + + $startOffset = $lastsentence + $wordoffset; + + $nextsentence = strpos($this->text, '.', $offset); + + $words = $maxwords; + $endOffset = $offset; + while ($words > 0) + { + $foundoffset = strpos($this->text, ' ', $endOffset+1); + if ($foundoffset === false) + { + break; + } + if ($endOffset > $offset + $maxlen) + { + break; + } + if ($endOffset > $nextsentence) + { + $endOffset = $nextsentence-1; + break; + } + $endOffset = $foundoffset; + + $words--; + } + + return array($startOffset, substr($this->text, $startOffset, $endOffset - $startOffset + 1)); + } + + + public function getResultText($result) + { + $this->text = substr($result->Text,0,40960); + $words = array(); + $sentences = array(); + + preg_match_all('("[^"]*")',$this->query, $matches,PREG_OFFSET_CAPTURE); + + 
foreach($matches[0] as $word) + { + list($word,$offset) = $word; + $word = substr($word,1,-1); + $wordlen = strlen($word); + $res = $this->extractText($word); + list($sentenceOffset,$sentence) = $res; + + if ($sentenceOffset === false) + { + continue; + } + + if (array_key_exists($sentenceOffset, $sentences)) + { + $sentences[$sentenceOffset]['score']++; + } + else + { + $sentences[$sentenceOffset] = array( + 'sentence'=>$sentence, + 'score'=>1 + ); + } + + $sentence = $sentences[$sentenceOffset]['sentence']; + + preg_match_all("@$word@i",$sentence, $swords,PREG_OFFSET_CAPTURE); + foreach($swords[0] as $wordx) + { + list($wordx,$offset) = $wordx; + + $sentence = substr($sentence,0, $offset) . '' . substr($sentence, $offset, $wordlen) . '' . substr($sentence, $offset + $wordlen); + } + + $sentences[$sentenceOffset]['sentence'] = $sentence; + + $words[$word] = array( + 'sentence'=>$sentenceOffset + ); + } + + ksort($sentences); + $result = ''; + + foreach($sentences as $o=>$i) + { + if (!empty($result)) $result .= '   ...    '; + $result .= $i['sentence']; + } + + return $result; + } + +} + +class SQLQueryBuilder implements QueryBuilder +{ + private $used_tables; + private $aliases; + private $sql; + private $db; + private $metadata; + + public function __construct() + { + $this->used_tables = array( + 'documents'=>1, + 'document_metadata_version'=>1, + 'document_content_version'=>0, + 'tag_words'=>0, + 'document_fields_link'=>0 + ); + + $this->aliases = array( + 'documents'=>'d', + 'document_metadata_version'=>'dmv', + 'document_content_version'=>'dcv', + 'tag_words'=>'tw', + 'document_fields_link'=>'pdfl' + ); + + $this->sql = ''; + $this->db = array(); + $this->metadata = array(); + } + + /** + * This looks up a table name to find the appropriate alias. 
+ * + * @param string $tablename + * @return string + */ + private function resolveTableToAlias($tablename) + { + if (array_key_exists($tablename, $this->aliases)) + { + return $this->aliases[$tablename]; + } + throw new Exception("Unknown tablename '$tablename'"); + } + + private function exploreExprs($expr, $parent=null) + { + if ($expr->isMetadataField()) + { + $this->metadata[] = & $parent; + } + elseif ($expr->isDBExpr()) + { + $this->db[] = & $parent; + $this->used_tables[$expr->getTable()]++; + } + elseif ($expr->isOpExpr()) + { + $left = & $expr->left(); + $right = & $expr->right(); + if (DefaultOpCollection::isBoolean($expr)) + { + $this->exploreExprs($left, $expr); + $this->exploreExprs($right, $expr); + } + else + { + // if it is not a boolean, we only need to explore left as it is the one where the main field is defined. + $this->exploreExprs($left, $expr); + } + } + } + + private function exploreGroup($group) + { + // split up metadata and determine table usage + foreach($group as $expr) + { + $field = $expr->left(); + + if ($field->isMetadataField()) + { + $this->metadata[] = $expr->getParent(); + } + elseif ($field->isDBExpr()) + { + $this->db[] = $expr->getParent(); + $this->used_tables[$field->getTable()]++; + } + } + } + + private function getFieldnameFromExpr($expr) + { + $field = $expr->left(); + if (is_null($field->getJoinTable())) + { + $alias = $this->resolveTableToAlias($field->getTable()); + $fieldname = $alias . '.' . $field->getField(); + } + else + { + $offset = $this->resolveJoinOffset($expr); + $matching = $field->getMatchingField(); + $tablename = $field->getJoinTable(); + $fieldname = "$tablename$offset.$matching"; + } + + return $fieldname; + } + + private function getSQLEvalExpr($expr) + { + $left = $expr->left(); + $right = $expr->right(); + if ($left->isMetadataField()) + { + $offset = $this->resolveMetadataOffset($expr) + 1; + + $fieldset = $left->getField(); + $query = '(' . "df$offset.name='$fieldset' AND " . 
$right->getSQL($left, "dfl$offset.value", $expr->op(), false) . ')'; + + } + else + { + $fieldname = $this->getFieldnameFromExpr($expr); + + $query = $right->getSQL($left, $left->modifyName($fieldname), $expr->op(), $expr->not());; + } + return $query; + } + + private function buildCoreSQL() + { + if (count($this->metadata) + count($this->db) == 0) + { + throw new Exception('nothing to do'); + } + + // we are doing this because content table is dependant on metadata table + if ($this->used_tables['document_content_version'] > 0) $this->used_tables['document_metadata_version']++; + + $sql = + 'SELECT ' . "\n"; + + $sql .= + ' DISTINCT d.id, dmv.name as title'; + + $offset=0; + foreach($this->db as $expr) + { + $offset++; + $sql .= ", ifnull(" . $this->getSQLEvalExpr($expr) . ",0) as expr$offset "; + } + + foreach($this->metadata as $expr) + { + $offset++; + $sql .= ", ifnull(" . $this->getSQLEvalExpr($expr) . ",0) as expr$offset "; + } + + $sql .= + "\n" . 'FROM ' ."\n" . + ' documents d ' ."\n"; + + if ($this->used_tables['document_metadata_version'] > 0) + { + $sql .= ' INNER JOIN document_metadata_version dmv ON d.metadata_version_id=dmv.id' . "\n"; + } + if ($this->used_tables['document_content_version'] > 0) + { + $sql .= ' INNER JOIN document_content_version dcv ON dmv.content_version_id=dcv.id ' . "\n"; + } + if ($this->used_tables['document_fields_link'] > 0) + { + $sql .= ' LEFT JOIN document_fields_link pdfl ON dmv.id=pdfl.metadata_version_id ' . "\n"; + } + + if ($this->used_tables['tag_words'] > 0) + { + $sql .= ' LEFT OUTER JOIN document_tags dt ON dt.document_id=d.id ' . "\n" . + ' LEFT OUTER JOIN tag_words tw ON dt.tag_id = tw.id ' . "\n"; + } + + $offset = 0; + foreach($this->db as $expr) + { + $field = $expr->left(); + $jointable=$field->getJoinTable(); + if (!is_null($jointable)) + { + $fieldname = $this->resolveTableToAlias($field->getTable()) . '.' . 
$field->getField(); + + $joinalias = "$jointable$offset"; + $joinfield = $field->getJoinField(); + $sql .= " LEFT OUTER JOIN $jointable $joinalias ON $fieldname=$joinalias.$joinfield\n"; + } + $offset++; + } + + + + $offset=0; + foreach($this->metadata as $expr) + { + $offset++; + $field = $expr->left(); + + $fieldid = $field->getFieldId(); + $sql .= " LEFT JOIN document_fields_link dfl$offset ON dfl$offset.metadata_version_id=d.metadata_version_id AND dfl$offset.document_field_id=$fieldid" . "\n"; + $sql .= " LEFT JOIN document_fields df$offset ON df$offset.id=dfl$offset.document_field_id" . "\n"; + } + + + $sql .= + 'WHERE dmv.status_id=1 AND d.status_id=1 AND ' . "\n "; + + return $sql; + } + + private function resolveMetadataOffset($expr) + { + assert($expr->left()->isMetadataField() ); + + $offset=0; + foreach($this->metadata as $item) + { + if ($item->getExprId() == $expr->getExprId()) + { + return $offset; + } + $offset++; + } + throw new Exception('metadata field not found'); + } + + private function resolveJoinOffset($expr) + { + + + $offset=0; + foreach($this->db as $item) + { + if ($item->getExprId() == $expr->getExprId()) + { + return $offset; + } + $offset++; + } + throw new Exception('join field not found'); + } + + private function buildCoreSQLExpr($expr) + { + $left = $expr->left(); + $right = $expr->right(); + if (DefaultOpCollection::isBoolean($expr)) + { + $query = '(' . $this->buildCoreSQLExpr($left) . ' ' . $expr->op() . ' ' . $this->buildCoreSQLExpr($right) . 
')'; + } + else + { + $query = $this->getSQLEvalExpr($expr); + } + + if ($expr->not()) + { + $query = "NOT $query"; + } + + return $query; + } + + public function buildComplexQuery($expr) + { +// print "building complex \n\n"; + $this->exploreExprs($expr); + + $sql = $this->buildCoreSQL(); + + $sql .= $this->buildCoreSQLExpr($expr); + + return $sql; + } + + public function buildSimpleQuery($op, $group) + { +// print "building simple \n\n"; + $this->exploreGroup($group); + + $sql = $this->buildCoreSQL(); + + $offset=0; + foreach($this->db as $expr) + { + if ($offset++) + { + $sql .= " $op\n " ; + } + + $field = $expr->left(); + + if (is_null($field->getJoinTable())) + { + $alias = $this->resolveTableToAlias($field->getTable()); + $fieldname = $alias . '.' . $field->getField(); + } + else + { + $offset = $this->resolveJoinOffset($expr); + $matching = $field->getMatchingField(); + $tablename = $field->getJoinTable(); + $fieldname = "$tablename$offset.$matching"; + } + + + $value = $expr->right(); + $sql .= $value->getSQL($field, $left->modifyName($fieldname), $expr->op(), $expr->not()); + } + + $moffset=0; + foreach($this->metadata as $expr) + { + $moffset++; + if ($offset++) + { + $sql .= " $op\n " ; + } + + $field = $expr->left(); + $value = $expr->right(); + + $sql .= $value->getSQL($field, "dfl$moffset.value", $expr->getOp()); + } + + return $sql; + } + + public function getRanking($result) + { + $ranker = RankManager::get(); + $score = 0; + foreach($result as $col=>$val) + { + if ($val + 0 == 0) + { + // we are not interested if the expression failed + continue; + } + + if (substr($col, 0, 4) == 'expr' && is_numeric(substr($col, 4))) + { + + $exprno = substr($col, 4); + if ($exprno <= count($this->db)) + { + $expr = $this->db[$exprno-1]; + $left=$expr->left(); + $score += $ranker->scoreField($left->getTable(), 'T', $left->getField()); + } + else + { + $exprno -= count($this->db); + $expr = $this->metadata[$exprno-1]; + $left=$expr->left(); + $score += 
$ranker->scoreField($left->getTable(), 'M', $left->getField()); + } + } + } + + return $score; + } + + public function getResultText($result) + { + $text = array(); + foreach($result as $col=>$val) + { + if (substr($col, 0, 4) == 'expr' && is_numeric(substr($col, 4))) + { + if ($val + 0 == 0) + { + // we are not interested if the expression failed + continue; + } + $exprno = substr($col, 4); + if ($exprno <= count($this->db)) + { + $expr = $this->db[$exprno-1]; + } + else + { + $exprno -= count($this->db); + $expr = $this->metadata[$exprno-1]; + } + $text[] = (string) $expr; + } + } + return '(' . implode(') AND (', $text) . ')'; + } + + +} + + + +class OpExpr extends Expr +{ + /** + * The left side of the expression + * + * @var Expr + */ + protected $left_expr; + + /** + * The operator on the left and right + * + * @var ExprOp + */ + protected $op; + /** + * The right side of the expression + * + * @var Expr + */ + protected $right_expr; + + /** + * This indicates that the expression is negative + * + * @var boolean + */ + protected $not; + + protected $point; + + protected $has_text; + protected $has_db; + + private $debug = false; + +// protected $flattened; + + protected $results; + + public function setResults($results) + { + $this->results=$results; + } + public function getResults() + { + return $this->results; + } + + public function setHasDb($value=true) + { + $this->has_db=$value; + } + + public function setHasText($value=true) + { + $this->has_text=$value; + } + + public function getHasDb() + { + return $this->has_db; + } + public function getHasText() + { + return $this->has_text; + } + public function setPoint($point) + { + $this->point = $point; + /* if (!is_null($point)) + { + $this->flattened = new FlattenedGroup($this); + } + else + { + if (!is_null($this->flattened)) + { + unset($this->flattened); + } + $this->flattened = null; + }*/ + } + + public function getPoint() + { + return $this->point; + } + + public function hasSameOpAs($expr) + { + 
return $this->op() == $expr->op(); + } + + public static function rewriteString(&$left, &$op, &$right, $not=false) + { + if ($right->isValueExpr()) + { + $value = $right->getValue(); + } + else + { + $value = $right; + } + + $text = array(); + + + preg_match_all('/[\']([^\']*)[\']/',$value, $matches); + + foreach($matches[0] as $item) + { + $text [] = $item; + + $value = str_replace($item, '', $value); + } + + $matches = explode(' ', $value); + + foreach($matches as $item) + { + if (empty($item)) continue; + $text[] = $item; + } + + if (count($text) == 1) + { + return; + } + + $doctext = $left; + + $left = new OpExpr($doctext, $op, new ValueExpr($text[0])); + + for($i=1;$iisValueListExpr()) + { + $right->rewrite($left, $op, $right, $not); + } + else + // rewriting is based on the FieldExpr, and can expand a simple expression + // into something a little bigger. + if ($left->isFieldExpr()) + { + $left->rewrite($left, $op, $right, $not); + } + + // transformation is required to optimise the expression tree so that + // the queries on the db and full text search are optimised. 
+ if (DefaultOpCollection::isBoolean($op)) + { + $this->transform($left, $op, $right, $not); + } + + parent::__construct(); + + $left->setParent($this); + $right->setParent($this); + $this->left_expr=&$left; + $this->op = $op; + $this->right_expr=&$right; + $this->not = $not; + $this->has_text=false; + + // $this->setPoint('point'); + + if ($left->isSearchableText()) + { + $this->setHasText(); + } + else if ($left->isDBExpr()) + { + $this->setHasDb(); + } + elseif ($left->isOpExpr()) + { + if ($left->getHasText()) { $this->setHasText(); } + if ($left->getHasDb()) { $this->setHasDb(); } + } + + if ($right->isOpExpr()) + { + if ($right->getHasText()) { $this->setHasText(); } + if ($right->getHasDb()) { $this->setHasDb(); } + } + // $this->flattened=null; + + // $left_op, etc indicates that $left expression is a logical expression + $left_op = ($left->isOpExpr() && DefaultOpCollection::isBoolean($left)); + $right_op = ($right->isOpExpr() && DefaultOpCollection::isBoolean($right)); + + // check which trees match + $left_op_match = ($left_op && $this->hasSameOpAs($left)) ; + $right_op_match = ($right_op && $this->hasSameOpAs($left)) ; + + $point = null; + + + if ($left_op_match && $right_op_match) { $point = 'point'; } + + $left_op_match_flex = $left_op_match || ($left->isOpExpr()); + $right_op_match_flex = $right_op_match || ($right->isOpExpr()); + + if ($left_op_match_flex && $right_op_match_flex) { $point = 'point'; } + + if (!is_null($point)) + { + if ($left_op_match && $left->getPoint() == 'point') { $left->setPoint(null); } + if ($right_op_match && $right->getPoint() == 'point') { $right->setPoint(null); } + + if ($left->isMergePoint() && is_null($right->getPoint())) { $right->setPoint('point'); } + if ($right->isMergePoint() && is_null($left->getPoint())) { $left->setPoint('point'); } + + if ($left->isMergePoint() || $right->isMergePoint()) + { + $point = 'merge'; + + if (!$left->isMergePoint()) { $left->setPoint('point'); } + if (!$right->isMergePoint()) { 
$right->setPoint('point'); } + + if ($this->isDBonly() || $this->isTextOnly()) + { + $this->clearPoint(); + $point = 'point'; + } + } + } + + if ($point == 'point') + { + if ($this->isDBandText()) + { + $point = 'merge'; + $left->setPoint('point'); + $right->setPoint('point'); + } + } + if (is_null($point) && !DefaultOpCollection::isBoolean($op)) + { + $point = 'point'; + } + + $this->setPoint($point); + } + + private function isDBonly() + { + return $this->getHasDb() && !$this->getHasText(); + } + + private function isTextOnly() + { + return !$this->getHasDb() && $this->getHasText(); + } + + private function isDBandText() + { + return $this->getHasDb() && $this->getHasText(); + } + + /** + * Enter description here... + * + * @param OpExpr $expr + */ + protected function clearPoint() + { + if (DefaultOpCollection::isBoolean($this)) + { + $this->left()->clearPoint(); + $this->right()->clearPoint(); + } + if ($this->isMergePoint()) + { + $this->setPoint(null); + } + } + + + protected function isMergePoint() + { + return in_array($this->getPoint(), array('merge','point')); + } + + /** + * Returns the operator on the expression + * + * @return ExprOp + */ + public function op() + { + return $this->op; + } + + /** + * Returns true if the negative of the operator should be used in evaluation + * + * @param boolean $not + * @return boolean + */ + public function not($not=null) + { + if (!is_null($not)) + { + $this->not = $not; + } + + return $this->not; + } + + /** + * The left side of the expression + * + * @return Expr + */ + public function &left() + { + return $this->left_expr; + } + + /** + * The right side of the expression + * + * @return Expr + */ + public function &right() + { + return $this->right_expr; + } + + /** + * Converts the expression to a string + * + * @return string + */ + public function __toString() + { + $expr = $this->left_expr . ' ' . $this->op .' ' . 
$this->right_expr; + + if (is_null($this->parent)) + { + return $expr; + } + + if ($this->parent->isOpExpr()) + { + if ($this->parent->op != $this->op && in_array($this->op, DefaultOpCollection::$boolean)) + { + $expr = "($expr)"; + } + } + + if ($this->not()) + { + $expr = "!($expr)"; + } + + return $expr; + } + + /** + * Is the expression valid + * + * @return boolean + */ + public function is_valid() + { + $left = $this->left(); + $right = $this->right(); + return $left->is_valid() && $right->is_valid(); + } + + /** + * Finds the results that are in both record sets. + * + * @param array $leftres + * @param array $rightres + * @return array + */ + protected static function intersect($leftres, $rightres) + { + if (empty($leftres) || empty($rightres)) + { + return array(); // small optimisation + } + $result = array(); + foreach($leftres as $item) + { + $document_id = $item->DocumentID; + + if (!$item->IsLive) + { + continue; + } + + if (array_key_exists($document_id, $rightres)) + { + $check = $rightres[$document_id]; + + $result[$document_id] = ($item->Rank < $check->Rank)?$check:$item; + } + } + return $result; + } + + /** + * The objective of this function is to merge the results so that there is a union of the results, + * but there should be no duplicates. + * + * @param array $leftres + * @param array $rightres + * @return array + */ + protected static function union($leftres, $rightres) + { + if (empty($leftres)) + { + return $rightres; // small optimisation + } + if (empty($rightres)) + { + return $leftres; // small optimisation + } + $result = array(); + + foreach($leftres as $item) + { + if ($item->IsLive) + { + $result[$item->DocumentID] = $item; + } + } + + foreach($rightres as $item) + { + if (!array_key_exists($item->DocumentID, $result) || $item->Rank > $result[$item->DocumentID]->Rank) + { + $result[$item->DocumentID] = $item; + } + } + return $result; + } + + /** + * Enter description here... 
+ * + * @param OpExpr $left + * @param ExprOp $op + * @param OpExpr $right + * @param boolean $not + */ + public function transform(& $left, & $op, & $right, & $not) + { + + if (!$left->isOpExpr() || !$right->isOpExpr() || !DefaultOpCollection::isBoolean($op)) + { + return; + } + + if ($left->isTextOnly() && $right->isDBonly()) + { + // we just swap the items around, to ease other transformations + $tmp = $left; + $left = $right; + $right = $tmp; + return; + } + + if ($op != $right->op() || !DefaultOpCollection::isBoolean($right)) + { + return; + } + + if ($op == ExprOp::OP_OR && ($not || $right->not())) + { + // NOTE: we can't transform. e.g. + // db or !(db or txt) => db or !db and !txt + // so nothing to do + + // BUT: db and !(db and txt) => db and !db and !txt + return; + } + + $rightLeft = $right->left(); + $rightRight = $right->right(); + + if ($left->isDBonly() && $rightLeft->isDBonly()) + { + $newLeft = new OpExpr( $left, $op, $rightLeft ); + + $right = $rightRight; + $left = $newLeft; + return; + } + + if ($left->isTextOnly() && $rightRight->isTextOnly()) + { + $newRight = new OpExpr($left, $op, $rightRight); + $left = $rightLeft; + $right = $newRight; + return; + } + + } + + private function findDBNode($start, $op, $what) + { + if ($start->op() != $op) + { + return null; + } + switch($what) + { + case 'db': + if ($start->isDBonly()) + { + return $start; + } + break; + case 'txt': + if ($start->isTextOnly()) + { + return $start; + } + break; + } + $node = $this->findDBNode($start->left(), $op, $what); + if (is_null($node)) + { + $node = $this->findDBNode($start->right(), $op, $what); + } + return $node; + + } + + public function traverse($object, $method, $param) + { + if ($this->isOpExpr()) + { + $object->$method($param); + } + } + + private function exploreItem($item, & $group, $interest) + { + if (($interest == 'db' && $item->getHasDb()) || + ($interest == 'text' && $item->getHasText())) + { + if (in_array($item->op(), array(ExprOp::OP_OR, 
ExprOp::OP_AND))) + { + $this->exploreItem($item->left(), $group, $interest); + $this->exploreItem($item->right(), $group, $interest); + } + else + { + $group[] = $item; + } + } + } + + private function explore($left, $right, & $group, $interest) + { + $this->exploreItem($left, $group, $interest); + $this->exploreItem($right, $group, $interest); + } + + private function exec_db_query($op, $group) + { + if (empty($group)) { return array(); } + + $exprbuilder = new SQLQueryBuilder(); + + if (count($group) == 1) + { + $sql = $exprbuilder->buildComplexQuery($group[0]); + } + else + { + $sql = $exprbuilder->buildSimpleQuery($op, $group); + } + + $results = array(); + + if ($this->debug) print "\n\n$sql\n\n"; + $rs = DBUtil::getResultArray($sql); + + if (PEAR::isError($rs)) + { + throw new Exception($rs->getMessage()); + } + + foreach($rs as $item) + { + $document_id = $item['id']; + $rank = $exprbuilder->getRanking($item); + if (!array_key_exists($document_id, $results) || $rank > $results[$document_id]->Rank) + { + $results[$document_id] = new MatchResult($document_id, $rank, $item['title'], $exprbuilder->getResultText($item)); + } + } + + return $results; + + } + + private function exec_text_query($op, $group) + { + if (empty($group)) { return array(); } + + $exprbuilder = new TextQueryBuilder(); + + if (count($group) == 1) + { + $query = $exprbuilder->buildComplexQuery($group[0]); + } + else + { + $query = $exprbuilder->buildSimpleQuery($op, $group); + } + + $indexer = Indexer::get(); + if ($this->debug) print "\n\n$query\n\n"; + $results = $indexer->query($query); + foreach($results as $item) + { + $item->Rank = $exprbuilder->getRanking($item); + $exprbuilder->setQuery($query); + $item->Text = $exprbuilder->getResultText($item); + } + + return $results; + + + } + + public function evaluate() + { + $left = $this->left(); + $right = $this->right(); + $op = $this->op(); + $point = $this->getPoint(); + $result = array(); + if (empty($point)) + { + $point = 'point'; + } 
+ + if ($point == 'merge') + { + + $leftres = $left->evaluate(); + $rightres = $right->evaluate(); + switch ($op) + { + case ExprOp::OP_AND: + if ($this->debug) print "\n\nmerge: intersect\n\n"; + $result = OpExpr::intersect($leftres, $rightres); + break; + case ExprOp::OP_OR: + if ($this->debug) print "\n\nmerge: union\n\n"; + $result = OpExpr::union($leftres, $rightres); + break; + default: + throw new Exception("this condition should not happen"); + } + } + elseif ($point == 'point') + { + if ($this->isDBonly()) + { + $result = $this->exec_db_query($op, array($this)); + } + elseif ($this->isTextOnly()) + { + $result = $this->exec_text_query($op, array($this)); + } + elseif (in_array($op, array(ExprOp::OP_OR, ExprOp::OP_AND))) + { + $db_group = array(); + $text_group = array(); + $this->explore($left, $right, $db_group, 'db'); + $this->explore($left, $right, $text_group, 'text'); + + $db_result = $this->exec_db_query($op, $db_group); + $text_result = $this->exec_text_query($op, $text_group); + + switch ($op) + { + case ExprOp::OP_AND: + if ($this->debug) print "\n\npoint: intersect\n\n"; + $result = OpExpr::intersect($db_result, $text_result); + break; + case ExprOp::OP_OR: + if ($this->debug) print "\n\nmerge: union\n\n"; + $result = OpExpr::union($db_result, $text_result); + break; + default: + throw new Exception('how did this happen??'); + } + } + else + { + throw new Exception('and this?'); + } + } + else + { + // we don't have to do anything + //throw new Exception('Is this reached ever?'); + } + + $permResults = array(); + foreach($result as $idx=>$item) + { + $doc = Document::get($item->DocumentID); + if (Permission::userHasDocumentReadPermission($doc)) + { + $permResults[$idx] = $item; + } + } + + return $permResults; + } + + public function toViz(&$str, $phase) + { + $expr_id = $this->getExprId(); + $left = $this->left(); + $right = $this->right(); + $hastext = $this->getHasText()?'TEXT':''; + $hasdb = $this->getHasDb()?'DB':''; + switch ($phase) + { + 
case 0: + $not = $this->not()?'NOT':''; + $str .= "struct$expr_id [style=box, label=\"$expr_id: $not $this->op $this->point $hastext$hasdb\"]\n"; + break; + case 1: + $left_id = $left->getExprId(); + $str .= "struct$expr_id -> struct$left_id\n"; + $right_id = $right->getExprId(); + $str .= "struct$expr_id -> struct$right_id\n"; + break; + } + $left->toViz($str, $phase); + $right->toViz($str, $phase); + } + +} + + + + +?> \ No newline at end of file diff --git a/search2/search/exprConstants.inc.php b/search2/search/exprConstants.inc.php new file mode 100755 index 0000000..caedacb --- /dev/null +++ b/search2/search/exprConstants.inc.php @@ -0,0 +1,88 @@ +'; + const LESS_THAN_EQUAL = '<='; + const GREATER_THAN_EQUAL = '>='; + const OP_AND = 'AND'; + const OP_OR = 'OR'; + const IS_NOT = 'is not'; + +} + + + +/** + * This is a collection of various operators that may be used + */ +class DefaultOpCollection +{ + public static $is = array(ExprOp::IS); + public static $contains = array(ExprOp::CONTAINS, ExprOp::STARTS_WITH , ExprOp::ENDS_WITH ); + public static $between = array(ExprOp::BETWEEN); + public static $boolean = array(ExprOp::OP_OR , ExprOp::OP_AND ); + + /** + * Validates if the operator on the expression's parent is allowed + * + * @param Expr $expr + * @param array $collection + * @return boolean + */ + public static function validateParent(&$expr, &$collection) + { + $parent = $expr->getParent(); + if ($parent instanceof OpExpr) + { + return in_array($parent->op(), $collection); + } + return false; + } + + public static function validate(&$expr, &$collection) + { + if ($expr instanceof OpExpr) + { + return in_array($expr->op(), $collection); + } + return false; + } + + public static function isBoolean(&$expr) + { + if ($expr instanceof OpExpr) + { + return in_array($expr->op(), DefaultOpCollection::$boolean); + } + elseif(is_string($expr)) + { + return in_array($expr, DefaultOpCollection::$boolean); + } + return false; + } +} + +class FieldInputType +{ + 
const TEXT = 'STRING'; + const INT = 'INT'; + const REAL = 'FLOAT'; + const BOOLEAN = 'BOOL'; + const USER_LIST = 'USERLIST'; + const DATE = 'DATE'; + const MIME_TYPES = 'MIMETYPES'; + const DOCUMENT_TYPES = 'DOCTYPES'; + const DATEDIFF = 'DATEDIFF'; + const FULLTEXT = 'FULLTEXT'; + const FILESIZE = 'FILESIZE'; +} + +?> \ No newline at end of file diff --git a/search2/search/fieldRegistry.inc.php b/search2/search/fieldRegistry.inc.php new file mode 100755 index 0000000..9a1b498 --- /dev/null +++ b/search2/search/fieldRegistry.inc.php @@ -0,0 +1,254 @@ +fields = array(); + $this->alias = array(); + $this->metadata = array(); + $this->display=array(); + + $config = KTConfig::getSingleton(); + + $this->path = $config->get('search/fieldsPath'); + } + + /** + * Retuns a singleton to the class. + * + * @return ExprFieldRegistry + */ + public static function getRegistry() + { + static $singleton = null; + + if (is_null($singleton)) + { + $singleton = new ExprFieldRegistry(); + $singleton->registerFields(); + } + + return $singleton; + } + + /** + * Add a field to the registry. 
+ * + * @param FieldExpr $field + */ + private function registerField($field) + { + assert(!is_null($field)); + $classname = strtolower(get_class($field)); + $alias = strtolower($field->getAlias()); + + if (array_key_exists($classname, $this->fields) || array_key_exists($alias, $this->alias)) + { + throw new ResolutionException("Class $classname with alias $alias already registered."); + } + + $this->fields[$classname] = $field; + $this->alias[$alias] = $field; + + if ($field instanceof MetadataField ) + { + $fieldsetn = $field->getFieldSet(); + $fieldn= $field->getField(); + $this->metadata[$fieldsetn][$fieldn] = $field; + $this->display[] = "[\"$fieldsetn\"][\"$fieldn\"]"; + } + else + { + $this->display[] = $field->getAlias(); + } + + } + + public function resolveAlias($alias) + { + return $this->getField($alias); + } + + public function resolveMetadataField($fieldset, $field) + { + if (!array_key_exists($fieldset,$this->metadata)) + { + throw new ResolutionException("Metadata class for fieldset '$fieldset' and field '$field' not found."); + } + if (!array_key_exists($field,$this->metadata[$fieldset])) + { + throw new ResolutionException("Metadata class for fieldset '$fieldset' and field '$field' not found."); + } + return $this->metadata[$fieldset][$field]; + } + + + /** + * Looks up a field by class name or alias in the singleton registry. + * + * @param string $fieldname + * @return FieldExpr + */ + public static function lookupField($fieldname) + { + $registry = ExprFieldRegistry::getRegistry(); + return $registry->getField($fieldname); + } + + /** + * Returns a field from the registry (class names are tried before aliases). + * + * @param string $fieldname + * @return FieldExpr + */ + public function getField($fieldname) + { + $fieldname = strtolower($fieldname); + if (array_key_exists($fieldname, $this->fields)) + { + return $this->fields[$fieldname]; + } + if (array_key_exists($fieldname, $this->alias)) + { + return $this->alias[$fieldname]; + } + throw new ResolutionException('Field not found: ' . 
$fieldname); + } + + public function getAliasNames() + { + return $this->display; + } + + /** + * Load all fields into the registry + * + */ + public function registerFields() + { + $this->fields = array(); + + $dir = opendir($this->path); + while (($file = readdir($dir)) !== false) + { + if (substr($file,-13) == 'Field.inc.php') + { + require_once($this->path . '/' . $file); + $class = substr($file, 0, -8); + + if (!class_exists($class)) + { + continue; + } + + $field = new $class; + if (is_null($field) || !($field instanceof FieldExpr)) + { + continue; + } + + $this->registerField($field); + } + } + closedir($dir); + + $this->registerMetdataFields(); + } + + /** + * Registers metadata fields in the system (names are exported as PHP literals so quotes cannot break the generated class). + * + */ + private function registerMetdataFields() + { + $sql = "SELECT + fs.name as fieldset, f.name as field, fs.id as fsid, f.id as fid + FROM + fieldsets fs + INNER JOIN document_fields f ON f.parent_fieldset=fs.id + WHERE + fs.disabled=0"; + $result = DBUtil::getResultArray($sql); + + foreach($result as $record) + { + $fieldset = var_export($record['fieldset'], true); + $field = var_export($record['field'], true); + $fieldsetid = (int) $record['fsid']; + $fieldid = (int) $record['fid']; + $classname = "MetadataField$fieldid"; + + $classdefn = " + class $classname extends MetadataField + { + public function __construct() + { + parent::__construct($fieldset,$field,$fieldsetid, $fieldid); + } + }"; + eval($classdefn); + + $field = new $classname; + $this->registerField($field); + } + } + + public function getFields() + { + $result = array(); + foreach($this->fields as $field) + { + if ($field instanceof MetadataField) + { + continue; + } + $result[] = $field; + } + return $result; + } + +} + +?> \ No newline at end of file diff --git a/search2/search/fields/AnyMetadataField.inc.php b/search2/search/fields/AnyMetadataField.inc.php new file mode 100755 index 0000000..86e3345 --- /dev/null +++ b/search2/search/fields/AnyMetadataField.inc.php @@ -0,0 +1,22 @@ +setAlias('Metadata'); + } + + public function 
getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::TEXT)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/CheckedOutByField.inc.php b/search2/search/fields/CheckedOutByField.inc.php new file mode 100755 index 0000000..3a4fabe --- /dev/null +++ b/search2/search/fields/CheckedOutByField.inc.php @@ -0,0 +1,24 @@ +setAlias('CheckedOutBy'); + $this->joinTo('users', 'id'); + $this->matchField('name'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::USER_LIST)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/CheckedOutDeltaField.inc.php b/search2/search/fields/CheckedOutDeltaField.inc.php new file mode 100644 index 0000000..15ebcf6 --- /dev/null +++ b/search2/search/fields/CheckedOutDeltaField.inc.php @@ -0,0 +1,41 @@ +setAlias('CheckedoutDelta'); + $this->isValueQuoted(false); + } + + public function modifyName($sql) + { + $this->modifiedName = $sql; + $now = date('Y-m-d'); + + + return "cast('$now' as date)"; + } + + + public function modifyValue($value) + { + + return "cast($this->modifiedName + $value as date)"; + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::DATEDIFF)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$between); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/CheckedOutField.inc.php b/search2/search/fields/CheckedOutField.inc.php new file mode 100644 index 0000000..3e4cec2 --- /dev/null +++ b/search2/search/fields/CheckedOutField.inc.php @@ -0,0 +1,27 @@ +setAlias('CheckedOut'); + } + + public function modifyName($sql) + { + return 
"cast($sql as date)"; + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::DATE)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$between); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/CreatedByField.inc.php b/search2/search/fields/CreatedByField.inc.php new file mode 100755 index 0000000..67e60cd --- /dev/null +++ b/search2/search/fields/CreatedByField.inc.php @@ -0,0 +1,24 @@ +setAlias('CreatedBy'); + $this->joinTo('users', 'id'); + $this->matchField('name'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::USER_LIST)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/CreatedDeltaField.inc.php b/search2/search/fields/CreatedDeltaField.inc.php new file mode 100755 index 0000000..42db168 --- /dev/null +++ b/search2/search/fields/CreatedDeltaField.inc.php @@ -0,0 +1,41 @@ +setAlias('CreatedDelta'); + $this->isValueQuoted(false); + } + + public function modifyName($sql) + { + $this->modifiedName = $sql; + $now = date('Y-m-d'); + + + return "cast('$now' as date)"; + } + + + public function modifyValue($value) + { + + return "cast($this->modifiedName + $value as date)"; + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::DATEDIFF)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$between); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/CreatedField.inc.php b/search2/search/fields/CreatedField.inc.php new file mode 100755 index 0000000..7257b69 --- /dev/null +++ b/search2/search/fields/CreatedField.inc.php @@ -0,0 +1,27 @@ +setAlias('Created'); + } + + public function 
modifyName($sql) + { + return "cast($sql as date)"; + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::DATE)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$between); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/DiscussionTextField.inc.php b/search2/search/fields/DiscussionTextField.inc.php new file mode 100755 index 0000000..0e9146b --- /dev/null +++ b/search2/search/fields/DiscussionTextField.inc.php @@ -0,0 +1,26 @@ +setAlias('DiscussionText'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::FULLTEXT)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$contains); + } + + + +} + +?> \ No newline at end of file diff --git a/search2/search/fields/DocumentIdField.inc.php b/search2/search/fields/DocumentIdField.inc.php new file mode 100755 index 0000000..d0699ab --- /dev/null +++ b/search2/search/fields/DocumentIdField.inc.php @@ -0,0 +1,22 @@ +setAlias('DocumentId'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::INT)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/DocumentTextField.inc.php b/search2/search/fields/DocumentTextField.inc.php new file mode 100755 index 0000000..b6ee64f --- /dev/null +++ b/search2/search/fields/DocumentTextField.inc.php @@ -0,0 +1,29 @@ +setAlias('DocumentText'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::FULLTEXT)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$contains); + } + + + + + + +} + +?> \ No newline at end of file diff --git 
a/search2/search/fields/DocumentTypeField.inc.php b/search2/search/fields/DocumentTypeField.inc.php new file mode 100755 index 0000000..f64280f --- /dev/null +++ b/search2/search/fields/DocumentTypeField.inc.php @@ -0,0 +1,24 @@ +setAlias('DocumentType'); + $this->joinTo('document_types_lookup', 'id'); + $this->matchField('name'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::DOCUMENT_TYPES)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/FilenameField.inc.php b/search2/search/fields/FilenameField.inc.php new file mode 100755 index 0000000..4dff8f2 --- /dev/null +++ b/search2/search/fields/FilenameField.inc.php @@ -0,0 +1,22 @@ +setAlias('Filename'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::TEXT)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/FilesizeField.inc.php b/search2/search/fields/FilesizeField.inc.php new file mode 100755 index 0000000..9d527a3 --- /dev/null +++ b/search2/search/fields/FilesizeField.inc.php @@ -0,0 +1,22 @@ +setAlias('Filesize'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::FILESIZE)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/FolderField.inc.php b/search2/search/fields/FolderField.inc.php new file mode 100755 index 0000000..b403016 --- /dev/null +++ b/search2/search/fields/FolderField.inc.php @@ -0,0 +1,24 @@ +setAlias('Folder'); + $this->joinTo('folders', 'id'); + $this->matchField('full_path'); + } + + public function 
getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::TEXT)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/FolderFieldID.inc.php b/search2/search/fields/FolderFieldID.inc.php new file mode 100644 index 0000000..5da218d --- /dev/null +++ b/search2/search/fields/FolderFieldID.inc.php @@ -0,0 +1,22 @@ +setAlias('FolderID'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::INT)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/GeneralTextField.inc.php b/search2/search/fields/GeneralTextField.inc.php new file mode 100755 index 0000000..9508442 --- /dev/null +++ b/search2/search/fields/GeneralTextField.inc.php @@ -0,0 +1,45 @@ +setAlias('GeneralText'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::TEXT)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$contains); + } + + + public function rewrite(&$left, &$op, &$right) + { + // note the grouping of the db queries + + $left = new OpExpr(new DocumentTextField(), ExprOp::CONTAINS, $right); + + $op = ExprOp::OP_OR; + + $right = new OpExpr( + new OpExpr(new FilenameField(), ExprOp::CONTAINS, $right), + ExprOp::OP_OR, + new OpExpr( + new OpExpr( + new TitleField(), ExprOp::CONTAINS, $right), + ExprOp::OP_OR, + new OpExpr(new AnyMetadataField(), ExprOp::CONTAINS, $right) + ) + ); + } + + +} + +?> \ No newline at end of file diff --git a/search2/search/fields/IsCheckedOutField.inc.php b/search2/search/fields/IsCheckedOutField.inc.php new file mode 100755 index 0000000..74019d1 --- /dev/null +++ 
b/search2/search/fields/IsCheckedOutField.inc.php @@ -0,0 +1,44 @@ +setAlias('IsCheckedOut'); + $this->isValueQuoted(false); + } + + public function modifyValue($value) + { + if (is_numeric($value)) + { + $value = ($value+0)?1:0; + } + else + { + switch(strtolower($value)) + { + case 'true': + case 'yes': + $value=1; + break; + default: + $value=0; + } + } + return $value; + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::BOOLEAN)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/IsImmutableField.inc.php b/search2/search/fields/IsImmutableField.inc.php new file mode 100755 index 0000000..acbb1b0 --- /dev/null +++ b/search2/search/fields/IsImmutableField.inc.php @@ -0,0 +1,44 @@ +setAlias('IsImmutable'); + $this->isValueQuoted(false); + } + + public function modifyValue($value) + { + if (is_numeric($value)) + { + $value = ($value+0)?1:0; + } + else + { + switch(strtolower($value)) + { + case 'true': + case 'yes': + $value=1; + break; + default: + $value=0; + } + } + return $value; + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::BOOLEAN)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/MimeTypeField.inc.php b/search2/search/fields/MimeTypeField.inc.php new file mode 100755 index 0000000..5194612 --- /dev/null +++ b/search2/search/fields/MimeTypeField.inc.php @@ -0,0 +1,26 @@ +setAlias('MimeType'); + $this->joinTo('mime_types', 'id'); + $this->matchField('mimetypes'); + } + + public function getInputRequirements() + { + // ideally MIME_TYPES + // but we must rework the mime_types table to be prettier! 
+ return array('value'=>array('type'=>FieldInputType::TEXT)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/ModifiedByField.inc.php b/search2/search/fields/ModifiedByField.inc.php new file mode 100755 index 0000000..7eca34d --- /dev/null +++ b/search2/search/fields/ModifiedByField.inc.php @@ -0,0 +1,24 @@ +setAlias('ModifiedBy'); + $this->joinTo('users', 'id'); + $this->matchField('name'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::USER_LIST)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/ModifiedDeltaField.inc.php b/search2/search/fields/ModifiedDeltaField.inc.php new file mode 100755 index 0000000..0264dc8 --- /dev/null +++ b/search2/search/fields/ModifiedDeltaField.inc.php @@ -0,0 +1,39 @@ +setAlias('ModifiedDelta'); + $this->isValueQuoted(false); + } + + public function modifyName($sql) + { + $this->modifiedName = $sql; + $now = date('Y-m-d'); + + + return "cast('$now' as date)"; + } + + public function modifyValue($value) + { + return "cast($this->modifiedName + $value as date)"; + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::DATEDIFF)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$between); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/ModifiedField.inc.php b/search2/search/fields/ModifiedField.inc.php new file mode 100755 index 0000000..f7653c6 --- /dev/null +++ b/search2/search/fields/ModifiedField.inc.php @@ -0,0 +1,27 @@ +setAlias('Modified'); + } + + public function modifyName($sql) + { + return "cast($sql as date)"; + } + + public function 
getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::DATE)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$between); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/TagField.inc.php b/search2/search/fields/TagField.inc.php new file mode 100755 index 0000000..f156989 --- /dev/null +++ b/search2/search/fields/TagField.inc.php @@ -0,0 +1,22 @@ +setAlias('Tag'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::TEXT)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/TitleField.inc.php b/search2/search/fields/TitleField.inc.php new file mode 100755 index 0000000..8b02c69 --- /dev/null +++ b/search2/search/fields/TitleField.inc.php @@ -0,0 +1,22 @@ +setAlias('Title'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::TEXT)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/WorkflowField.inc.php b/search2/search/fields/WorkflowField.inc.php new file mode 100755 index 0000000..7ccdc7c --- /dev/null +++ b/search2/search/fields/WorkflowField.inc.php @@ -0,0 +1,24 @@ +setAlias('Workflow'); + $this->joinTo('workflows', 'id'); + $this->matchField('name'); + } + + public function getInputRequirements() + { + return array('value'=>array('type'=>FieldInputType::TEXT)); + } + + public function is_valid() + { + return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is); + } +} + +?> \ No newline at end of file diff --git a/search2/search/fields/WorkflowIDField.inc.php b/search2/search/fields/WorkflowIDField.inc.php new file mode 100644 index 
0000000..d4c9774
--- /dev/null
+++ b/search2/search/fields/WorkflowIDField.inc.php
@@ -0,0 +1,22 @@
+setAlias('WorkflowID');
+    }
+
+    public function getInputRequirements()
+    {
+        return array('value'=>array('type'=>FieldInputType::INT));
+    }
+
+    public function is_valid()
+    {
+        return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is);
+    }
+}
+
+?>
\ No newline at end of file
diff --git a/search2/search/fields/WorkflowStateField.inc.php b/search2/search/fields/WorkflowStateField.inc.php
new file mode 100755
index 0000000..6950f26
--- /dev/null
+++ b/search2/search/fields/WorkflowStateField.inc.php
@@ -0,0 +1,24 @@
+setAlias('WorkflowState');
+        $this->joinTo('workflow_states', 'id');
+        $this->matchField('name');
+    }
+
+    public function getInputRequirements()
+    {
+        return array('value'=>array('type'=>FieldInputType::TEXT));
+    }
+
+    public function is_valid()
+    {
+        return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is);
+    }
+}
+
+?>
\ No newline at end of file
diff --git a/search2/search/fields/WorkflowStateIDField.inc.php b/search2/search/fields/WorkflowStateIDField.inc.php
new file mode 100644
index 0000000..d47c788
--- /dev/null
+++ b/search2/search/fields/WorkflowStateIDField.inc.php
@@ -0,0 +1,22 @@
+setAlias('WorkflowStateID');
+    }
+
+    public function getInputRequirements()
+    {
+        return array('value'=>array('type'=>FieldInputType::INT));
+    }
+
+    public function is_valid()
+    {
+        return DefaultOpCollection::validateParent($this, DefaultOpCollection::$is);
+    }
+}
+
+?>
\ No newline at end of file
diff --git a/search2/search/search.inc.php b/search2/search/search.inc.php
new file mode 100755
index 0000000..3238d7b
--- /dev/null
+++ b/search2/search/search.inc.php
@@ -0,0 +1,482 @@
+Rank == $b->Rank)
+    {
+        if ($a->Title == $b->Title)
+            return 0;
+        // we'll show docs in ascending order by name
+        return ($a->Title < $b->Title)?-1:1;
+    }
+    // we want to be in descending order
+    return ($a->Rank > $b->Rank)?-1:1;
+}
+
+/**
+ * usort() callback that orders field descriptors by their 'alias' entry, ascending.
+ *
+ * @param array $a Descriptor containing an 'alias' key.
+ * @param array $b Descriptor containing an 'alias' key.
+ * @return int 0 when the aliases are equal, -1 when $a sorts first, 1 otherwise.
+ */
+function search_alias_compare($a, $b)
+{
+    if ($a['alias'] == $b['alias']) return 0;
+    return ($a['alias'] < $b['alias'])?-1:1;
+}
+
+/**
+ * Static lookups used by the search UI: database queries for the selectable
+ * values (document types, users, workflows, fieldsets, ...) and renderers that
+ * turn those rows into JavaScript array literals.
+ */
+class SearchHelper
+{
+    public static function getSavedSearchEvents()
+    {
+        // TODO: not implemented - no query is built or run, nothing is returned.
+        $sql = "";
+    }
+
+    /**
+     * Renders a value as a double-quoted JavaScript string literal.
+     *
+     * Backslashes, double quotes and line breaks are escaped so the value
+     * cannot terminate the literal, and "</" is emitted as "<\/" so the value
+     * cannot close a surrounding <script> element. Values without those
+     * characters are emitted unchanged between the quotes.
+     *
+     * @param mixed $value Value to embed; converted to string first.
+     * @return string The quoted literal, including the surrounding quotes.
+     */
+    private static function jsQuote($value)
+    {
+        // strtr() with an array replaces longest keys first and never
+        // re-scans replaced text, so the escapes cannot compound.
+        $escaped = strtr((string) $value, array(
+            '\\' => '\\\\',
+            '"'  => '\\"',
+            "\n" => '\\n',
+            "\r" => '\\r',
+            '</' => '<\\/',
+        ));
+        return '"' . $escaped . '"';
+    }
+
+    /**
+     * Renders rows carrying 'id' and 'name' keys as a JavaScript array of
+     * {id: "...", name: "..."} objects.
+     *
+     * @param array $rows Rows with 'id' and 'name' keys.
+     * @return string JavaScript array literal.
+     */
+    private static function jsIdNameList($rows)
+    {
+        $entries = array();
+        foreach ($rows as $row)
+        {
+            $entries[] = "\n\t{id: " . SearchHelper::jsQuote($row['id'])
+                . ', name: ' . SearchHelper::jsQuote($row['name']) . '}';
+        }
+        return '[' . implode(',', $entries) . ']';
+    }
+
+    /**
+     * Renders document types as a JavaScript array of {id, name} objects.
+     *
+     * @param array|null $documenttypes Rows with 'id' and 'name'; loaded via
+     *                                  getDocumentTypes() when null.
+     * @return string JavaScript array literal.
+     */
+    public static function getJSdocumentTypesStruct($documenttypes = null)
+    {
+        if (is_null($documenttypes))
+        {
+            $documenttypes = SearchHelper::getDocumentTypes();
+        }
+        return SearchHelper::jsIdNameList($documenttypes);
+    }
+
+    /**
+     * Renders mime types as a JavaScript array of quoted names.
+     *
+     * @param array|null $mimetypes Rows with a 'name' key; loaded via
+     *                              getMimeTypes() when null.
+     * @return string JavaScript array literal.
+     */
+    public static function getJSmimeTypesStruct($mimetypes = null)
+    {
+        if (is_null($mimetypes))
+        {
+            $mimetypes = SearchHelper::getMimeTypes();
+        }
+        $entries = array();
+        foreach ($mimetypes as $mimetype)
+        {
+            $entries[] = "\n\t" . SearchHelper::jsQuote($mimetype['name']);
+        }
+        return '[' . implode(',', $entries) . ']';
+    }
+
+    /**
+     * Renders users as a JavaScript array of {id, name} objects.
+     *
+     * @param array|null $users Rows with 'id' and 'name'; loaded via
+     *                          getUsers() when null.
+     * @return string JavaScript array literal.
+     */
+    public static function getJSusersStruct($users = null)
+    {
+        if (is_null($users))
+        {
+            $users = SearchHelper::getUsers();
+        }
+        return SearchHelper::jsIdNameList($users);
+    }
+
+    /**
+     * Renders search fields as a JavaScript array of {alias, name, type} objects.
+     *
+     * @param array|null $fields Rows with 'alias', 'display' and 'type';
+     *                           loaded via getSearchFields() when null.
+     * @return string JavaScript array literal.
+     */
+    public static function getJSfieldsStruct($fields = null)
+    {
+        if (is_null($fields))
+        {
+            $fields = SearchHelper::getSearchFields();
+        }
+        $entries = array();
+        foreach ($fields as $field)
+        {
+            $entries[] = "\n\t{alias: " . SearchHelper::jsQuote($field['alias'])
+                . ', name: ' . SearchHelper::jsQuote($field['display'])
+                . ', type:' . SearchHelper::jsQuote($field['type']) . '}';
+        }
+        return '[' . implode(',', $entries) . ']';
+    }
+
+    /**
+     * Renders workflows, each with its states, as a JavaScript array of
+     * {id, name, states: [{id, name}, ...]} objects.
+     *
+     * States are fetched per workflow through getWorkflowStates().
+     *
+     * @param array|null $workflows Rows with 'id' and 'name'; loaded via
+     *                              getWorkflows() when null.
+     * @return string JavaScript array literal.
+     */
+    public static function getJSworkflowStruct($workflows = null)
+    {
+        if (is_null($workflows))
+        {
+            $workflows = SearchHelper::getWorkflows();
+        }
+
+        $workflowEntries = array();
+        foreach ($workflows as $workflow)
+        {
+            $wid = $workflow['id'];
+
+            $states = SearchHelper::getWorkflowStates($wid);
+            if (!is_array($states))
+            {
+                // a failed lookup yields a workflow without states rather
+                // than iterating over an error object
+                $states = array();
+            }
+
+            $stateEntries = array();
+            foreach ($states as $state)
+            {
+                // each state is identified by its own id, not the workflow's
+                $stateEntries[] = "\n\t\t{id:" . SearchHelper::jsQuote($state['id'])
+                    . ', name: ' . SearchHelper::jsQuote($state['name']) . '}';
+            }
+
+            $workflowEntries[] = "\n\t{id:" . SearchHelper::jsQuote($wid)
+                . ', name: ' . SearchHelper::jsQuote($workflow['name'])
+                . ', states: [ ' . implode(',', $stateEntries) . ']}';
+        }
+
+        return '[' . implode(',', $workflowEntries) . ']';
+    }
+
+    /**
+     * Renders fieldsets, their fields and each field's lookup options as a
+     * nested JavaScript array literal.
+     *
+     * Fields are fetched per fieldset through getFields().
+     *
+     * @param array|null $fieldsets Rows with 'id', 'name' and 'description';
+     *                              loaded via getFieldsets() when null.
+     * @return string JavaScript array literal.
+     */
+    public static function getJSfieldsetStruct($fieldsets = null)
+    {
+        if (is_null($fieldsets))
+        {
+            $fieldsets = SearchHelper::getFieldsets();
+        }
+
+        $fieldsetEntries = array();
+        foreach ($fieldsets as $fieldset)
+        {
+            $fsid = $fieldset['id'];
+
+            $fields = SearchHelper::getFields($fsid);
+            if (!is_array($fields))
+            {
+                // getFields() returns a PEAR_Error for a fieldset without
+                // fields; render such a fieldset with an empty field list
+                $fields = array();
+            }
+
+            $fieldEntries = array();
+            foreach ($fields as $field)
+            {
+                $options = $field['options'];
+                if (!is_array($options))
+                {
+                    $options = array();
+                }
+
+                $optionEntries = array();
+                foreach ($options as $option)
+                {
+                    $optionEntries[] = "\n\t\t\t{id: " . SearchHelper::jsQuote($option['id'])
+                        . ', name: ' . SearchHelper::jsQuote($option['name']) . '}';
+                }
+
+                $fieldEntries[] = "\n\t\t{id:" . SearchHelper::jsQuote($field['id'])
+                    . ', name:' . SearchHelper::jsQuote($field['name'])
+                    . ', description:' . SearchHelper::jsQuote($field['description'])
+                    . ', datatype:' . SearchHelper::jsQuote($field['datatype'])
+                    . ', control:' . SearchHelper::jsQuote($field['control'])
+                    . ', options: [' . implode(',', $optionEntries) . ']}';
+            }
+
+            $fieldsetEntries[] = "\n\t{id:" . SearchHelper::jsQuote($fsid)
+                . ',name:' . SearchHelper::jsQuote($fieldset['name'])
+                . ',description:' . SearchHelper::jsQuote($fieldset['description'])
+                . ', fields: [' . implode(',', $fieldEntries) . ']}';
+        }
+
+        return '[' . implode(',', $fieldsetEntries) . ']';
+    }
+
+
+    /**
+     * Lists saved searches (type 'S') visible to a user.
+     *
+     * System administrators see every saved search; other users see their
+     * own searches and shared ones.
+     *
+     * @param int $userID Id of the requesting user.
+     * @return mixed Result of DBUtil::getResultArray() (rows with 'id' and 'name').
+     */
+    public static function getSavedSearches($userID)
+    {
+        $sql = "SELECT id, name FROM search_saved WHERE type='S'";
+
+        // if we are not the system admin, then we get only ours or shared searches
+        if (!Permission::userIsSystemAdministrator($userID))
+        {
+            // the id is embedded unquoted, so force it to an integer
+            $ownerID = (int) $userID;
+            $sql .= " and ( user_id=$ownerID OR shared=1 ) ";
+        }
+
+        $rs = DBUtil::getResultArray($sql);
+        return $rs;
+    }
+
+    /**
+     * Lists the registered search fields, sorted by alias.
+     *
+     * @return array Rows with 'alias', 'display' and 'type' keys, where 'type'
+     *               is the ['value']['type'] entry of the field's input requirements.
+     */
+    public static function getSearchFields()
+    {
+        $registry = ExprFieldRegistry::getRegistry();
+
+        $fields = $registry->getFields();
+
+        $results = array();
+        foreach($fields as $field )
+        {
+            $type = $field->getInputRequirements();
+            $type = $type['value']['type'];
+            $results[] = array('alias'=>$field->getAlias(), 'display'=>$field->getDisplay(), 'type'=>$type);
+        }
+        // the callback is named as a string; a bare identifier is an
+        // undefined constant
+        usort($results, 'search_alias_compare');
+        return $results;
+    }
+
+    /**
+     * Lists the direct child folders of a folder that pass the read-permission check.
+     *
+     * NOTE(review): $userid is not used; the permission checks are called
+     * without it - confirm they evaluate the intended user.
+     *
+     * @param int $folderID Id of the parent folder.
+     * @param int $userid   Unused.
+     * @return mixed Array of rows with 'id' and 'name', or a PEAR_Error when
+     *               the folder cannot be loaded, is not readable, or the
+     *               query fails.
+     */
+    public static function getFolder($folderID, $userid)
+    {
+        // the same integer id is used for the object lookup and the query
+        $folderID = (int) $folderID;
+
+        $folder = Folder::get($folderID);
+        if (PEAR::isError($folder))
+        {
+            return $folder;
+        }
+
+        if (!Permission::userHasFolderReadPermission($folder))
+        {
+            return new PEAR_Error('no permission to read folder');
+        }
+
+        $sql = "SELECT id, name FROM folders WHERE parent_id=$folderID ORDER BY name";
+        $rs = DBUtil::getResultArray($sql);
+        if (PEAR::isError($rs))
+        {
+            return $rs;
+        }
+
+        $folders = array();
+
+        foreach($rs as $row)
+        {
+            $fobj = Folder::get($row['id']);
+            if (PEAR::isError($fobj))
+            {
+                // a child that cannot be loaded is left out of the listing
+                continue;
+            }
+
+            if (Permission::userHasFolderReadPermission($fobj))
+            {
+                $folders[] = $row;
+            }
+        }
+        return $folders;
+    }
+
+    /**
+     * Lists the fields of a fieldset together with their lookup options.
+     *
+     * A negative $fieldsetID selects the fields linked to the document type
+     * whose id is the absolute value; otherwise the fields whose
+     * parent_fieldset equals $fieldsetID are selected.
+     *
+     * @param int $fieldsetID Fieldset id, or a negated document type id.
+     * @return mixed Array of rows with 'id', 'name', 'description',
+     *               'datatype', 'control' ('normal' or 'lookup') and
+     *               'options', or a PEAR_Error when the query fails or
+     *               returns no rows.
+     */
+    public static function getFields($fieldsetID)
+    {
+        // ids are embedded unquoted in the SQL, so force them to integers
+        $fieldsetID = (int) $fieldsetID;
+
+        if ($fieldsetID < 0)
+        {
+            $documentTypeID = -$fieldsetID;
+            $sql = "SELECT
+                        df.id, df.name, df.data_type, df.has_lookup, df.has_lookuptree, df.description
+                    FROM
+                        document_type_fields_link dtfl
+                        INNER JOIN document_fields df on dtfl.field_id=df.id
+                    WHERE
+                        dtfl.document_type_id=$documentTypeID";
+        }
+        else
+        {
+            $sql = "SELECT id, name, data_type, has_lookup, has_lookuptree, description FROM document_fields WHERE parent_fieldset=$fieldsetID";
+        }
+
+        $rs = DBUtil::getResultArray($sql);
+        if (PEAR::isError($rs))
+        {
+            return $rs;
+        }
+        if (count($rs) == 0)
+        {
+            return new PEAR_Error('Fieldset was not found');
+        }
+
+        // the user list is the same for every USERLIST field: load it once, on demand
+        $userOptions = null;
+
+        $result=array();
+        foreach($rs as $item)
+        {
+            $fieldid=$item['id'];
+            $type='normal';
+            $options = array();
+            $haslookup = $item['has_lookup'] + 0 > 0;
+            // NOTE(review): assumes has_lookuptree is a 0/1 flag like
+            // has_lookup; the previous "> 1" test could never match a flag
+            $hastree = $item['has_lookuptree'] + 0 > 0;
+
+            if ($haslookup || $hastree)
+            {
+                // tree lookups reuse the flat metadata_lookup options;
+                // metadata_lookup_tree is not queried
+                $type = 'lookup';
+                $lookupFieldID = (int) $fieldid;
+                $sql = "select id, name from metadata_lookup where document_field_id=$lookupFieldID";
+                $options = DBUtil::getResultArray($sql);
+            }
+
+            if ($item['data_type'] == 'USERLIST')
+            {
+                $type = 'lookup';
+                if (is_null($userOptions))
+                {
+                    $sql = "SELECT id, name from users WHERE disabled=0";
+                    $userOptions = DBUtil::getResultArray($sql);
+                }
+                $options = $userOptions;
+            }
+
+            $ritem = array(
+                'id'=>$fieldid,
+                'name'=>$item['name'],
+                'description'=>$item['description'],
+                'datatype'=>$item['data_type'],
+                'control'=>$type,
+                'options'=>$options
+            );
+
+            $result[]= $ritem;
+        }
+        return $result;
+    }
+
+    /**
+     * Lists all enabled fieldsets.
+     *
+     * @return mixed Result of DBUtil::getResultArray() (rows with 'id',
+     *               'name' and 'description').
+     */
+    public static function getFieldsets()
+    {
+        $sql = "SELECT id, name, description FROM fieldsets WHERE disabled=0";
+        $rs = DBUtil::getResultArray($sql);
+
+        return $rs;
+    }
+
+    /**
+     * Lists the enabled fieldsets that are linked to a document type or are generic.
+     *
+     * @param int $documentTypeID Document type id.
+     * @return mixed Result of DBUtil::getResultArray() (rows with 'id',
+     *               'name' and 'description').
+     */
+    public static function getDocumentTypeFieldsets($documentTypeID)
+    {
+        // embedded unquoted in the SQL, so force it to an integer
+        $documentTypeID = (int) $documentTypeID;
+        // DISTINCT: a generic fieldset matches once per link row it has
+        $sql = "SELECT DISTINCT
+                    fs.id, fs.name, fs.description
+                FROM
+                    fieldsets fs LEFT JOIN document_type_fieldsets_link dtfl ON dtfl.fieldset_id=fs.id
+                WHERE
+                    fs.disabled=0 AND (dtfl.document_type_id=$documentTypeID OR fs.is_generic=1)";
+        $rs = DBUtil::getResultArray($sql);
+
+        return $rs;
+    }
+
+    /**
+     * Lists all enabled document types.
+     *
+     * @return mixed Result of DBUtil::getResultArray() (rows with 'id' and 'name').
+     */
+    public static function getDocumentTypes()
+    {
+        $sql = "SELECT id, name from document_types_lookup WHERE disabled=0";
+        $rs = DBUtil::getResultArray($sql);
+        return $rs;
+    }
+
+    /**
+     * Lists the distinct mime types, ordered by name.
+     *
+     * @return mixed Result of DBUtil::getResultArray() (rows with a 'name' key).
+     */
+    public static function getMimeTypes() {
+        $sql = "SELECT DISTINCT mimetypes as name FROM mime_types order by mimetypes ";
+        $rs = DBUtil::getResultArray($sql);
+        return $rs;
+    }
+
+    /**
+     * Lists all enabled workflows.
+     *
+     * @return mixed Result of DBUtil::getResultArray() (rows with 'id' and
+     *               'name', where 'name' is the human_name column).
+     */
+    public static function getWorkflows()
+    {
+        $sql = "SELECT id, human_name as name FROM workflows WHERE enabled=1";
+        $rs = DBUtil::getResultArray($sql);
+        return $rs;
+    }
+
+    /**
+     * Lists all enabled users.
+     *
+     * @return mixed Result of DBUtil::getResultArray() (rows with 'id' and 'name').
+     */
+    public static function getUsers()
+    {
+        $sql = "SELECT id, name FROM users WHERE disabled=0";
+        $rs = DBUtil::getResultArray($sql);
+        return $rs;
+    }
+
+    /**
+     * Lists the states of a workflow.
+     *
+     * @param int $workflowid Workflow id.
+     * @return mixed Result of DBUtil::getResultArray() (rows with 'id' and
+     *               'name', where 'name' is the human_name column).
+     */
+    public static function getWorkflowStates($workflowid)
+    {
+        // embedded unquoted in the SQL, so force it to an integer
+        $workflowid = (int) $workflowid;
+        $sql = "SELECT id,human_name as name FROM workflow_states WHERE workflow_id=$workflowid";
+        $rs = DBUtil::getResultArray($sql);
+        return $rs;
+    }
+
+}
+
+
+/**
+ * Extracts the part of an expression around a position, for error messages.
+ *
+ * The excerpt starts $start_offset characters before $locality (clamped to
+ * the start of the string) and is at most $length characters long.
+ *
+ * @param string $expr_str     Expression text.
+ * @param int    $locality     Offset the excerpt is centred near.
+ * @param int    $length       Maximum excerpt length.
+ * @param int    $start_offset How far before $locality the excerpt begins.
+ * @return string Result of substr() on the expression.
+ */
+function getExpressionLocalityString($expr_str, $locality, $length, $start_offset=10)
+{
+    $start = max(0, $locality - $start_offset);
+
+    return substr($expr_str, $start, $length);
+}
+
+/**
+ * Parses a search expression.
+ *
+ * Tokens from SearchCommandLexer are fed to SearchCommandParser one at a
+ * time; the parser state is checked after every token and once more after
+ * the end-of-input token.
+ *
+ * @param string $expr_str Search expression text.
+ * @return mixed Result of SearchCommandParser::getExprResult().
+ * @throws ResolutionException Re-thrown unchanged from the lexer/parser.
+ * @throws Exception When the expression cannot be parsed; the message quotes
+ *                   the offending token and an excerpt of the expression.
+ */
+function parseExpression($expr_str)
+{
+    $parser = new SearchCommandParser();
+    $lexer = new SearchCommandLexer($expr_str);
+
+//    $parser->PrintTrace();
+    // set when the exception about to be thrown already carries our own
+    // message, so the generic handler below passes it through untouched
+    $use_internal=false;
+
+    try
+    {
+        while ($lexer->yylex())
+        {
+            //print "\n" . $lexer->value . "\n";
+
+            $parser->doParse($lexer->token, $lexer->value);
+
+            if (!$parser->isExprOk())
+            {
+                $use_internal=true;
+                $expr_str=getExpressionLocalityString($expr_str, $lexer->offset, 20);
+                throw new Exception("Parsing problem near '$lexer->value' in '$expr_str' of expression.");
+            }
+        }
+
+        // we are now done
+        $parser->doParse(0, 0);
+
+        if (!$parser->isExprOk())
+        {
+            $use_internal=true;
+            $expr_str=getExpressionLocalityString($expr_str, $lexer->offset, 20);
+            throw new Exception("There is a problem parsing the expression '$expr_str'");
+        }
+
+    }
+    catch(ResolutionException $e)
+    {
+        throw $e;
+    }
+    catch(Exception $e)
+    {
+        if ($use_internal)
+        {
+            throw $e;
+        }
+        // an exception raised inside the lexer/parser: replace it with a
+        // message that points at the current token
+        $expr_str=getExpressionLocalityString($expr_str, $lexer->offset, 20);
+        throw new Exception("Parsing problem near '$lexer->value' of expression '$expr_str'.");
+    }
+
+    return $parser->getExprResult();
+}
+
+
+
+?>
\ No newline at end of file