diff --git a/search2/ajax/ajax.inc.php b/search2/ajax/ajax.inc.php
new file mode 100755
index 0000000..6d8c008
--- /dev/null
+++ b/search2/ajax/ajax.inc.php
@@ -0,0 +1,218 @@
+$status);
+        if (isset($message))
+        {
+            $resp['message'] = $message;
+        }
+        if (isset($rsName))
+        {
+            $resp[$rsName] = $rs;
+        }
+        print json_encode($resp);
+        exit;
+    }
+
+    /**
+     * Parses a search expression with parseExpression().
+     *
+     * A parse failure is reported to the client as STATUS_PARSE_PROBLEM together
+     * with the exception message. createResponse() prints the JSON and exits, so
+     * this method only returns when parsing succeeded and $exitOnSuccess is false.
+     *
+     * @param string $txtQuery The expression text to parse.
+     * @param boolean $exitOnSuccess If true, a success response is emitted on a successful parse.
+     * @return mixed The value returned by parseExpression().
+     */
+    public static function parseQuery($txtQuery, $exitOnSuccess=true)
+    {
+        try
+        {
+            $expr = parseExpression($txtQuery);
+            if ($exitOnSuccess)
+            {
+                AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS );
+            }
+            return $expr;
+        }
+        catch(Exception $e)
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_PARSE_PROBLEM , $e->getMessage());
+        }
+    }
+
+    /**
+     * Updates the expression of a saved search and emits a JSON status response.
+     *
+     * Users that are not system administrators can only update rows carrying
+     * their own user id.
+     *
+     * @param int $iSavedId The id of the search_saved row.
+     * @param string $txtQuery The new expression text.
+     * @param int $userID The id of the user performing the update.
+     */
+    public static function updateQuery($iSavedId,$txtQuery, $userID)
+    {
+        $txtQuery = sanitizeForSQL($txtQuery);
+        // Both ids are placed in the statement unquoted, so string escaping
+        // cannot protect them; force them to integers instead.
+        $iSavedId = (int) $iSavedId;
+
+        $sql = "UPDATE search_saved SET expression='$txtQuery' WHERE id=$iSavedId";
+        if (!Permission::userIsSystemAdministrator($userID))
+        {
+            $sql .= " AND user_id = " . (int) $userID;
+        }
+        $result = DBUtil::runQuery($sql);
+        if (PEAR::isError($result))
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL );
+        }
+        AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS );
+    }
+
+
+    /**
+     * Stores a new saved search and emits a JSON status response.
+     *
+     * Responds with STATUS_SAVED_SEARCH_EXISTS when a saved search with the
+     * same name is already present.
+     *
+     * @param string $txtName The name of the saved search.
+     * @param string $txtQuery The expression text.
+     * @param int $userID The id of the owning user.
+     */
+    public static function saveQuery($txtName,$txtQuery, $userID)
+    {
+        $lookup = sanitizeForSQL($txtName);
+        $sql = "select 1 from search_saved where name='$lookup'";
+        $result = DBUtil::getResultArray($sql);
+        if (PEAR::isError($result))
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL );
+        }
+        if (count($result) > 0)
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SAVED_SEARCH_EXISTS, _kt('Search with this name already exists') );
+        }
+
+        // autoInsert does escaping...
+        $values = array(
+            'name'=>$txtName,
+            'expression'=>$txtQuery,
+            'type'=>'S',
+            'shared'=>0,
+            'user_id' => $userID
+        );
+
+        $result = DBUtil::autoInsert('search_saved', $values);
+
+        if (PEAR::isError($result))
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL );
+        }
+        AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS );
+    }
+
+    /**
+     * Emits the saved searches returned by SearchHelper::getSavedSearches() under the 'searches' key.
+     *
+     * @param int $userID
+     */
+    public static function getSavedSearches($userID)
+    {
+        $rs = SearchHelper::getSavedSearches($userID);
+        if (PEAR::isError($rs))
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL );
+        }
+
+        AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null, 'searches', $rs);
+    }
+
+
+
+    /**
+     * Emits the document types returned by SearchHelper::getDocumentTypes() under the 'documenttypes' key.
+     */
+    public static function getDocumentTypes()
+    {
+        $rs = SearchHelper::getDocumentTypes();
+        if (PEAR::isError($rs))
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL, $rs->getMessage() );
+        }
+        AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null, 'documenttypes', $rs);
+    }
+
+    /**
+     * Emits the fieldsets of one document type under the 'fieldsets' key.
+     *
+     * @param int $documentTypeID
+     */
+    public static function getDocumentTypeFieldsets($documentTypeID)
+    {
+        $rs = SearchHelper::getDocumentTypeFieldsets($documentTypeID);
+
+        if (PEAR::isError($rs))
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL, $rs->getMessage() );
+        }
+        AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null, 'fieldsets', $rs);
+    }
+
+
+    /**
+     * Emits the fieldsets returned by SearchHelper::getFieldsets() under the 'fieldsets' key.
+     */
+    public static function getFieldsets()
+    {
+        $rs = SearchHelper::getFieldsets();
+        if (PEAR::isError($rs))
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL, $rs->getMessage() );
+        }
+        AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null, 'fieldsets', $rs);
+    }
+
+    /**
+     * Emits the fields of one fieldset under the 'fields' key.
+     *
+     * @param int $fieldsetID
+     */
+    public static function getFields($fieldsetID)
+    {
+        $result = SearchHelper::getFields($fieldsetID);
+
+        if (PEAR::isError($result))
+        {
+            AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_INTERNAL, $result->getMessage() );
+        }
+
+        AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null,
'fields', $result); + } + + + public static function getFolder($folderID) + { + $userid = AjaxSearchHelper::getSessionUser(); + + $folders = SearchHelper::getFolder($folderID, $userid); + if (PEAR::isError($folders)) + { + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_MISSING_FOLDER, $folders->getMessage() ); + } + + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null, 'folders', $folders); + + } + + public static function getSearchFields() + { + $results = SearchHelper::getSearchFields(); + AjaxSearchHelper::createResponse(AjaxSearchHelper::STATUS_SUCCESS , null, 'fields', $results); + } + +} + +?> \ No newline at end of file diff --git a/search2/ajax/metadata.php b/search2/ajax/metadata.php new file mode 100755 index 0000000..26a5ae8 --- /dev/null +++ b/search2/ajax/metadata.php @@ -0,0 +1,24 @@ + \ No newline at end of file diff --git a/search2/ajax/parseExpr.php b/search2/ajax/parseExpr.php new file mode 100755 index 0000000..f469b33 --- /dev/null +++ b/search2/ajax/parseExpr.php @@ -0,0 +1,9 @@ + \ No newline at end of file diff --git a/search2/ajax/saveExpr.php b/search2/ajax/saveExpr.php new file mode 100755 index 0000000..39d63d9 --- /dev/null +++ b/search2/ajax/saveExpr.php @@ -0,0 +1,40 @@ + \ No newline at end of file diff --git a/search2/ajax/savedSearches.php b/search2/ajax/savedSearches.php new file mode 100755 index 0000000..1c0a010 --- /dev/null +++ b/search2/ajax/savedSearches.php @@ -0,0 +1,9 @@ + \ No newline at end of file diff --git a/search2/ajax/searchFields.php b/search2/ajax/searchFields.php new file mode 100755 index 0000000..c4c22ed --- /dev/null +++ b/search2/ajax/searchFields.php @@ -0,0 +1,8 @@ + \ No newline at end of file diff --git a/search2/ajax/treeNodes.php b/search2/ajax/treeNodes.php new file mode 100755 index 0000000..d8ec8a5 --- /dev/null +++ b/search2/ajax/treeNodes.php @@ -0,0 +1,8 @@ + \ No newline at end of file diff --git a/search2/images/kn.png b/search2/images/kn.png new file mode 
100755 index 0000000..9773621 --- /dev/null +++ b/search2/images/kn.png diff --git a/search2/images/o-red.png b/search2/images/o-red.png new file mode 100755 index 0000000..15b4571 --- /dev/null +++ b/search2/images/o-red.png diff --git a/search2/images/o-yellow.png b/search2/images/o-yellow.png new file mode 100755 index 0000000..44d1a81 --- /dev/null +++ b/search2/images/o-yellow.png diff --git a/search2/images/wledgetree.png b/search2/images/wledgetree.png new file mode 100755 index 0000000..6854deb --- /dev/null +++ b/search2/images/wledgetree.png diff --git a/search2/indexing/bin/cronIndexer.php b/search2/indexing/bin/cronIndexer.php new file mode 100755 index 0000000..0370a8e --- /dev/null +++ b/search2/indexing/bin/cronIndexer.php @@ -0,0 +1,9 @@ +indexDocuments(); + +?> \ No newline at end of file diff --git a/search2/indexing/bin/diagnose.php b/search2/indexing/bin/diagnose.php new file mode 100755 index 0000000..d44105c --- /dev/null +++ b/search2/indexing/bin/diagnose.php @@ -0,0 +1,11 @@ +diagnose(); + +var_dump($diagnoses); + +?> \ No newline at end of file diff --git a/search2/indexing/bin/optimise.php b/search2/indexing/bin/optimise.php new file mode 100755 index 0000000..8c98457 --- /dev/null +++ b/search2/indexing/bin/optimise.php @@ -0,0 +1,9 @@ +optimise(); + +?> \ No newline at end of file diff --git a/search2/indexing/bin/recreateIndex.php b/search2/indexing/bin/recreateIndex.php new file mode 100755 index 0000000..2e5bb32 --- /dev/null +++ b/search2/indexing/bin/recreateIndex.php @@ -0,0 +1,20 @@ + \ No newline at end of file diff --git a/search2/indexing/bin/registerTypes.php b/search2/indexing/bin/registerTypes.php new file mode 100755 index 0000000..fda3fe6 --- /dev/null +++ b/search2/indexing/bin/registerTypes.php @@ -0,0 +1,9 @@ +registerTypes(true); + +?> \ No newline at end of file diff --git a/search2/indexing/extractorCore.inc.php b/search2/indexing/extractorCore.inc.php new file mode 100755 index 0000000..61f7c41 --- /dev/null +++ 
b/search2/indexing/extractorCore.inc.php @@ -0,0 +1,631 @@ +needsIntermediate=false; + $this->extractionStatus = null; + $this->indexStatus = null; + } + + /** + * Sets the status of the indexing. + * + * @param unknown_type $status + */ + public function setIndexingStatus($status) + { + $this->indexStatus = $status; + } + /** + * Returns the indexing status. + * + * @return boolean + */ + public function getIndexingStatus() + { + return $this->indexStatus; + } + + /** + * Sets the extraction status. + * + * @param boolean $status + */ + public function setExtractionStatus($status) + { + $this->extractionStatus = $status; + } + /** + * Return the extraction status. + * + * @return boolean + */ + public function getExtractionStatus() + { + return $this->extractionStatus; + } + + /** + * This associates all the mime types associated with the extractor class. + * + */ + public function registerMimeTypes() + { + $types = $this->getSupportedMimeTypes(); + if (empty($types)) + { + return; + } + $classname=get_class($this); + + foreach($types as $type) + { + $sql = "update mime_types set extractor='$classname' where mimetypes='$type' and extractor is null"; + DBUtil::runQuery($sql); + } + } + + /** + * Indicates if an intermediate file is required. + * + * @param $value boolean Optional. If set, we set the value. + * @return boolean + */ + public function needsIntermediateSourceFile($value = null) + { + if (!is_null($value)) + { + $this->needsIntermediate = $value; + } + return $this->needsIntermediate; + } + + /** + * Sets the source filename for the document extractor. + * + * @param string $sourcefile + */ + public function setSourceFile($sourcefile) + { + $this->sourcefile=$sourcefile; + } + + /** + * Returns the source file name. + * + * @return string + */ + public function getSourceFile() { return $this->sourcefile; } + + /** + * Sets the source file's mime type. 
+ * + * @param string $mimetype + */ + public function setMimeType($mimetype) + { + $this->mimetype=$mimetype; + } + /** + * Returns the mime type for the source file. + * + * @return string + */ + public function getMimeType() { return $this->mimetype; } + + /** + * Indicates the extension for the source file. + * + * @param string $extension + */ + public function setExtension($extension) + { + $this->extension=$extension; + } + /** + * Returns the extension of the source file. + * + * @return string + */ + public function getExtension() { return $this->extension; } + + /** + * Sets the file name of the target text file. + * + * @param string $targetfile + */ + public function setTargetFile($targetfile) + { + $this->targetfile=$targetfile; + } + + /** + * Gets the file name of the target text file containing the extracted text. + * + * @return unknown + */ + public function getTargetFile() { return $this->targetfile; } + + /** + * Filter function that may be applied after extraction. This may be overridden. + * + * @param string $text + * @return string + */ + protected function filter($text) + { + return $text; + } + + /** + * Set the document that will be indexed. + * + * @param Document $document + */ + public function setDocument($document) + { + $this->document = $document; + } + + /** + * Returns a reference to the document. + * + * @return string + */ + public function getDocument() + { + return $this->document; + } + + /** + * Returns an array of supported mime types. + * e.g. return array('plain/text'); + * + * + * @return array + * + */ + public abstract function getSupportedMimeTypes(); + + /** + * Extracts the content from the source file. + * + * @return boolean + */ + public abstract function extractTextContent(); + + /** + * Returns a friendly name for the document text extractor. + * + * @return string + */ + public abstract function getDisplayName(); + + /** + * Attempts to diagnose any problems with the indexing process. 
+     *
+     * @return string
+     */
+    public abstract function diagnose();
+
+}
+
+/**
+ * This class extends the document extractor to execute some command line application.
+ * The getCommandLine() method needs to be overridden.
+ *
+ */
+abstract class ExternalDocumentExtractor extends DocumentExtractor
+{
+    /**
+     * Initialise the extractor and set LANG to en_US.UTF-8 for this process
+     * and the commands it launches.
+     *
+     */
+    public function __construct()
+    {
+        parent::__construct();
+        putenv('LANG=en_US.UTF-8');
+    }
+
+    /**
+     * Executes a command through KTUtil::pexec(). Returns true if the 'ret'
+     * entry of the result equals 0.
+     *
+     * @param string $cmd A command line instruction.
+     * @return boolean
+     */
+    protected function exec($cmd)
+    {
+        $aRet = KTUtil::pexec($cmd);
+        return $aRet['ret'] == 0;
+    }
+
+    /**
+     * Returns the command line string to be executed.
+     * The command returned should include the target filename.
+     * This default implementation always throws; subclasses must override it.
+     *
+     * @return string
+     * @throws Exception
+     */
+    protected function getCommandLine()
+    {
+        throw new Exception('getCommandLine is not implemented');
+    }
+
+    /**
+     * Builds the command line, logs it at debug level and executes it.
+     * Returns true on success.
+     *
+     * @return boolean
+     */
+    public function extractTextContent()
+    {
+        global $default;
+
+        $cmdline = $this->getCommandLine();
+
+        $class = get_class($this);
+        $default->log->debug("$class: " . $cmdline);
+
+        return $this->exec($cmdline);
+    }
+
+}
+
+/**
+ * An extension to the external document extractor. A derived class simply needs
+ * to implement a constructor and getSupportedMimeTypes().
+ *
+ */
+abstract class ApplicationExtractor extends ExternalDocumentExtractor
+{
+    /**
+     * The full path to the application that will be run. This will be resolved from
+     * the path or using the config file.
+     *
+     * @var string
+     */
+    private $application;
+    /**
+     * The command name of the application that can be run.
+     *
+     * @var string
+     */
+    private $command;
+    /**
+     * This is the friendly name for the extractor.
+     *
+     * @var string
+     */
+    private $displayname;
+    /**
+     * The command line parameters for the application.
+     * This may include {source} and {target} where substitutions will be done.
+     *
+     * @var string
+     */
+    private $params;
+
+    /**
+     * Initialise the extractor.
+     *
+     * @param string $section The section in the config file.
+     * @param string $appname The application name in the config file.
+     * @param string $command The command that can be run.
+     * @param string $displayname
+     * @param string $params
+     */
+    public function __construct($section, $appname, $command, $displayname, $params)
+    {
+        parent::__construct();
+
+        $this->application = KTUtil::findCommand("$section/$appname", $command);
+        $this->command = $command;
+        $this->displayname = $displayname;
+        $this->params = $params;
+    }
+
+    /**
+     * Return the display name.
+     *
+     * @return string
+     */
+    public function getDisplayName()
+    {
+        return _kt($this->displayname);
+    }
+
+    /**
+     * Returns the command line after substituting {source} and {target} in the
+     * configured parameters with the source and target file names.
+     *
+     * @return string
+     */
+    protected function getCommandLine()
+    {
+        $placeholders = array('{source}','{target}');
+        $filenames = array($this->sourcefile, $this->targetfile);
+
+        // The parameter template lives on the object; the original code read an
+        // undefined local variable here and so produced a command without arguments.
+        // NOTE(review): the bare command name is used rather than the path resolved
+        // into $this->application - confirm which one is intended.
+        $cmdline = $this->command . ' ' . str_replace($placeholders, $filenames, $this->params);
+
+        return $cmdline;
+    }
+
+    /**
+     * Identifies if there are any circumstances why the command can not run that could result in the text extraction process
+     * failing.
+     *
+     * @return mixed Returns string if there is a problem, null otherwise.
+     */
+    public function diagnose()
+    {
+        if (false === $this->application)
+        {
+            return _kt("Cannot locate binary for $this->displayname ($this->command).");
+        }
+
+        return null;
+    }
+}
+
+abstract class TextExtractor extends DocumentExtractor
+{
+    /**
+     * This extracts the text from the document.
+     *
+     * @return boolean
+     */
+    public function extractTextContent()
+    {
+        $content = file_get_contents($this->sourcefile);
+        if (false === $content)
+        {
+            return false;
+        }
+
+        $result = file_put_contents($this->targetfile, $this->filter($content));
+
+        return false !== $result;
+    }
+
+    /**
+     * There are no external dependencies to diagnose.
+     *
+     * @return null
+     */
+    public function diagnose()
+    {
+        return null;
+    }
+
+}
+
+/**
+ * The composite extractor implies that a conversion is done to an intermediate form before another extractor is run.
+ *
+ */
+abstract class CompositeExtractor extends DocumentExtractor
+{
+    /**
+     * The initial extractor
+     *
+     * @var DocumentExtractor
+     */
+    private $sourceExtractor;
+    /**
+     * The text extractor
+     *
+     * @var DocumentExtractor
+     */
+    private $targetExtractor;
+    /**
+     * The extension for the initial extraction
+     *
+     * @var string
+     */
+    private $targetExtension;
+    /**
+     * The mime type of the initial extraction.
+     *
+     * @var string
+     */
+    private $targetMimeType;
+
+    /**
+     * Initialise the composite extractor.
+     *
+     * @param DocumentExtractor $sourceExtractor Produces the intermediate file.
+     * @param string $targetExtension Extension given to the intermediate file.
+     * @param string $targetMimeType Mime type handed to the second extractor.
+     * @param DocumentExtractor $targetExtractor Turns the intermediate file into the target file.
+     * @param boolean $needsIntermediate Passed to needsIntermediateSourceFile().
+     */
+    public function __construct($sourceExtractor, $targetExtension, $targetMimeType, $targetExtractor, $needsIntermediate)
+    {
+        // run the base initialisation, as the other extractor classes do
+        parent::__construct();
+
+        $this->sourceExtractor = $sourceExtractor;
+        $this->targetExtractor = $targetExtractor;
+        $this->targetExtension = $targetExtension;
+        $this->targetMimeType = $targetMimeType;
+        $this->needsIntermediateSourceFile($needsIntermediate);
+    }
+
+    /**
+     * Extracts the content of the document in two stages: the source extractor
+     * writes an intermediate file named after the target file plus the target
+     * extension, then the target extractor converts that file into the target
+     * file. The intermediate file is removed afterwards.
+     *
+     * @return boolean True if both stages succeeded.
+     */
+    public function extractTextContent()
+    {
+        $intermediateFile = $this->targetfile . '.' . $this->targetExtension;
+
+        $this->sourceExtractor->setSourceFile($this->sourcefile);
+        $this->sourceExtractor->setTargetFile($intermediateFile);
+        $this->sourceExtractor->setMimeType($this->mimetype);
+        $this->sourceExtractor->setExtension($this->extension);
+        // Stop only when the first stage FAILED. The original test was inverted
+        // and bailed out on success, so the second stage could never run on
+        // valid input.
+        if (!$this->sourceExtractor->extractTextContent())
+        {
+            // discard any partial intermediate output
+            if (file_exists($intermediateFile))
+            {
+                @unlink($intermediateFile);
+            }
+            return false;
+        }
+
+        $this->targetExtractor->setSourceFile($intermediateFile);
+        $this->targetExtractor->setTargetFile($this->targetfile);
+        $this->targetExtractor->setMimeType($this->targetMimeType);
+        $this->targetExtractor->setExtension($this->targetExtension);
+        $result = $this->targetExtractor->extractTextContent();
+
+        // the suppression operator belongs on the call, not on the argument
+        @unlink($intermediateFile);
+
+        return $result;
+    }
+
+    /**
+     * Diagnose the extractors. The first problem reported by either the source
+     * or the target extractor is returned.
+     *
+     * @return mixed
+     */
+    public function diagnose()
+    {
+        $diagnosis = $this->sourceExtractor->diagnose();
+        if (!empty($diagnosis))
+        {
+            return $diagnosis;
+        }
+
+        $diagnosis = $this->targetExtractor->diagnose();
+        if (!empty($diagnosis))
+        {
+            return $diagnosis;
+        }
+
+        return null;
+    }
+}
+
+
+/**
+ * An extractor hook provides callbacks that are run before and after the
+ * extraction and indexing phases.
+ *
+ */
+abstract class ExtractorHook
+{
+    /**
+     * Returns an array of supported mime types.
+     * e.g. return array('plain/text');
+     *
+     *
+     * @return array
+     *
+     */
+    public abstract function getSupportedMimeTypes();
+
+    /**
+     * Returns the friendly name for the hook.
+     *
+     * @return string
+     */
+    public abstract function getDisplayName();
+
+    /**
+     * This does a basic diagnosis on the hook.
+     *
+     * @return string
+     */
+    public function diagnose()
+    {
+        return null;
+    }
+
+    /**
+     * Perform any pre extraction activities.
+     *
+     * @param DocumentExtractor $extractor
+     */
+    public function pre_extract($extractor)
+    {
+    }
+
+    /**
+     * Perform any post extraction activities.
+     *
+     * @param DocumentExtractor $extractor
+     */
+    public function post_extract($extractor)
+    {
+
+    }
+
+    /**
+     * Perform any pre indexing activities.
+ * + * @param DocumentExtractor $extractor + */ + public function pre_index($extractor) + { + + } + + /** + * Perform any post indexing activities. + * + * @param DocumentExtractor $extractor + */ + public function post_index($extractor) + { + + } +} + +?> \ No newline at end of file diff --git a/search2/indexing/extractors/MailMimeExtractor.inc.php b/search2/indexing/extractors/MailMimeExtractor.inc.php new file mode 100755 index 0000000..9dee5df --- /dev/null +++ b/search2/indexing/extractors/MailMimeExtractor.inc.php @@ -0,0 +1,17 @@ + \ No newline at end of file diff --git a/search2/indexing/extractors/OOPDFTextExtractor.inc.php b/search2/indexing/extractors/OOPDFTextExtractor.inc.php new file mode 100755 index 0000000..442601f --- /dev/null +++ b/search2/indexing/extractors/OOPDFTextExtractor.inc.php @@ -0,0 +1,101 @@ +pdf2txt = new PDFExtractor(); + $this->text2pdf = new OOTextExtractor(); + } + + public function needsIntermediateSourceFile() + { + // we need the intermediate file because it + // has the correct extension. jodconverter uses the extension to determine mimetype + return true; + } + + public function getDisplayName() + { + throw new Exception('This should be overriden'); + } + + public function getSupportedMimeTypes() + { + return array(); + } + + public function extractTextContent() + { + $pdffile = $this->targetfile . 
'.pdf'; + + $this->text2pdf->setSourceFile($this->sourcefile); + $this->text2pdf->setTargetFile($pdffile); + $this->text2pdf->setMimeType($this->mimetype); + $this->text2pdf->setExtension($this->extension); + if ($this->extractTextContent()) + { + return false; + } + + $this->pdf2txt->setSourceFile($pdffile); + $this->pdf2txt->setTargetFile($this->targetfile); + $this->pdf2txt->setMimeType('application/pdf'); + $this->pdf2txt->setExtension('pdf'); + $result = $this->pdf2txt->extractTextContent(); + + unlink(@$pdffile); + + return $result; + } + + public function diagnose() + { + $diagnosis = $this->pdf2txt->diagnose(); + if (!empty($diagnosis)) + { + return $diagnosis; + } + + $diagnosis = $this->text2pdf->diagnose(); + if (!empty($diagnosis)) + { + return $diagnosis; + } + + return null; + } +} */ + +?> \ No newline at end of file diff --git a/search2/indexing/extractors/OOPresentationExtractor.inc.php b/search2/indexing/extractors/OOPresentationExtractor.inc.php new file mode 100755 index 0000000..e832cc9 --- /dev/null +++ b/search2/indexing/extractors/OOPresentationExtractor.inc.php @@ -0,0 +1,21 @@ + \ No newline at end of file diff --git a/search2/indexing/extractors/OOSpreadsheetExtractor.inc.php b/search2/indexing/extractors/OOSpreadsheetExtractor.inc.php new file mode 100755 index 0000000..67d6039 --- /dev/null +++ b/search2/indexing/extractors/OOSpreadsheetExtractor.inc.php @@ -0,0 +1,25 @@ + \ No newline at end of file diff --git a/search2/indexing/extractors/OOTextExtractor.inc.php b/search2/indexing/extractors/OOTextExtractor.inc.php new file mode 100755 index 0000000..dbdbfd0 --- /dev/null +++ b/search2/indexing/extractors/OOTextExtractor.inc.php @@ -0,0 +1,81 @@ +converter = KTUtil::findCommand('extractors/jodconverter', 'jodconverter'); + $this->javaPath = KTUtil::findCommand('extractors/java', 'java'); + $this->ooHost = $config->get('openoffice/host', 'localhost'); + $this->ooPort = $config->get('openoffice/port', 8100); + $this->targetMimeType = 
$targetMimeType; + } + + public function getDisplayName() + { + return _kt('OpenOffice Text Extractor'); + } + + public function getSupportedMimeTypes() + { + return array( + 'text/rtf', + 'application/vnd.oasis.opendocument.text', + 'application/vnd.oasis.opendocument.text-template', + 'application/vnd.oasis.opendocument.text-web', + 'application/vnd.oasis.opendocument.text-master', + 'application/vnd.sun.xml.writer', + 'application/vnd.sun.xml.writer.template', + 'application/vnd.sun.xml.writer.global', + ); + } + + public function needsIntermediateSourceFile() + { + // we need the intermediate file because it + // has the correct extension. jodconverter uses the extension to determine mimetype + return true; + } + + protected function getCommandLine() + { + $cmdline = "$this->javaPath -jar $this->converter $this->sourcefile $this->mimetype $this->targetfile $this->targetMimeType $this->ooHost $this->ooPort"; + return $cmdline; + } + + public function diagnose() + { + if (false === $this->converter) + { + return _kt('Cannot locate jodconverter'); + } + + if (false === $this->javaPath) + { + return _kt('Cannot locate java'); + } + + + + $connection = @fsockopen($this->ooHost, $this->ooPort,$errno, $errstr,5 ); + if (false === $connection) + { + return _kt('Cannot connect to openoffice host'); + } + fclose($connection); + + + return null; + } +} + +?> \ No newline at end of file diff --git a/search2/indexing/extractors/PDFExtractor.inc.php b/search2/indexing/extractors/PDFExtractor.inc.php new file mode 100755 index 0000000..a504071 --- /dev/null +++ b/search2/indexing/extractors/PDFExtractor.inc.php @@ -0,0 +1,16 @@ + \ No newline at end of file diff --git a/search2/indexing/extractors/PSExtractor.inc.php b/search2/indexing/extractors/PSExtractor.inc.php new file mode 100755 index 0000000..b9c5aa7 --- /dev/null +++ b/search2/indexing/extractors/PSExtractor.inc.php @@ -0,0 +1,16 @@ + \ No newline at end of file diff --git 
a/search2/indexing/extractors/PlainTextExtractor.inc.php b/search2/indexing/extractors/PlainTextExtractor.inc.php new file mode 100755 index 0000000..7ce4ea9 --- /dev/null +++ b/search2/indexing/extractors/PlainTextExtractor.inc.php @@ -0,0 +1,17 @@ + \ No newline at end of file diff --git a/search2/indexing/extractors/ScriptExtractor.inc.php b/search2/indexing/extractors/ScriptExtractor.inc.php new file mode 100755 index 0000000..09305d9 --- /dev/null +++ b/search2/indexing/extractors/ScriptExtractor.inc.php @@ -0,0 +1,17 @@ + \ No newline at end of file diff --git a/search2/indexing/extractors/XMLExtractor.inc.php b/search2/indexing/extractors/XMLExtractor.inc.php new file mode 100755 index 0000000..2d7a2fd --- /dev/null +++ b/search2/indexing/extractors/XMLExtractor.inc.php @@ -0,0 +1,21 @@ +]*>)+@", " ", $text); + } +} + +?> \ No newline at end of file diff --git a/search2/indexing/indexerCore.inc.php b/search2/indexing/indexerCore.inc.php new file mode 100755 index 0000000..e600e10 --- /dev/null +++ b/search2/indexing/indexerCore.inc.php @@ -0,0 +1,942 @@ +document_id=$document_id; + $this->rank= $rank; + $this->title=$title; + $this->text = $text; + $this->loadDocumentInfo(); + } + + protected function __isset($property) + { + switch($property) + { + case 'DocumentID': return isset($this->document_id); + case 'Rank': return isset($this->rank); + case 'Text': return isset($this->text); + case 'Title': return isset($this->title); + case null: break; + default: + throw new Exception("Unknown property '$property' to get on MatchResult"); + } + } + + private function loadDocumentInfo() + { + $sql = "SELECT + f.full_path, f.name, dcv.size as filesize, dcv.major_version, + dcv.minor_version, dcv.filename, cou.name as checkoutuser, w.human_name as workflow, ws.human_name as workflowstate + + FROM + documents d + INNER JOIN document_metadata_version dmv ON d.metadata_version_id = dmv.id + INNER JOIN document_content_version dcv ON dmv.content_version_id = dcv.id + 
LEFT JOIN folders f ON f.id=d.folder_id + LEFT JOIN users cou ON d.checked_out_user_id=cou.id + LEFT JOIN workflows w ON dmv.workflow_id=w.id + LEFT JOIN workflow_states ws ON dmv.workflow_state_id = ws.id + WHERE + d.id=$this->document_id"; + + $result = DBUtil::getOneResult($sql); + + if (PEAR::isError($result) || empty($result)) + { + $this->live = false; + return; + } + + $this->live = true; + if (is_null($result['name'])) + { + $this->fullpath = '(orphaned)'; + } + else + { + $this->fullpath = $result['full_path'] . '/' . $result['name']; + if (substr($this->fullpath,0,1) == '/') $this->fullpath = substr($this->fullpath,1); + } + + + $this->filesize = $result['filesize'] + 0; + + if ($this->filesize > 1024 * 1024 * 1024) + { + $this->filesize = floor($this->filesize / (1024 * 1024 * 1024)) . 'g'; + } + elseif ($this->filesize > 1024 * 1024) + { + $this->filesize = floor($this->filesize / (1024 * 1024)) . 'm'; + } + elseif ($this->filesize > 1024) + { + $this->filesize = floor($this->filesize / (1024)) . 'k'; + } + else + { + $this->filesize .= 'b'; + } + + $this->version = $result['major_version'] . '.' . 
$result['minor_version']; + $this->filename=$result['filename']; + $this->checkoutuser = $result['checkoutuser']; + $this->workflow = $result['workflow']; + $this->workflowstate = $result['workflowstate']; + + } + + + + protected function __get($property) + { + switch($property) + { + case 'DocumentID': return $this->document_id; + case 'Rank': return $this->rank; + case 'Text': return $this->text; + case 'Title': return $this->title; + case 'FullPath': return $this->fullpath; + case 'IsLive': return $this->live; + case 'Filesize': return $this->filesize; + case 'Version': return $this->version; + case 'Filename': return $this->filename; + case 'Document': + if (is_null($this->document)) + $this->document = Document::get($this->document_id); + return $this->document; + case 'IsAvailable': + return $this->Document->isLive(); + + case 'CheckedOutUser': + return $this->checkoutuser; + case 'Workflow': + if (is_null($this->workflow)) + { + return ''; + } + return "$this->workflow - $this->workflowstate"; + case null: break; + default: + throw new Exception("Unknown property '$property' to get on MatchResult"); + } + } + + protected function __set($property, $value) + { + switch($property) + { + case 'Rank': $this->rank = number_format($value,2,'.',','); break; + case 'Text': $this->text = $value; break; + default: + throw new Exception("Unknown property '$property' to set on MatchResult"); + } + } +} + +function MatchResultCompare($a, $b) +{ + if ($a->Rank == $b->Rank) { + return 0; + } + return ($a->Rank < $b->Rank) ? 
-1 : 1; +} + +class QueryResultItem extends MatchResult +{ + protected $discussion; + + public function __construct($document_id, $rank, $title, $text, $discussion) + { + parent::__construct($document_id, $rank, $title, $text); + $this->discussion=$discussion; + } + + protected function __isset($property) + { + switch($property) + { + case 'Discussion': return isset($this->discussion); + default: return parent::__isset($property); + } + } + + protected function __get($property) + { + switch($property) + { + case 'Discussion': return $this->discussion; + default: return parent::__get($property); + } + } +} + +abstract class Indexer +{ + /** + * Cache of extractors + * + * @var array + */ + private $extractorCache; + + /** + * Indicates if the indexer will do logging. + * + * @var boolean + */ + private $debug; + /** + * Cache on mime related hooks + * + * @var unknown_type + */ + private $mimeHookCache; + /** + * Cache on general hooks. + * + * @var array + */ + private $generalHookCache; + + /** + * This is a path to the extractors. + * + * @var string + */ + private $extractorPath; + /** + * This is a path to the hooks. + * + * @var string + */ + private $hookPath; + + /** + * Initialise the indexer + * + */ + protected function __construct() + { + $this->extractorCache=array(); + $this->debug=true; + $this->hookCache = array(); + $this->generalHookCache = array(); + + $config = KTConfig::getSingleton(); + + $this->extractorPath = $config->get('indexer/extractorPath', 'extractors'); + $this->hookPath = $config->get('indexer/extractorHookPath','extractorHooks'); + } + + /** + * Returns a reference to the main class + * + * @return Indexer + */ + public static function get() + { + static $singleton = null; + + if (is_null($singleton)) + { + $config = KTConfig::getSingleton(); + $classname = $config->get('indexer/coreClass'); + + require_once('indexing/indexers/' . $classname . 
'.inc.php');
+
+            if (!class_exists($classname))
+            {
+                throw new Exception("Class '$classname' does not exist.");
+            }
+
+            $singleton = new $classname;
+        }
+
+        return $singleton;
+    }
+
+    public abstract function deleteDocument($docid);
+
+    /**
+     * Remove the association of all extractors to mime types on the database.
+     *
+     */
+    public function clearExtractors()
+    {
+        global $default;
+        $sql = "update mime_types set extractor=null";
+        DBUtil::runQuery($sql);
+
+        $default->log->debug('clearExtractors');
+    }
+
+    /**
+     * Look up the name of the extractor class registered for a type in the
+     * mime_types table.
+     *
+     * NOTE(review): the lookup matches the 'filetypes' column although the
+     * parameter is described as a mime type - confirm the intended column.
+     * NOTE(review): $type is placed in the statement as-is; callers must only
+     * pass trusted values.
+     *
+     * @param string $type
+     * @return mixed The extractor class name, or the PEAR error from the lookup.
+     */
+    public static function resolveExtractor($type)
+    {
+        global $default;
+        $sql = "select extractor from mime_types where filetypes='$type'";
+        $class = DBUtil::getOneResultKey($sql,'extractor');
+        if (PEAR::isError($class))
+        {
+            $default->log->error("resolveExtractor: cannot resolve $type");
+            return $class;
+        }
+        // This method is static, so there is no $this to consult for a debug
+        // flag; the message is already emitted at debug level.
+        $default->log->debug("resolveExtractor: Resolved '$class' from mime type '$type'.");
+        return $class;
+    }
+
+    /**
+     * Return all the discussion text for a document: the subject and body of
+     * every matching comment, each followed by a newline.
+     *
+     * @param int $docid
+     * @return string Empty when the query fails or nothing matches.
+     */
+    public static function getDiscussionText($docid)
+    {
+        // the id is used unquoted in the statement, so force it to an integer
+        $docid = (int) $docid;
+
+        $sql = "SELECT
+                    dc.subject, dc.body
+                FROM
+                    discussion_threads dt
+                    INNER JOIN discussion_comments dc ON dc.thread_id=dt.id AND dc.id BETWEEN dt.first_comment_id AND dt.last_comment_id
+                WHERE
+                    dt.document_id=$docid";
+        $result = DBUtil::getResultArray($sql);
+        $text = '';
+
+        if (PEAR::isError($result) || empty($result))
+        {
+            return $text;
+        }
+
+        foreach($result as $record)
+        {
+            $text .= $record['subject'] . "\n" . $record['body'] . "\n";
+        }
+
+        return $text;
+    }
+
+    /**
+     * Schedule the indexing of a document.
*
     * @param Document $document Document to queue; only its getId() is used here.
     * @param string $what Code stored in index_files.what. Defaults to 'C'.
     */
    public static function index($document, $what='C')
    {
        // Every value below is interpolated into SQL, so coerce each one first.
        $document_id = (int) $document->getId();
        $userid = Indexer::queueUserId();
        $what = preg_replace('/[^A-Za-z0-9_]/', '', (string) $what);

        // we dequeue the document so that there are no issues when enqueuing
        Indexer::unqueueDocument($document_id);

        // enqueue item
        $sql = "INSERT INTO index_files(document_id, user_id, what) VALUES($document_id, $userid, '$what')";
        DBUtil::runQuery($sql);
    }

    /**
     * Resolve the user id recorded against queued index requests.
     *
     * Reads $_SESSION['userID']; when it is missing or empty (for example when
     * no session is active) user id 1 is used instead.
     *
     * @return int
     */
    private static function queueUserId()
    {
        $userid = isset($_SESSION['userID']) ? (int) $_SESSION['userID'] : 0;

        return ($userid > 0) ? $userid : 1;
    }

    /**
     * Queue content indexing ('C') for every document whose status_id is 1.
     *
     */
    public static function indexAll()
    {
        $userid = Indexer::queueUserId();
        $sql = "INSERT INTO index_files(document_id, user_id, what) SELECT id, $userid, 'C' FROM documents WHERE status_id=1";
        DBUtil::runQuery($sql);
    }

    /**
     * Clearout the scheduling of documents that no longer exist.
     *
     */
    public static function clearoutDeleted()
    {
        // NOTE(review): the sub-select matches on "iff.document_id = d.id OR dmv.status_id=3".
        // With OR, a single status-3 document anywhere makes EXISTS true for every queue row,
        // so nothing would be removed. This looks like it was meant to be an AND on a
        // "not deleted" status - confirm against the schema before changing the statement.
        $sql = 'DELETE FROM
                    index_files AS iff USING index_files AS iff, documents
                WHERE
                    NOT EXISTS(
                        SELECT
                            d.id
                        FROM
                            documents AS d
                            INNER JOIN document_metadata_version dmv ON d.metadata_version_id=dmv.id
                        WHERE
                            iff.document_id = d.id OR dmv.status_id=3
                    );';
        DBUtil::runQuery($sql);
    }


    /**
     * Check if a document is scheduled to be indexed
     *
     * @param mixed $document This may be a document or document id
     * @return boolean False when the argument is neither, or when the lookup fails.
     */
    public static function isDocumentScheduled($document)
    {
        if (is_numeric($document))
        {
            $docid = (int) $document;
        }
        else if ($document instanceof Document)
        {
            $docid = (int) $document->getId();
        }
        else
        {
            return false;
        }

        $sql = "SELECT 1 FROM index_files WHERE document_id=$docid";
        $result = DBUtil::getResultArray($sql);
        if (PEAR::isError($result))
        {
            // count() of an error object is 1, which used to read as "scheduled".
            return false;
        }

        return count($result) > 0;
    }

    /**
     * Filters text removing redundant characters such as continuous
newlines and spaces.
     *
     * The file is rewritten in place with every run of CR, LF, tab and space
     * characters collapsed to a single space.
     *
     * @param string $filename Path of the text file to rewrite.
     * @return boolean True when the file was read and rewritten, false on failure.
     */
    private function filterText($filename)
    {
        $content = file_get_contents($filename);
        if ($content === false)
        {
            return false;
        }

        // shrink what is being stored. A single pass gives the same end result as
        // repeating the per-character replacements until nothing changes, and it does
        // not rely on a loose string comparison to detect that fixed point.
        $content = preg_replace('/[\r\n\t ]+/', ' ', $content);
        if (is_null($content))
        {
            return false;
        }

        // file_put_contents() returns the byte count, so 0 (an empty file) must not
        // be mistaken for a failure by callers that test the result with "!".
        return file_put_contents($filename, $content) !== false;
    }

    /**
     * Load hooks for text extraction process.
     *
     * Every file in the hook path whose name ends in 'Hook.inc.php' is included; the
     * class name is the file name without '.inc.php'. Hooks whose registerMimeTypes()
     * returns null go into the general cache, the others are cached per mime type.
     *
     */
    private function loadExtractorHooks()
    {
        global $default;

        $this->generalHookCache = array();
        $this->mimeHookCache = array();

        $dir = opendir($this->hookPath);
        if ($dir === false)
        {
            $default->log->error("loadExtractorHooks: cannot open hook directory '{$this->hookPath}'.");
            return;
        }

        while (($file = readdir($dir)) !== false)
        {
            if (substr($file,-12) != 'Hook.inc.php')
            {
                continue;
            }

            require_once($this->hookPath . '/' . $file);
            $class = substr($file, 0, -8);

            if (!class_exists($class))
            {
                continue;
            }

            $hook = new $class;
            // The instance must be tested: the class name is a string and is never an ExtractorHook.
            if (!($hook instanceof ExtractorHook))
            {
                continue;
            }

            // Hooks are stored by value. Binding the cache slots to $hook by reference
            // made every slot follow the loop variable and end up holding the last hook.
            $mimeTypes = $hook->registerMimeTypes();
            if (is_null($mimeTypes))
            {
                $this->generalHookCache[] = $hook;
                continue;
            }

            foreach($mimeTypes as $type)
            {
                $this->mimeHookCache[$type][] = $hook;
            }
        }
        closedir($dir);
    }

    /**
     * This is a refactored function to execute the hooks.
     *
     * @param DocumentExtractor $extractor
     * @param string $phase Name of the hook method to call, e.g. 'pre_extract'.
     * @param string $mimeType Optional. If set, indicates which hooks must be used, else assume general.
     */
    private function executeHook($extractor, $phase, $mimeType = null)
    {
        $hooks = array();
        if (is_null($mimeType))
        {
            $hooks = $this->generalHookCache;
        }
        else if (array_key_exists($mimeType, $this->mimeHookCache))
        {
            $hooks = $this->mimeHookCache[$mimeType];
        }

        if (empty($hooks))
        {
            return;
        }

        foreach($hooks as $hook)
        {
            $hook->$phase($extractor);
        }
    }

    /**
     * The main function that may be called repeatedly to index documents.
*
     * @param int $max Maximum number of queued documents to process in this run. When null,
     *                 the 'indexer/batchDocuments' setting (default 20) is used.
     */
    public function indexDocuments($max=null)
    {
        global $default;

        $config =& KTConfig::getSingleton();

        if (is_null($max))
        {
            $max = $config->get('indexer/batchDocuments',20);
        }
        // $max is interpolated into the LIMIT clause below, so force an integer.
        $max = (int) $max;

        $this->loadExtractorHooks();

        Indexer::clearoutDeleted();

        // identify the indexers that must run
        // mysql specific limit!
        $sql = "SELECT
                    iff.document_id, mt.filetypes, mt.mimetypes, mt.extractor, iff.what
                FROM
                    index_files iff
                    INNER JOIN documents d ON iff.document_id=d.id
                    INNER JOIN document_metadata_version dmv ON d.metadata_version_id=dmv.id
                    INNER JOIN document_content_version dcv ON dmv.content_version_id=dcv.id
                    INNER JOIN mime_types mt ON dcv.mime_id=mt.id
                WHERE
                    iff.processdate IS NULL AND dmv.status_id=1
                ORDER BY indexdate
                LIMIT $max";
        $result = DBUtil::getResultArray($sql);
        if (PEAR::isError($result))
        {
            return;
        }

        // bail if no work to do
        if (count($result) == 0)
        {
            return;
        }

        // identify any documents that need indexing and mark them
        // so they are not taken in a followup run
        $ids = array();
        foreach($result as $docinfo)
        {
            $ids[] = $docinfo['document_id'];
        }

        // mark the documents as being processed
        // ('i' is minutes; 'j' is the day of the month and produced a wrong timestamp)
        $date = date('Y-m-d H:i:s');
        $ids=implode(',',$ids);
        $sql = "UPDATE index_files SET processdate='$date' WHERE document_id in ($ids)";
        DBUtil::runQuery($sql);

        $extractorCache = array();
        $storageManager = KTStorageManagerUtil::getSingleton();

        $tempPath = $config->get("urls/tmpDirectory");

        foreach($result as $docinfo)
        {
            $docId=$docinfo['document_id'];
            $extension=$docinfo['filetypes'];
            $mimeType=$docinfo['mimetypes'];
            $extractorClass=$docinfo['extractor'];
            $indexDocument = in_array($docinfo['what'], array('A','C'));
            $indexDiscussion = in_array($docinfo['what'], array('A','D'));

            if ($this->debug) $default->log->debug("Indexing docid: $docId extension: '$extension' mimetype: '$mimeType' extractor: '$extractorClass'");

            if (empty($extractorClass))
            {
                if ($this->debug) $default->log->debug("No extractor for docid: $docId");

                Indexer::unqueueDocument($docId);
                continue;
            }

            if ($this->debug) print "Processing document $docId.\n";
            if ($indexDocument)
            {
                if (array_key_exists($extractorClass, $extractorCache))
                {
                    $extractor = $extractorCache[$extractorClass];
                }
                else
                {
                    require_once('extractors/' . $extractorClass . '.inc.php');

                    if (!class_exists($extractorClass))
                    {
                        $default->log->error("indexDocuments: extractor '$extractorClass' does not exist.");
                        continue;
                    }

                    $extractor = $extractorCache[$extractorClass] = new $extractorClass();
                }

                if (is_null($extractor))
                {
                    $default->log->error("indexDocuments: extractor '$extractorClass' not resolved - it is null.");
                    continue;
                }

                if (!($extractor instanceof DocumentExtractor))
                {
                    $default->log->error("indexDocuments: extractor '$extractorClass' is not a document extractor class.");
                    continue;
                }

                $document = Document::get($docId);
                $sourceFile = $storageManager->temporaryFile($document);

                if (empty($sourceFile) || !is_file($sourceFile))
                {
                    $default->log->error("indexDocuments: source file '$sourceFile' for document $docId does not exist.");
                    Indexer::unqueueDocument($docId);
                    continue;
                }

                if ($extractor->needsIntermediateSourceFile())
                {
                    $intermediate = $tempPath . '/'. $document->getFileName();
                    // A separate variable keeps the copy status from overwriting $result,
                    // which is the row set this loop is iterating.
                    $copied = @copy($sourceFile, $intermediate);
                    if ($copied === false)
                    {
                        $default->log->error("Could not create intermediate file from document $docId");
                        // problem. lets try again later. probably permission related. log the issue.
                        continue;
                    }
                    $sourceFile = $intermediate;
                }

                // tempnam() creates the base file itself; remember it so that it can be
                // removed together with the '.txt' target once the document is done.
                $targetBase = tempnam($tempPath, 'ktindexer');
                $targetFile = $targetBase . '.txt';

                $extractor->setSourceFile($sourceFile);
                $extractor->setMimeType($mimeType);
                $extractor->setExtension($extension);
                $extractor->setTargetFile($targetFile);
                $extractor->setDocument($document);
                $extractor->setIndexingStatus(null);
                $extractor->setExtractionStatus(null);
                if ($this->debug) $default->log->debug("Extra Info docid: $docId Source File: '$sourceFile' Target File: '$targetFile'");

                $this->executeHook($extractor, 'pre_extract');
                $this->executeHook($extractor, 'pre_extract', $mimeType);

                if ($extractor->extractTextContent())
                {
                    $extractor->setExtractionStatus(true);
                    $this->executeHook($extractor, 'pre_index');
                    $this->executeHook($extractor, 'pre_index', $mimeType);

                    $title = $document->getName();
                    if ($indexDiscussion)
                    {
                        // NOTE(review): this path does not run filterText() on the extracted
                        // text while the content-only path below does - confirm that is intended.
                        $indexStatus = $this->indexDocumentAndDiscussion($docId, $targetFile, $title);

                        if (!$indexStatus) $default->log->error("Problem indexing document $docId");

                        $extractor->setIndexingStatus($indexStatus);
                    }
                    else
                    {
                        if (!$this->filterText($targetFile))
                        {
                            $default->log->error("Problem filtering document $docId");
                        }
                        else
                        {
                            $indexStatus = $this->indexDocument($docId, $targetFile, $title);

                            if (!$indexStatus) $default->log->error("Problem indexing document $docId");

                            $extractor->setIndexingStatus($indexStatus);
                        }
                    }

                    $this->executeHook($extractor, 'post_index', $mimeType);
                    $this->executeHook($extractor, 'post_index');
                }
                else
                {
                    $extractor->setExtractionStatus(false);
                    $default->log->error("Could not extract contents from document $docId");
                }

                $this->executeHook($extractor, 'post_extract', $mimeType);
                $this->executeHook($extractor, 'post_extract');

                if ($extractor->needsIntermediateSourceFile())
                {
                    @unlink($sourceFile);
                }

                @unlink($targetFile);
                if ($targetBase !== false)
                {
                    @unlink($targetBase);
                }
            }
            else
            {
                $this->indexDiscussion($docId);
            }

            Indexer::unqueueDocument($docId);
            if ($this->debug) $default->log->debug("Done indexing docid: $docId");

        }
        if ($this->debug) print "Done.\n";
    }

    /**
     * Index a document. Subclasses must implement this function.
     *
     * @param int $docId
     * @param string $textFile Path of the file holding the extracted text.
     * @param string $title
     */
    protected abstract function indexDocument($docId, $textFile, $title='');

    /**
     * Index a discussion. Subclasses must implement this function.
     *
     * @param int $docId
     */
    protected abstract function indexDiscussion($docId);

    /**
     * Diagnose the extractors.
     *
     * Runs the diagnosis over the extractor path and the hook path and merges the results.
     *
     * @return array
     */
    public function diagnose()
    {
        // NOTE(review): hooks are checked against a class named 'Hook' here, whereas
        // loadExtractorHooks() tests for ExtractorHook - confirm which name is right.
        $diagnosis = $this->_diagnose($this->extractorPath, 'DocumentExtractor', 'Extractor.inc.php');
        $diagnosis = array_merge($diagnosis, $this->_diagnose($this->hookPath, 'Hook', 'Hook.inc.php'));

        return $diagnosis;
    }

    /**
     * This is a refactored diagnose function.
     *
     * Includes every file in $path ending in $extension, instantiates the class named
     * after the file (file name without '.inc.php') and collects its diagnose() output.
     *
     * @param string $path Directory to scan.
     * @param string $baseclass Class or interface the instantiated objects must be an instance of.
     * @param string $extension File name suffix that selects the files to load.
     * @return array Keyed by class name; each entry has 'name' and 'diagnosis'. Classes whose
     *               diagnosis is empty are left out.
     */
    private function _diagnose($path, $baseclass, $extension)
    {
        global $default;

        $diagnoses = array();
        $dir = opendir($path);
        if ($dir === false)
        {
            $default->log->error("diagnose: cannot open directory '$path'.");
            return $diagnoses;
        }

        $extlen = - strlen($extension);
        while (($file = readdir($dir)) !== false)
        {
            // The directory self/parent entries are never candidates; do not log them as errors.
            if ($file == '.' || $file == '..')
            {
                continue;
            }

            if (substr($file,$extlen) != $extension)
            {
                $default->log->error("diagnose: '$file' does not have extension '$extension'.");
                continue;
            }

            require_once($path . '/' . $file);

            $class = substr($file, 0, -8);
            if (!class_exists($class))
            {
                $default->log->error("diagnose: class '$class' does not exist.");
                continue;
            }

            $extractor = new $class();
            if (!($extractor instanceof $baseclass))
            {
                // Report the base class that was actually required, not a fixed name.
                $default->log->error("diagnose(): '$class' is not of type $baseclass");
                continue;
            }

            $types = $extractor->getSupportedMimeTypes();
            if (empty($types))
            {
                if ($this->debug) $default->log->debug("diagnose: class '$class' does not support any types.");
                continue;
            }

            $diagnosis=$extractor->diagnose();
            if (empty($diagnosis))
            {
                continue;
            }
            $diagnoses[$class] = array(
                'name'=>$extractor->getDisplayName(),
                'diagnosis'=>$diagnosis
            );

        }
        closedir($dir);

        return $diagnoses;
    }


    /**
     * Register the extractor types.
     *
     * Includes every '*Extractor.inc.php' file in the extractor path and calls
     * registerMimeTypes() on an instance of each DocumentExtractor class found.
     *
     * @param boolean $clear Optional. Defaults to false. When true, the existing extractor
     *                       associations are cleared first.
     */
    public function registerTypes($clear=false)
    {
        global $default;

        if ($clear)
        {
            $this->clearExtractors();
        }

        $dir = opendir($this->extractorPath);
        if ($dir === false)
        {
            $default->log->error("registerTypes: cannot open extractor directory '{$this->extractorPath}'.");
            return;
        }

        while (($file = readdir($dir)) !== false)
        {
            if (substr($file,-17) != 'Extractor.inc.php')
            {
                continue;
            }

            require_once($this->extractorPath . '/' . $file);
            $class = substr($file, 0, -8);

            // Skip files that did not define the expected class. The test used to be
            // inverted, which skipped every class that did load.
            if (!class_exists($class))
            {
                continue;
            }

            $extractor = new $class;
            // Test the instance; the class name is a string and never a DocumentExtractor.
            if (!($extractor instanceof DocumentExtractor))
            {
                continue;
            }

            $extractor->registerMimeTypes();
        }
        closedir($dir);
    }

    /**
     * This is used as a possible optimisation effort. It may be overridden in that case.
     *
     * @param int $docId
     * @param string $textFile
     * @param string $title
     * @return mixed Whatever indexDocument() returned, so that callers can test the outcome.
     */
    protected function indexDocumentAndDiscussion($docId, $textFile, $title='')
    {
        $status = $this->indexDocument($docId, $textFile, $title);
        $this->indexDiscussion($docId);

        return $status;
    }

    /**
     * Remove the document from the queue. This is normally called when it has been processed.
*
     * @param int $docid
     */
    public static function unqueueDocument($docid)
    {
        // The id is interpolated into the statement, so force an integer first.
        $docid = (int) $docid;

        $sql = "DELETE FROM index_files WHERE document_id=$docid";
        DBUtil::runQuery($sql);
    }

    /**
     * Run a query on the index.
     *
     * @param string $query
     * @return array
     */
    public abstract function query($query);

    /**
     * Converts an integer to a string that can be easily compared and reversed.
     *
     * The number is left-padded with zeros to 14 characters and every digit is then
     * replaced by a letter (0 => a ... 9 => j), e.g. 12 becomes 'aaaaaaaaaaaabc'.
     *
     * @param int $int
     * @return string
     */
    public static function longToString($int)
    {
        $maxlen = 14;

        $a2z = array('a','b','c','d','e','f','g','h','i','j');
        $o29 = array('0','1','2','3','4','5','6','7','8','9');
        $l = str_pad("$int", $maxlen, '0', STR_PAD_LEFT);

        return str_replace($o29, $a2z, $l);
    }

    /**
     * Converts a string to an integer.
     *
     * This is the inverse of longToString(): the letters a..j are mapped back to the
     * digits 0..9 and the result is converted to a number.
     *
     * @param string $str
     * @return int
     */
    public static function stringToLong($str)
    {
        $a2z = array('a','b','c','d','e','f','g','h','i','j');
        $o29 = array('0','1','2','3','4','5','6','7','8','9');

        // Adding 0 performs the numeric conversion and drops the leading zeros.
        $int = str_replace($a2z, $o29, $str) + 0;

        return $int;
    }

    /**
     * Possibly we can optimise indexes. This method may be overridden.
+ * + */ + public function optimise() + { + // do nothing + } +} + +?> \ No newline at end of file diff --git a/search2/indexing/indexers/JavaXMLRPCLuceneIndexer.inc.php b/search2/indexing/indexers/JavaXMLRPCLuceneIndexer.inc.php new file mode 100755 index 0000000..4e68882 --- /dev/null +++ b/search2/indexing/indexers/JavaXMLRPCLuceneIndexer.inc.php @@ -0,0 +1,11 @@ + +require_once('indexer.inc.php'); + +class JavaXMLRPCLuceneIndexer extends Indexer +{ + protected function indexDocument($docid, $textfile) + { + throw new Exception('TODO'); + } +} +?> \ No newline at end of file diff --git a/search2/indexing/indexers/PHPLuceneIndexer.inc.php b/search2/indexing/indexers/PHPLuceneIndexer.inc.php new file mode 100755 index 0000000..8d1c17e --- /dev/null +++ b/search2/indexing/indexers/PHPLuceneIndexer.inc.php @@ -0,0 +1,183 @@ + + +require_once 'Zend/Search/Lucene.php'; + +class PHPLuceneIndexer extends Indexer +{ + /** + * @var Zend_Search_Lucene + */ + private $lucene; + + /** + * The constructor for PHP Lucene + * + * @param boolean $create Optional. If true, the lucene index will be recreated. + */ + public function __construct() + { + parent::__construct(); + $config =& KTConfig::getSingleton(); + $indexPath = $config->get('indexer/luceneDirectory'); + $this->lucene = new Zend_Search_Lucene($indexPath, false); + } + + /** + * Creates an index to be used. + * + */ + public static function createIndex() + { + $config =& KTConfig::getSingleton(); + $indexPath = $config->get('indexer/luceneDirectory'); + $lucene = new Zend_Search_Lucene($indexPath, true); + } + + + /** + * A refactored method to add the document to the index.. 
*
     * @param int $docid
     * @param string $content
     * @param string $discussion
     * @param string $title
     */
    private function addDocument($docid, $content, $discussion, $title='')
    {
        $luceneDoc = new Zend_Search_Lucene_Document();

        // The id is stored in its letter-encoded form, without an explicit encoding argument.
        $encodedId = PHPLuceneIndexer::longToString($docid);
        $luceneDoc->addField(Zend_Search_Lucene_Field::Text('DocumentID', $encodedId));

        // The remaining fields are all added as UTF-8 text, in this order.
        $utf8Fields = array(
            'Content' => $content,
            'Discussion' => $discussion,
            'Title' => $title
        );
        foreach ($utf8Fields as $fieldName => $fieldText)
        {
            $luceneDoc->addField(Zend_Search_Lucene_Field::Text($fieldName, $fieldText, 'UTF-8'));
        }

        $this->lucene->addDocument($luceneDoc);
    }

    /**
     * Re-indexes the content of a document from a text file, keeping the
     * discussion text of the entry it replaces.
     *
     * @param int $docid
     * @param string $textfile
     * @param string $title
     * @return boolean False when the text file is not available, true otherwise.
     */
    protected function indexDocument($docid, $textfile, $title='')
    {
        global $default;

        if (is_file($textfile))
        {
            // deleteDocument() hands back the removed entry's fields; only the discussion is reused.
            $removed = $this->deleteDocument($docid);
            $discussion = $removed[1];

            $this->addDocument($docid, file_get_contents($textfile), $discussion, $title);

            return true;
        }

        $default->log->error("Attempting to index $docid $textfile but it is not available.");
        return false;
    }

    /**
     * Re-indexes both the content (from the text file) and the current
     * discussion text of a document.
     *
     * @param int $docid
     * @param string $textfile
     * @param string $title
     * @return boolean False when the text file is not available, true otherwise.
     */
    protected function indexDocumentAndDiscussion($docid, $textfile, $title='')
    {
        global $default;

        if (is_file($textfile))
        {
            $this->deleteDocument($docid);

            $text = file_get_contents($textfile);
            $discussionText = Indexer::getDiscussionText($docid);
            $this->addDocument($docid, $text, $discussionText, $title);

            return true;
        }

        $default->log->error("Attempting to index $docid $textfile but it is not available.");
        return false;
    }

    /**
     * Indexes a discussion on a document.
+ * + * @param int $docid + * @return boolean + */ + protected function indexDiscussion($docid) + { + list($content, $discussion, $title) = $this->deleteDocument($docid); + + $this->addDocument($docid, $content, Indexer::getDiscussionText($docid), $title); + + return true; + } + + /** + * Optimise the lucene index. + * This can be called periodically to optimise performance and size of the lucene index. + * + */ + public function optimise() + { + $this->lucene->optimize(); + } + + /** + * Removes a document from the index. + * + * @param int $docid + * @return array containing (content, discussion, title) + */ + public function deleteDocument($docid) + { + $content = ''; + $discussion = ''; + $query = Zend_Search_Lucene_Search_QueryParser::parse('DocumentID:' . PHPLuceneIndexer::longToString($docid)); + $hits = $this->lucene->find($query); + // there should only be one, but we'll loop for safety + foreach ($hits as $hit) + { + $content = $hit->Content; + $discussion = $hit->Discussion; + $title = $hit->Title; + $title=''; + + $this->lucene->delete($hit); + } + return array($content, $discussion, $title); + } + + /** + * Enter description here... + * + * @param string $query + * @return array + */ + public function query($query) + { + $results = array(); + $query = Zend_Search_Lucene_Search_QueryParser::parse($query); + + $hits = $this->lucene->find($query); + foreach ($hits as $hit) + { + $document = $hit->getDocument(); + + $document_id = PHPLuceneIndexer::stringToLong($document->DocumentID); + $content = $document->Content ; + $discussion = $document->Discussion ; + $title = $document->Title; + $score = $hit->score; + + // avoid adding duplicates. If it is in already, it has higher priority. 
+ if (!array_key_exists($document_id, $results) || $score > $results[$document_id]->Score) + { + $results[$document_id] = new QueryResultItem($document_id, $score, $title, $content, $discussion); + } + } + return $results; + } +} +?> \ No newline at end of file diff --git a/search2/search/SearchCommandLexer.php b/search2/search/SearchCommandLexer.php new file mode 100755 index 0000000..1ba5fab --- /dev/null +++ b/search2/search/SearchCommandLexer.php @@ -0,0 +1,268 @@ +offset=0; + $this->data=$data; + $this->token=null; + $this->value=''; + $this->length=strlen($data); + $this->state = 0; + $this->escaped=false; + $this->exit=false; + $this->lookahead=null; + $this->char=null; + } + + private function processNormalChar() + { + $append=true; + $clear=false; + $checkwords=false; + $word=''; + + if (in_array($this->char, array('=','(',')','[',']',',','!','<','>','"')) && !empty($this->value)) + { + $word=$this->value; + $checkwords=true; + $this->offset--; + $append=false; + $clear=false; + } + else + switch ($this->char) + { + case ' ': + case "\t": + case "\r": + case "\n": + if (!empty($this->value)) + { + $word=$this->value; + $checkwords=true; + } + $append=false; + $clear=true; + break; + case '=': + $this->token=SearchCommandParser::IS; + break; + case '(': + $this->token=SearchCommandParser::PAR_OPEN; + break; + case ')': + $this->token=SearchCommandParser::PAR_CLOSE; + break; + case ',': + $this->token=SearchCommandParser::COMMA; + break; + case ':': + $this->token=SearchCommandParser::COLON; + break; + case '[': + $this->token=SearchCommandParser::SQUARE_OPEN; + break; + case ']': + $this->token=SearchCommandParser::SQUARE_CLOSE; + break; + case '!': + if ($this->lookahead == '=') + { + $this->zap(); + $this->token=SearchCommandParser::IS_NOT; + } + else + { + throw new Exception('Unexpected token: ' . 
$this->lookahead); + } + break; + case '<': + case '>': + if ($this->lookahead == '>') + { + $this->zap(); + $this->token=SearchCommandParser::IS_NOT; + } + elseif ($this->lookahead == '=') + { + $this->zap(); + $this->token=($this->char == '<')?(SearchCommandParser::LE):(SearchCommandParser::GE); + } + else + { + $this->token=($this->char == '<')?(SearchCommandParser::LT):(SearchCommandParser::GT); + } + break; + case '"': + $clear=true; + $this->state=1; + break; + + } + if ($clear) + { + $this->char=''; + $this->value=''; + $this->token=null; + } + if ($append) + { + $this->value .= $this->char; + } + if (!is_null($this->token)) + { + $this->exit=true; + } + if ($checkwords) + { + $this->exit=true; + $this->value = $word; + switch (strtolower($word)) + { + case 'not': + $this->token = SearchCommandParser::NOT; + break; + case 'with': + $this->token = SearchCommandParser::WITH; + break; + case 'like': + $this->token = SearchCommandParser::LIKE; + break; + case 'contains': + case 'contain': + $this->token = SearchCommandParser::CONTAINS ; + break; + case 'starts': + case 'start': + $this->token = SearchCommandParser::START ; + break; + case 'ends': + case 'end': + $this->token = SearchCommandParser::END ; + break; + case 'does': + $this->token = SearchCommandParser::DOES ; + break; + case 'is': + $this->token = SearchCommandParser::IS ; + break; + case 'between': + $this->token = SearchCommandParser::BETWEEN ; + break; + case 'or': + $this->token = SearchCommandParser::OPOR ; + break; + case 'and': + $this->token = SearchCommandParser::OPAND ; + break; + + default: + + $this->token = SearchCommandParser::TERMINAL; + break; + + } + } + + } + + private function processStringChar() + { + if ($this->escaped) + { + switch($this->char) + { + case 'r': + $this->value .= "\r"; + break; + case 'n': + $this->value .= "\n"; + break; + case 't': + $this->value .= "\t"; + break; + default: + $this->value .= $this->char; + } + $this->escaped=false; + } + else + { + 
switch($this->char) + { + case '\\': + $this->escaped=true; + break; + case '"': + $this->escaped=false; + $this->state=0; + $this->exit=true; + $this->token = SearchCommandParser::VALUE; + break; + default: + $this->value .= $this->char; + } + } + } + + private function zap() + { + $this->char = substr($this->data,$this->offset++,1); + if ($this->offset <= $this->length) + { + $this->lookahead= substr($this->data,$this->offset,1); + } + else + { + $this->lookahead=null; + } + } + + public function yylex() + { + $this->exit=false; + $this->token=null; + $this->value=''; + while (!$this->exit) + { + if ($this->length <= $this->offset) + { + return false; + } + + $this->zap(); + switch($this->state) + { + case 0: // initial + $this->processNormalChar(); + break; + case 1: // instring + $this->processStringChar(); + break; + } + + if (is_null($this->lookahead) || !is_null($this->token)) + { + $this->exit=true; + } + } + return true; + } +} + +?> \ No newline at end of file diff --git a/search2/search/SearchCommandParser.php b/search2/search/SearchCommandParser.php new file mode 100755 index 0000000..55eb4b2 --- /dev/null +++ b/search2/search/SearchCommandParser.php @@ -0,0 +1,1321 @@ +string = $s->string; + $this->metadata = $s->metadata; + } else { + $this->string = (string) $s; + if ($m instanceof SearchCommandParseryyToken) { + $this->metadata = $m->metadata; + } elseif (is_array($m)) { + $this->metadata = $m; + } + } + } + + function __toString() + { + return $this->_string; + } + + function offsetExists($offset) + { + return isset($this->metadata[$offset]); + } + + function offsetGet($offset) + { + return $this->metadata[$offset]; + } + + function offsetSet($offset, $value) + { + if ($offset === null) { + if (isset($value[0])) { + $x = ($value instanceof SearchCommandParseryyToken) ? 
+ $value->metadata : $value; + $this->metadata = array_merge($this->metadata, $x); + return; + } + $offset = count($this->metadata); + } + if ($value === null) { + return; + } + if ($value instanceof SearchCommandParseryyToken) { + if ($value->metadata) { + $this->metadata[$offset] = $value->metadata; + } + } elseif ($value) { + $this->metadata[$offset] = $value; + } + } + + function offsetUnset($offset) + { + unset($this->metadata[$offset]); + } +} + +/** The following structure represents a single element of the + * parser's stack. Information stored includes: + * + * + The state number for the parser at this level of the stack. + * + * + The value of the token stored at this level of the stack. + * (In other words, the "major" token.) + * + * + The semantic value stored at this level of the stack. This is + * the information used by the action routines in the grammar. + * It is sometimes called the "minor" token. + */ +class SearchCommandParseryyStackEntry +{ + public $stateno; /* The state-number */ + public $major; /* The major token value. This is the code + ** number for the token at this stack level */ + public $minor; /* The user-supplied minor token value. This + ** is the value of the token */ +}; + +// code external to the class is included here + +// declare_class is output here +#line 2 "SearchCommandParser.y" +class SearchCommandParser#line 102 "SearchCommandParser.php" +{ +/* First off, code is included which follows the "include_class" declaration +** in the input file. 
*/ +#line 4 "SearchCommandParser.y" + + + private $expr_result; + private $parse_result; + + public function __construct() + { + $this->parse_result = 'ok'; + } + + public function getExprResult() + { + return $this->expr_result; + } + + public function isExprOk() + { + return $this->parse_result == 'ok'; + } + +#line 128 "SearchCommandParser.php" + +/* Next is all token values, as class constants +*/ +/* +** These constants (all generated automatically by the parser generator) +** specify the various kinds of tokens (terminals) that the parser +** understands. +** +** Each symbol here is a terminal symbol in the grammar. +*/ + const OPOR = 1; + const OPAND = 2; + const NOT = 3; + const IS = 4; + const CONTAIN = 5; + const LIKE = 6; + const BETWEEN = 7; + const START = 8; + const END = 9; + const GT = 10; + const LE = 11; + const LT = 12; + const GE = 13; + const PAR_OPEN = 14; + const PAR_CLOSE = 15; + const DOES = 16; + const COLON = 17; + const SQUARE_OPEN = 18; + const SQUARE_CLOSE = 19; + const TERMINAL = 20; + const VALUE = 21; + const COMMA = 22; + const CONTAINS = 23; + const WITH = 24; + const IS_NOT = 25; + const YY_NO_ACTION = 84; + const YY_ACCEPT_ACTION = 83; + const YY_ERROR_ACTION = 82; + +/* Next are that tables used to determine what action to take based on the +** current state and lookahead token. These tables are used to implement +** functions that take a state number and lookahead value and return an +** action integer. +** +** Suppose the action integer is N. Then the action is determined as +** follows +** +** 0 <= N < self::YYNSTATE Shift N. That is, +** push the lookahead +** token onto the stack +** and goto state N. +** +** self::YYNSTATE <= N < self::YYNSTATE+self::YYNRULE Reduce by rule N-YYNSTATE. +** +** N == self::YYNSTATE+self::YYNRULE A syntax error has occurred. +** +** N == self::YYNSTATE+self::YYNRULE+1 The parser accepts its +** input. (and concludes parsing) +** +** N == self::YYNSTATE+self::YYNRULE+2 No such action. 
Denotes unused +** slots in the yy_action[] table. +** +** The action table is constructed as a single large static array $yy_action. +** Given state S and lookahead X, the action is computed as +** +** self::$yy_action[self::$yy_shift_ofst[S] + X ] +** +** If the index value self::$yy_shift_ofst[S]+X is out of range or if the value +** self::$yy_lookahead[self::$yy_shift_ofst[S]+X] is not equal to X or if +** self::$yy_shift_ofst[S] is equal to self::YY_SHIFT_USE_DFLT, it means that +** the action is not in the table and that self::$yy_default[S] should be used instead. +** +** The formula above is for computing the action when the lookahead is +** a terminal symbol. If the lookahead is a non-terminal (as occurs after +** a reduce action) then the static $yy_reduce_ofst array is used in place of +** the static $yy_shift_ofst array and self::YY_REDUCE_USE_DFLT is used in place of +** self::YY_SHIFT_USE_DFLT. +** +** The following are the tables generated in this section: +** +** self::$yy_action A single table containing all actions. +** self::$yy_lookahead A table containing the lookahead for each entry in +** yy_action. Used to detect hash collisions. +** self::$yy_shift_ofst For each state, the offset into self::$yy_action for +** shifting terminals. +** self::$yy_reduce_ofst For each state, the offset into self::$yy_action for +** shifting non-terminals after a reduce. +** self::$yy_default Default action for each state. 
+*/ + const YY_SZ_ACTTAB = 70; +static public $yy_action = array( + /* 0 */ 52, 15, 8, 7, 4, 23, 22, 37, 34, 54, + /* 10 */ 33, 3, 5, 16, 9, 2, 21, 83, 1, 13, + /* 20 */ 50, 32, 36, 3, 5, 44, 17, 26, 47, 1, + /* 30 */ 19, 39, 1, 41, 14, 46, 20, 1, 45, 38, + /* 40 */ 1, 6, 35, 10, 42, 27, 31, 12, 5, 24, + /* 50 */ 18, 53, 28, 52, 63, 63, 63, 30, 63, 63, + /* 60 */ 63, 49, 48, 29, 40, 43, 51, 63, 11, 25, + ); + static public $yy_lookahead = array( + /* 0 */ 3, 4, 6, 7, 3, 8, 9, 10, 11, 12, + /* 10 */ 13, 1, 2, 16, 17, 14, 27, 28, 29, 18, + /* 20 */ 23, 20, 25, 1, 2, 15, 14, 27, 33, 29, + /* 30 */ 27, 24, 29, 21, 30, 27, 32, 29, 27, 24, + /* 40 */ 29, 2, 19, 18, 15, 21, 19, 5, 2, 33, + /* 50 */ 22, 31, 31, 3, 34, 34, 34, 31, 34, 34, + /* 60 */ 34, 31, 31, 31, 31, 31, 31, 34, 32, 32, +); + const YY_SHIFT_USE_DFLT = -5; + const YY_SHIFT_MAX = 31; + static public $yy_shift_ofst = array( + /* 0 */ 1, -3, 1, 1, 1, 1, 12, 12, 12, 12, + /* 10 */ 12, 12, 12, 12, 12, 50, 50, 24, 24, 10, + /* 20 */ -4, 22, 15, 7, 29, 42, 46, 28, 27, 39, + /* 30 */ 23, 25, +); + const YY_REDUCE_USE_DFLT = -12; + const YY_REDUCE_MAX = 18; + static public $yy_reduce_ofst = array( + /* 0 */ -11, 4, 3, 0, 8, 11, 31, 32, 33, 30, + /* 10 */ 26, 20, 35, 21, 34, 36, 37, 16, -5, +); + static public $yyExpectedTokens = array( + /* 0 */ array(3, 14, 18, 20, ), + /* 1 */ array(3, 4, 8, 9, 10, 11, 12, 13, 16, 17, 23, 25, ), + /* 2 */ array(3, 14, 18, 20, ), + /* 3 */ array(3, 14, 18, 20, ), + /* 4 */ array(3, 14, 18, 20, ), + /* 5 */ array(3, 14, 18, 20, ), + /* 6 */ array(14, 21, ), + /* 7 */ array(14, 21, ), + /* 8 */ array(14, 21, ), + /* 9 */ array(14, 21, ), + /* 10 */ array(14, 21, ), + /* 11 */ array(14, 21, ), + /* 12 */ array(14, 21, ), + /* 13 */ array(14, 21, ), + /* 14 */ array(14, 21, ), + /* 15 */ array(3, ), + /* 16 */ array(3, ), + /* 17 */ array(21, ), + /* 18 */ array(21, ), + /* 19 */ array(1, 2, 15, ), + /* 20 */ array(6, 7, ), + /* 21 */ array(1, 2, ), + /* 22 */ array(24, ), + /* 23 */ 
array(24, ), + /* 24 */ array(15, ), + /* 25 */ array(5, ), + /* 26 */ array(2, ), + /* 27 */ array(22, ), + /* 28 */ array(19, ), + /* 29 */ array(2, ), + /* 30 */ array(19, ), + /* 31 */ array(18, ), + /* 32 */ array(), + /* 33 */ array(), + /* 34 */ array(), + /* 35 */ array(), + /* 36 */ array(), + /* 37 */ array(), + /* 38 */ array(), + /* 39 */ array(), + /* 40 */ array(), + /* 41 */ array(), + /* 42 */ array(), + /* 43 */ array(), + /* 44 */ array(), + /* 45 */ array(), + /* 46 */ array(), + /* 47 */ array(), + /* 48 */ array(), + /* 49 */ array(), + /* 50 */ array(), + /* 51 */ array(), + /* 52 */ array(), + /* 53 */ array(), + /* 54 */ array(), +); + static public $yy_default = array( + /* 0 */ 82, 66, 82, 82, 82, 82, 82, 82, 82, 82, + /* 10 */ 82, 82, 82, 82, 82, 66, 66, 82, 82, 82, + /* 20 */ 82, 55, 82, 82, 82, 82, 57, 73, 82, 82, + /* 30 */ 82, 82, 69, 78, 77, 68, 81, 76, 80, 79, + /* 40 */ 62, 70, 71, 60, 59, 56, 58, 72, 61, 65, + /* 50 */ 74, 64, 67, 63, 75, +); +/* The next thing included is series of defines which control +** various aspects of the generated parser. +** self::YYNOCODE is a number which corresponds +** to no legal terminal or nonterminal number. This +** number is used to fill in empty slots of the hash +** table. +** self::YYFALLBACK If defined, this indicates that one or more tokens +** have fall-back values which should be used if the +** original value of the token will not parse. +** self::YYSTACKDEPTH is the maximum depth of the parser's stack. +** self::YYNSTATE the combined number of states. +** self::YYNRULE the number of rules in the grammar +** self::YYERRORSYMBOL is the code number of the error symbol. If not +** defined, then do no error processing. +*/ + const YYNOCODE = 35; + const YYSTACKDEPTH = 100; + const YYNSTATE = 55; + const YYNRULE = 27; + const YYERRORSYMBOL = 26; + const YYERRSYMDT = 'yy0'; + const YYFALLBACK = 0; + /** The next table maps tokens into fallback tokens. 
If a construct + * like the following: + * + * %fallback ID X Y Z. + * + * appears in the grammer, then ID becomes a fallback token for X, Y, + * and Z. Whenever one of the tokens X, Y, or Z is input to the parser + * but it does not parse, the type of the token is changed to ID and + * the parse is retried before an error is thrown. + */ + static public $yyFallback = array( + ); + /** + * Turn parser tracing on by giving a stream to which to write the trace + * and a prompt to preface each trace message. Tracing is turned off + * by making either argument NULL + * + * Inputs: + * + * - A stream resource to which trace output should be written. + * If NULL, then tracing is turned off. + * - A prefix string written at the beginning of every + * line of trace output. If NULL, then tracing is + * turned off. + * + * Outputs: + * + * - None. + * @param resource + * @param string + */ + static function Trace($TraceFILE, $zTracePrompt) + { + if (!$TraceFILE) { + $zTracePrompt = 0; + } elseif (!$zTracePrompt) { + $TraceFILE = 0; + } + self::$yyTraceFILE = $TraceFILE; + self::$yyTracePrompt = $zTracePrompt; + } + + /** + * Output debug information to output (php://output stream) + */ + static function PrintTrace() + { + self::$yyTraceFILE = fopen('php://output', 'w'); + self::$yyTracePrompt = ''; + } + + /** + * @var resource|0 + */ + static public $yyTraceFILE; + /** + * String to prepend to debug output + * @var string|0 + */ + static public $yyTracePrompt; + /** + * @var int + */ + public $yyidx; /* Index of top element in stack */ + /** + * @var int + */ + public $yyerrcnt; /* Shifts left before out of the error */ + /** + * @var array + */ + public $yystack = array(); /* The parser's stack */ + + /** + * For tracing shifts, the names of all terminals and nonterminals + * are required. 
The following table supplies these names + * @var array + */ + static public $yyTokenName = array( + '$', 'OPOR', 'OPAND', 'NOT', + 'IS', 'CONTAIN', 'LIKE', 'BETWEEN', + 'START', 'END', 'GT', 'LE', + 'LT', 'GE', 'PAR_OPEN', 'PAR_CLOSE', + 'DOES', 'COLON', 'SQUARE_OPEN', 'SQUARE_CLOSE', + 'TERMINAL', 'VALUE', 'COMMA', 'CONTAINS', + 'WITH', 'IS_NOT', 'error', 'expr', + 'cmdline', 'terminal', 'operator', 'value', + 'notop', 'valuelist', + ); + + /** + * For tracing reduce actions, the names of all rules are required. + * @var array + */ + static public $yyRuleName = array( + /* 0 */ "cmdline ::= expr", + /* 1 */ "expr ::= expr OPAND expr", + /* 2 */ "expr ::= expr OPOR expr", + /* 3 */ "expr ::= NOT expr", + /* 4 */ "expr ::= PAR_OPEN expr PAR_CLOSE", + /* 5 */ "expr ::= terminal operator value", + /* 6 */ "expr ::= terminal notop BETWEEN value OPAND value", + /* 7 */ "expr ::= terminal notop LIKE value", + /* 8 */ "expr ::= terminal IS notop value", + /* 9 */ "expr ::= terminal DOES notop CONTAIN value", + /* 10 */ "expr ::= terminal COLON value", + /* 11 */ "notop ::=", + /* 12 */ "notop ::= NOT", + /* 13 */ "terminal ::= SQUARE_OPEN value SQUARE_CLOSE SQUARE_OPEN value SQUARE_CLOSE", + /* 14 */ "terminal ::= TERMINAL", + /* 15 */ "value ::= VALUE", + /* 16 */ "value ::= PAR_OPEN valuelist PAR_CLOSE", + /* 17 */ "valuelist ::= VALUE COMMA valuelist", + /* 18 */ "valuelist ::= VALUE", + /* 19 */ "operator ::= CONTAINS", + /* 20 */ "operator ::= LT", + /* 21 */ "operator ::= GT", + /* 22 */ "operator ::= LE", + /* 23 */ "operator ::= GE", + /* 24 */ "operator ::= START WITH", + /* 25 */ "operator ::= END WITH", + /* 26 */ "operator ::= IS_NOT", + ); + + /** + * This function returns the symbolic name associated with a token + * value. 
+ * @param int + * @return string + */ + function tokenName($tokenType) + { + if ($tokenType === 0) { + return 'End of Input'; + } + if ($tokenType > 0 && $tokenType < count(self::$yyTokenName)) { + return self::$yyTokenName[$tokenType]; + } else { + return "Unknown"; + } + } + + /** + * The following function deletes the value associated with a + * symbol. The symbol can be either a terminal or nonterminal. + * @param int the symbol code + * @param mixed the symbol's value + */ + static function yy_destructor($yymajor, $yypminor) + { + switch ($yymajor) { + /* Here is inserted the actions which take place when a + ** terminal or non-terminal is destroyed. This can happen + ** when the symbol is popped from the stack during a + ** reduce or during error processing or when a parser is + ** being destroyed before it is finished parsing. + ** + ** Note: during a reduce, the only symbols destroyed are those + ** which appear on the RHS of the rule, but which are not used + ** inside the C code. + */ + default: break; /* If no destructor action specified: do nothing */ + } + } + + /** + * Pop the parser's stack once. + * + * If there is a destructor routine associated with the token which + * is popped from the stack, then call it. + * + * Return the major token number for the symbol popped. + * @param SearchCommandParseryyParser + * @return int + */ + function yy_pop_parser_stack() + { + if (!count($this->yystack)) { + return; + } + $yytos = array_pop($this->yystack); + if (self::$yyTraceFILE && $this->yyidx >= 0) { + fwrite(self::$yyTraceFILE, + self::$yyTracePrompt . 'Popping ' . self::$yyTokenName[$yytos->major] . + "\n"); + } + $yymajor = $yytos->major; + self::yy_destructor($yymajor, $yytos->minor); + $this->yyidx--; + return $yymajor; + } + + /** + * Deallocate and destroy a parser. Destructors are all called for + * all stack elements before shutting the parser down. 
+ */ + function __destruct() + { + while ($this->yyidx >= 0) { + $this->yy_pop_parser_stack(); + } + if (is_resource(self::$yyTraceFILE)) { + fclose(self::$yyTraceFILE); + } + } + + /** + * Based on the current state and parser stack, get a list of all + * possible lookahead tokens + * @param int + * @return array + */ + function yy_get_expected_tokens($token) + { + $state = $this->yystack[$this->yyidx]->stateno; + $expected = self::$yyExpectedTokens[$state]; + if (in_array($token, self::$yyExpectedTokens[$state], true)) { + return $expected; + } + $stack = $this->yystack; + $yyidx = $this->yyidx; + do { + $yyact = $this->yy_find_shift_action($token); + if ($yyact >= self::YYNSTATE && $yyact < self::YYNSTATE + self::YYNRULE) { + // reduce action + $done = 0; + do { + if ($done++ == 100) { + $this->yyidx = $yyidx; + $this->yystack = $stack; + // too much recursion prevents proper detection + // so give up + return array_unique($expected); + } + $yyruleno = $yyact - self::YYNSTATE; + $this->yyidx -= self::$yyRuleInfo[$yyruleno]['rhs']; + $nextstate = $this->yy_find_reduce_action( + $this->yystack[$this->yyidx]->stateno, + self::$yyRuleInfo[$yyruleno]['lhs']); + if (isset(self::$yyExpectedTokens[$nextstate])) { + $expected += self::$yyExpectedTokens[$nextstate]; + if (in_array($token, + self::$yyExpectedTokens[$nextstate], true)) { + $this->yyidx = $yyidx; + $this->yystack = $stack; + return array_unique($expected); + } + } + if ($nextstate < self::YYNSTATE) { + // we need to shift a non-terminal + $this->yyidx++; + $x = new SearchCommandParseryyStackEntry; + $x->stateno = $nextstate; + $x->major = self::$yyRuleInfo[$yyruleno]['lhs']; + $this->yystack[$this->yyidx] = $x; + continue 2; + } elseif ($nextstate == self::YYNSTATE + self::YYNRULE + 1) { + $this->yyidx = $yyidx; + $this->yystack = $stack; + // the last token was just ignored, we can't accept + // by ignoring input, this is in essence ignoring a + // syntax error! 
+ return array_unique($expected); + } elseif ($nextstate === self::YY_NO_ACTION) { + $this->yyidx = $yyidx; + $this->yystack = $stack; + // input accepted, but not shifted (I guess) + return $expected; + } else { + $yyact = $nextstate; + } + } while (true); + } + break; + } while (true); + return array_unique($expected); + } + + /** + * Based on the parser state and current parser stack, determine whether + * the lookahead token is possible. + * + * The parser will convert the token value to an error token if not. This + * catches some unusual edge cases where the parser would fail. + * @param int + * @return bool + */ + function yy_is_expected_token($token) + { + if ($token === 0) { + return true; // 0 is not part of this + } + $state = $this->yystack[$this->yyidx]->stateno; + if (in_array($token, self::$yyExpectedTokens[$state], true)) { + return true; + } + $stack = $this->yystack; + $yyidx = $this->yyidx; + do { + $yyact = $this->yy_find_shift_action($token); + if ($yyact >= self::YYNSTATE && $yyact < self::YYNSTATE + self::YYNRULE) { + // reduce action + $done = 0; + do { + if ($done++ == 100) { + $this->yyidx = $yyidx; + $this->yystack = $stack; + // too much recursion prevents proper detection + // so give up + return true; + } + $yyruleno = $yyact - self::YYNSTATE; + $this->yyidx -= self::$yyRuleInfo[$yyruleno]['rhs']; + $nextstate = $this->yy_find_reduce_action( + $this->yystack[$this->yyidx]->stateno, + self::$yyRuleInfo[$yyruleno]['lhs']); + if (isset(self::$yyExpectedTokens[$nextstate]) && + in_array($token, self::$yyExpectedTokens[$nextstate], true)) { + $this->yyidx = $yyidx; + $this->yystack = $stack; + return true; + } + if ($nextstate < self::YYNSTATE) { + // we need to shift a non-terminal + $this->yyidx++; + $x = new SearchCommandParseryyStackEntry; + $x->stateno = $nextstate; + $x->major = self::$yyRuleInfo[$yyruleno]['lhs']; + $this->yystack[$this->yyidx] = $x; + continue 2; + } elseif ($nextstate == self::YYNSTATE + self::YYNRULE + 1) { + 
$this->yyidx = $yyidx; + $this->yystack = $stack; + if (!$token) { + // end of input: this is valid + return true; + } + // the last token was just ignored, we can't accept + // by ignoring input, this is in essence ignoring a + // syntax error! + return false; + } elseif ($nextstate === self::YY_NO_ACTION) { + $this->yyidx = $yyidx; + $this->yystack = $stack; + // input accepted, but not shifted (I guess) + return true; + } else { + $yyact = $nextstate; + } + } while (true); + } + break; + } while (true); + $this->yyidx = $yyidx; + $this->yystack = $stack; + return true; + } + + /** + * Find the appropriate action for a parser given the terminal + * look-ahead token iLookAhead. + * + * If the look-ahead token is YYNOCODE, then check to see if the action is + * independent of the look-ahead. If it is, return the action, otherwise + * return YY_NO_ACTION. + * @param int The look-ahead token + */ + function yy_find_shift_action($iLookAhead) + { + $stateno = $this->yystack[$this->yyidx]->stateno; + + /* if ($this->yyidx < 0) return self::YY_NO_ACTION; */ + if (!isset(self::$yy_shift_ofst[$stateno])) { + // no shift actions + return self::$yy_default[$stateno]; + } + $i = self::$yy_shift_ofst[$stateno]; + if ($i === self::YY_SHIFT_USE_DFLT) { + return self::$yy_default[$stateno]; + } + if ($iLookAhead == self::YYNOCODE) { + return self::YY_NO_ACTION; + } + $i += $iLookAhead; + if ($i < 0 || $i >= self::YY_SZ_ACTTAB || + self::$yy_lookahead[$i] != $iLookAhead) { + if (count(self::$yyFallback) && $iLookAhead < count(self::$yyFallback) + && ($iFallback = self::$yyFallback[$iLookAhead]) != 0) { + if (self::$yyTraceFILE) { + fwrite(self::$yyTraceFILE, self::$yyTracePrompt . "FALLBACK " . + self::$yyTokenName[$iLookAhead] . " => " . + self::$yyTokenName[$iFallback] . 
"\n"); + } + return $this->yy_find_shift_action($iFallback); + } + return self::$yy_default[$stateno]; + } else { + return self::$yy_action[$i]; + } + } + + /** + * Find the appropriate action for a parser given the non-terminal + * look-ahead token $iLookAhead. + * + * If the look-ahead token is self::YYNOCODE, then check to see if the action is + * independent of the look-ahead. If it is, return the action, otherwise + * return self::YY_NO_ACTION. + * @param int Current state number + * @param int The look-ahead token + */ + function yy_find_reduce_action($stateno, $iLookAhead) + { + /* $stateno = $this->yystack[$this->yyidx]->stateno; */ + + if (!isset(self::$yy_reduce_ofst[$stateno])) { + return self::$yy_default[$stateno]; + } + $i = self::$yy_reduce_ofst[$stateno]; + if ($i == self::YY_REDUCE_USE_DFLT) { + return self::$yy_default[$stateno]; + } + if ($iLookAhead == self::YYNOCODE) { + return self::YY_NO_ACTION; + } + $i += $iLookAhead; + if ($i < 0 || $i >= self::YY_SZ_ACTTAB || + self::$yy_lookahead[$i] != $iLookAhead) { + return self::$yy_default[$stateno]; + } else { + return self::$yy_action[$i]; + } + } + + /** + * Perform a shift action. 
+ * @param int The new state to shift in + * @param int The major token to shift in + * @param mixed the minor token to shift in + */ + function yy_shift($yyNewState, $yyMajor, $yypMinor) + { + $this->yyidx++; + if ($this->yyidx >= self::YYSTACKDEPTH) { + $this->yyidx--; + if (self::$yyTraceFILE) { + fprintf(self::$yyTraceFILE, "%sStack Overflow!\n", self::$yyTracePrompt); + } + while ($this->yyidx >= 0) { + $this->yy_pop_parser_stack(); + } + /* Here code is inserted which will execute if the parser + ** stack ever overflows */ + return; + } + $yytos = new SearchCommandParseryyStackEntry; + $yytos->stateno = $yyNewState; + $yytos->major = $yyMajor; + $yytos->minor = $yypMinor; + array_push($this->yystack, $yytos); + if (self::$yyTraceFILE && $this->yyidx > 0) { + fprintf(self::$yyTraceFILE, "%sShift %d\n", self::$yyTracePrompt, + $yyNewState); + fprintf(self::$yyTraceFILE, "%sStack:", self::$yyTracePrompt); + for($i = 1; $i <= $this->yyidx; $i++) { + fprintf(self::$yyTraceFILE, " %s", + self::$yyTokenName[$this->yystack[$i]->major]); + } + fwrite(self::$yyTraceFILE,"\n"); + } + } + + /** + * The following table contains information about every rule that + * is used during the reduce. + * + *
+ * array( + * array( + * int $lhs; Symbol on the left-hand side of the rule + * int $nrhs; Number of right-hand side symbols in the rule + * ),... + * ); + *+ */ + static public $yyRuleInfo = array( + array( 'lhs' => 28, 'rhs' => 1 ), + array( 'lhs' => 27, 'rhs' => 3 ), + array( 'lhs' => 27, 'rhs' => 3 ), + array( 'lhs' => 27, 'rhs' => 2 ), + array( 'lhs' => 27, 'rhs' => 3 ), + array( 'lhs' => 27, 'rhs' => 3 ), + array( 'lhs' => 27, 'rhs' => 6 ), + array( 'lhs' => 27, 'rhs' => 4 ), + array( 'lhs' => 27, 'rhs' => 4 ), + array( 'lhs' => 27, 'rhs' => 5 ), + array( 'lhs' => 27, 'rhs' => 3 ), + array( 'lhs' => 32, 'rhs' => 0 ), + array( 'lhs' => 32, 'rhs' => 1 ), + array( 'lhs' => 29, 'rhs' => 6 ), + array( 'lhs' => 29, 'rhs' => 1 ), + array( 'lhs' => 31, 'rhs' => 1 ), + array( 'lhs' => 31, 'rhs' => 3 ), + array( 'lhs' => 33, 'rhs' => 3 ), + array( 'lhs' => 33, 'rhs' => 1 ), + array( 'lhs' => 30, 'rhs' => 1 ), + array( 'lhs' => 30, 'rhs' => 1 ), + array( 'lhs' => 30, 'rhs' => 1 ), + array( 'lhs' => 30, 'rhs' => 1 ), + array( 'lhs' => 30, 'rhs' => 1 ), + array( 'lhs' => 30, 'rhs' => 2 ), + array( 'lhs' => 30, 'rhs' => 2 ), + array( 'lhs' => 30, 'rhs' => 1 ), + ); + + /** + * The following table contains a mapping of reduce action to method name + * that handles the reduction. + * + * If a rule is not set, it has no handler. + */ + static public $yyReduceMap = array( + 0 => 0, + 1 => 1, + 2 => 2, + 3 => 3, + 4 => 4, + 16 => 4, + 5 => 5, + 6 => 6, + 7 => 7, + 8 => 8, + 9 => 9, + 10 => 10, + 11 => 11, + 12 => 12, + 13 => 13, + 14 => 14, + 15 => 15, + 17 => 17, + 18 => 18, + 19 => 19, + 20 => 20, + 21 => 21, + 22 => 22, + 23 => 23, + 24 => 24, + 25 => 25, + 26 => 26, + ); + /* Beginning here are the reduction cases. A typical example + ** follows: + ** #line
+ * rule(A) ::= B. { A = 1; }
+ *
+ *
+ * The parser will translate to something like:
+ *
+ *
+ * function yy_r0(){$this->_retvalue = 1;}
+ *
+ */
+ private $_retvalue;
+
+ /**
+ * Perform a reduce action and the shift that must immediately
+ * follow the reduce.
+ *
+ * For a rule such as:
+ *
+ *
+ * A ::= B blah C. { dosomething(); }
+ *
+ *
+ * This function will first call the action, if any, ("dosomething();" in our
+ * example), and then it will pop three states from the stack,
+ * one for each entry on the right-hand side of the expression
+ * (B, blah, and C in our example rule), and then push the result of the action
+ * back on to the stack with the resulting state reduced to (as described in the .out
+ * file)
+ * @param int Number of the rule by which to reduce
+ */
+    function yy_reduce($yyruleno)
+    {
+        // Locals used below:
+        //   $yymsp            top-of-stack entry on entry (assigned, never read here)
+        //   $yy_lefthand_side value produced by the rule's action, null if none
+        //   $yygoto           symbol code of the rule's left-hand side
+        //   $yysize           number of right-hand-side symbols to pop
+        //   $yyact            action looked up for $yygoto after the pop
+        $yymsp = $this->yystack[$this->yyidx];
+        if (self::$yyTraceFILE && $yyruleno >= 0 
+              && $yyruleno < count(self::$yyRuleName)) {
+            fprintf(self::$yyTraceFILE, "%sReduce (%d) [%s].\n",
+                self::$yyTracePrompt, $yyruleno,
+                self::$yyRuleName[$yyruleno]);
+        }
+
+        // The result stays null when the rule has no entry in $yyReduceMap.
+        $this->_retvalue = $yy_lefthand_side = null;
+        if (array_key_exists($yyruleno, self::$yyReduceMap)) {
+            // call the action: the handler is the method yy_r<N>, which leaves
+            // its result in $this->_retvalue
+            $this->_retvalue = null;
+            $this->{'yy_r' . self::$yyReduceMap[$yyruleno]}();
+            $yy_lefthand_side = $this->_retvalue;
+        }
+        $yygoto = self::$yyRuleInfo[$yyruleno]['lhs'];
+        $yysize = self::$yyRuleInfo[$yyruleno]['rhs'];
+        // Drop the index first, then the entries themselves, so that yyidx
+        // points at the entry that was below the right-hand side.
+        $this->yyidx -= $yysize;
+        for($i = $yysize; $i; $i--) {
+            // pop all of the right-hand side parameters
+            array_pop($this->yystack);
+        }
+        // Ask the uncovered state what to do with the left-hand-side symbol.
+        $yyact = $this->yy_find_reduce_action($this->yystack[$this->yyidx]->stateno, $yygoto);
+        if ($yyact < self::YYNSTATE) {
+            /* If we are not debugging and the reduce action popped at least
+            ** one element off the stack, then we can push the new element back
+            ** onto the stack here, and skip the stack overflow test in yy_shift().
+            ** That gives a significant speed improvement. */
+            if (!self::$yyTraceFILE && $yysize) {
+                $this->yyidx++;
+                $x = new SearchCommandParseryyStackEntry;
+                $x->stateno = $yyact;
+                $x->major = $yygoto;
+                $x->minor = $yy_lefthand_side;
+                $this->yystack[$this->yyidx] = $x;
+            } else {
+                // Tracing, or an empty rule (nothing popped): use the checked path.
+                $this->yy_shift($yyact, $yygoto, $yy_lefthand_side);
+            }
+        } elseif ($yyact == self::YYNSTATE + self::YYNRULE + 1) {
+            // This action code is handled as "accept".
+            $this->yy_accept();
+        }
+    }
+
+ /**
+ * The following code executes when the parse fails
+ *
+ * Code from %parse_fail is inserted here
+ */
+    function yy_parse_failed()
+    {
+        $traceStream = self::$yyTraceFILE;
+        if ($traceStream) {
+            fprintf($traceStream, "%sFail!\n", self::$yyTracePrompt);
+        }
+        // Unwind everything still on the stack before recording the failure.
+        while (0 <= $this->yyidx) {
+            $this->yy_pop_parser_stack();
+        }
+        /* %parse_failure action copied from the grammar file: */
+#line 46 "SearchCommandParser.y"
+
+    $this->parse_result = 'syntax';
+#line 1155 "SearchCommandParser.php"
+    }
+
+ /**
+ * The following code executes when a syntax error first occurs.
+ *
+ * %syntax_error code is inserted here
+ * @param int The major type of the error token
+ * @param mixed The minor type of the error token
+ */
+    function yy_syntax_error($yymajor, $TOKEN)
+    {
+        // Neither argument is used: the action only flags the parse as a
+        // syntax failure and resets the message to an empty string.
+        // The lines between the #line markers are the grammar's %syntax_error
+        // action and are kept exactly as generated.
+#line 35 "SearchCommandParser.y"
+
+    $this->parse_result = 'syntax';
+    $this->parse_message = "";
+#line 1172 "SearchCommandParser.php"
+    }
+
+ /**
+ * The following is executed when the parser accepts
+ *
+ * %parse_accept code is inserted here
+ */
+    function yy_accept()
+    {
+        $traceStream = self::$yyTraceFILE;
+        if ($traceStream) {
+            fprintf($traceStream, "%sAccept!\n", self::$yyTracePrompt);
+        }
+        // Drain the stack; the symbol codes returned by the pops are not needed.
+        while (0 <= $this->yyidx) {
+            $this->yy_pop_parser_stack();
+        }
+        /* %parse_accept action copied from the grammar file: */
+#line 41 "SearchCommandParser.y"
+
+    $this->parse_result = 'ok';
+#line 1194 "SearchCommandParser.php"
+    }
+
+ /**
+ * The main parser program.
+ *
+ * The first argument is the major token number. The second is
+ * the token value string as scanned from the input.
+ *
+ * @param int the token number
+ * @param mixed the token value
+ * @param mixed any extra arguments that should be passed to handlers
+ */
+    function doParse($yymajor, $yytokenvalue)
+    {
+        // Feeds a single token to the parser. A $yymajor of 0 is treated as
+        // end of input. The loop below keeps reducing with the same token
+        // until it is shifted, discarded, or the parse accepts or fails.
+//        $yyact;            /* The parser action. */
+//        $yyendofinput;     /* True if we are at the end of input */
+        $yyerrorhit = 0;   /* True if yymajor has invoked an error */
+        
+        /* (re)initialize the parser, if necessary */
+        if ($this->yyidx === null || $this->yyidx < 0) {
+            /* Start with a single entry for state 0. yyerrcnt = -1 lets the
+            ** first syntax error be reported (see the yyerrcnt < 0 test below). */
+            $this->yyidx = 0;
+            $this->yyerrcnt = -1;
+            $x = new SearchCommandParseryyStackEntry;
+            $x->stateno = 0;
+            $x->major = 0;
+            $this->yystack = array();
+            array_push($this->yystack, $x);
+        }
+        $yyendofinput = ($yymajor==0);
+        
+        if (self::$yyTraceFILE) {
+            fprintf(self::$yyTraceFILE, "%sInput %s\n",
+                self::$yyTracePrompt, self::$yyTokenName[$yymajor]);
+        }
+        
+        do {
+            // Action codes as dispatched below: below YYNSTATE = shift into
+            // that state; the next YYNRULE codes = reduce by rule
+            // (code - YYNSTATE); YY_ERROR_ACTION = syntax error; anything
+            // else is treated as accept.
+            $yyact = $this->yy_find_shift_action($yymajor);
+            if ($yymajor < self::YYERRORSYMBOL &&
+                  !$this->yy_is_expected_token($yymajor)) {
+                // force a syntax error
+                $yyact = self::YY_ERROR_ACTION;
+            }
+            if ($yyact < self::YYNSTATE) {
+                $this->yy_shift($yyact, $yymajor, $yytokenvalue);
+                // One more token shifted since the last reported error.
+                $this->yyerrcnt--;
+                // The token is consumed: YYNOCODE ends the loop, except at end
+                // of input where token 0 is tried again while the stack is live.
+                if ($yyendofinput && $this->yyidx >= 0) {
+                    $yymajor = 0;
+                } else {
+                    $yymajor = self::YYNOCODE;
+                }
+            } elseif ($yyact < self::YYNSTATE + self::YYNRULE) {
+                // A reduce does not consume the token; the loop retries it.
+                $this->yy_reduce($yyact - self::YYNSTATE);
+            } elseif ($yyact == self::YY_ERROR_ACTION) {
+                if (self::$yyTraceFILE) {
+                    fprintf(self::$yyTraceFILE, "%sSyntax Error!\n",
+                        self::$yyTracePrompt);
+                }
+                if (self::YYERRORSYMBOL) {
+                    /* A syntax error has occurred.
+                    ** The response to an error depends upon whether or not the
+                    ** grammar defines an error token "ERROR".  
+                    **
+                    ** This is what we do if the grammar does define ERROR:
+                    **
+                    **  * Call the %syntax_error function.
+                    **
+                    **  * Begin popping the stack until we enter a state where
+                    **    it is legal to shift the error symbol, then shift
+                    **    the error symbol.
+                    **
+                    **  * Set the error count to three.
+                    **
+                    **  * Begin accepting and shifting new tokens.  No new error
+                    **    processing will occur until three tokens have been
+                    **    shifted successfully.
+                    **
+                    */
+                    if ($this->yyerrcnt < 0) {
+                        $this->yy_syntax_error($yymajor, $yytokenvalue);
+                    }
+                    $yymx = $this->yystack[$this->yyidx]->major;
+                    // Already sitting on the error symbol, or a second error
+                    // for this same token: throw the token away.
+                    if ($yymx == self::YYERRORSYMBOL || $yyerrorhit ){
+                        if (self::$yyTraceFILE) {
+                            fprintf(self::$yyTraceFILE, "%sDiscard input token %s\n",
+                                self::$yyTracePrompt, self::$yyTokenName[$yymajor]);
+                        }
+                        $this->yy_destructor($yymajor, $yytokenvalue);
+                        $yymajor = self::YYNOCODE;
+                    } else {
+                        // Pop until some state can shift the error symbol
+                        // (lookup result below YYNSTATE) or the stack is empty.
+                        while ($this->yyidx >= 0 &&
+                                 $yymx != self::YYERRORSYMBOL &&
+        ($yyact = $this->yy_find_shift_action(self::YYERRORSYMBOL)) >= self::YYNSTATE
+                              ){
+                            $this->yy_pop_parser_stack();
+                        }
+                        if ($this->yyidx < 0 || $yymajor==0) {
+                            // Nothing left to recover into, or the error is at
+                            // end of input: the parse has failed.
+                            $this->yy_destructor($yymajor, $yytokenvalue);
+                            $this->yy_parse_failed();
+                            $yymajor = self::YYNOCODE;
+                        } elseif ($yymx != self::YYERRORSYMBOL) {
+                            $u2 = 0;
+                            $this->yy_shift($yyact, self::YYERRORSYMBOL, $u2);
+                        }
+                    }
+                    $this->yyerrcnt = 3;
+                    $yyerrorhit = 1;
+                } else {
+                    /* YYERRORSYMBOL is not defined */
+                    /* This is what we do if the grammar does not define ERROR:
+                    **
+                    **  * Report an error message, and throw away the input token.
+                    **
+                    **  * If the input token is $, then fail the parse.
+                    **
+                    ** As before, subsequent error messages are suppressed until
+                    ** three input tokens have been successfully shifted.
+                    */
+                    if ($this->yyerrcnt <= 0) {
+                        $this->yy_syntax_error($yymajor, $yytokenvalue);
+                    }
+                    $this->yyerrcnt = 3;
+                    $this->yy_destructor($yymajor, $yytokenvalue);
+                    if ($yyendofinput) {
+                        $this->yy_parse_failed();
+                    }
+                    $yymajor = self::YYNOCODE;
+                }
+            } else {
+                $this->yy_accept();
+                $yymajor = self::YYNOCODE;
+            }            
+        } while ($yymajor != self::YYNOCODE && $this->yyidx >= 0);
+    }
+}
\ No newline at end of file
diff --git a/search2/search/SearchCommandParser.y b/search2/search/SearchCommandParser.y
new file mode 100755
index 0000000..8b1f4d7
--- /dev/null
+++ b/search2/search/SearchCommandParser.y
@@ -0,0 +1,211 @@
+%name SearchCommandParser
+%declare_class {class SearchCommandParser}
+
+%include_class {
+
+    // Everything in this section is copied into the generated parser class.
+
+    // Expression tree stored by the "cmdline ::= expr" action.
+    private $expr_result;
+    // 'ok' or 'syntax'; written by the accept / failure / syntax-error actions.
+    private $parse_result;
+    // Assigned by the %syntax_error action. Declared explicitly because PHP 8.2
+    // deprecates creating properties on the fly; public so that it stays as
+    // visible as the implicitly created property was.
+    public $parse_message = '';
+
+    public function __construct()
+    {
+        $this->parse_result = 'ok';
+    }
+
+    // Returns whatever the start rule stored (unset until a parse reaches it).
+    public function getExprResult()
+    {
+        return $this->expr_result;
+    }
+
+    // True while no syntax error or parse failure has been recorded.
+    public function isExprOk()
+    {
+        return $this->parse_result == 'ok';
+    }
+
+}
+
+// Semantic value type of the expr non-terminal.
+%type expr {Expr}
+
+// Operator precedence: each later line binds tighter than the ones before it.
+%left OPOR.
+%left OPAND.
+%right NOT.
+%left IS CONTAIN LIKE BETWEEN START END.
+%left GT LE LT GE.
+
+// Runs on a syntax error: flag the parse as failed and reset the message.
+%syntax_error
+{
+    $this->parse_result = 'syntax';
+    $this->parse_message = "";
+}
+
+// Runs when the parser accepts the input.
+%parse_accept
+{
+    $this->parse_result = 'ok';
+}
+
+// Runs when the parser gives up.
+%parse_failure
+{
+    $this->parse_result = 'syntax';
+}
+
+// Root non-terminal of the grammar.
+%start_symbol cmdline
+
+// Start rule: keep the finished expression for getExprResult().
+cmdline ::= expr(A).
+{
+    $this->expr_result = A;
+}
+
+// Conjunction of two expressions.
+expr(A) ::= expr(B) OPAND expr(C).
+{
+    A = new OpExpr(B, ExprOp::OP_AND, C);
+}
+
+// Disjunction of two expressions.
+expr(A) ::= expr(B) OPOR expr(C).
+{
+    A = new OpExpr(B, ExprOp::OP_OR, C);
+}
+
+// Negation: flips the operand's own "not" flag rather than wrapping it.
+// NOTE(review): relies on not() returning the current flag when called
+// without an argument -- confirm against OpExpr.
+expr(A) ::= NOT expr(B).
+{
+    $expr = B;
+    $expr->not(!$expr->not());
+    A = $expr;
+}
+
+// Parenthesised expression: the value passes through unchanged.
+expr(A) ::= PAR_OPEN expr(B) PAR_CLOSE.
+{
+    A = B;
+}
+
+// field <operator> value. IS_NOT is stored as a negated IS.
+expr(A) ::= terminal(B) operator(C) value(D).
+{
+    $op = C;
+    $not = false;
+    if ($op == ExprOp::IS_NOT)
+    {
+        $op = ExprOp::IS;
+        $not = true;
+    }
+
+    $fld = new OpExpr(B, $op, D);
+    $fld->not($not);
+    A = $fld;
+}
+
+// field [NOT] BETWEEN value OPAND value. The [BETWEEN] tag gives the rule the
+// precedence of BETWEEN instead of that of the OPAND token it contains.
+expr(A) ::= terminal(B) notop(C) BETWEEN value(D) OPAND value(E). [BETWEEN]
+{
+    $expr = new OpExpr(B, ExprOp::BETWEEN, new BetweenValueExpr(D, E));
+    $expr->not(C);
+    A=$expr;
+}
+
+// field [NOT] LIKE value.
+expr(A) ::= terminal(B) notop(C) LIKE value(D).
+{
+    $expr = new OpExpr(B, ExprOp::LIKE, D);
+    $expr->not(C);
+    A=$expr;
+}
+
+// field IS [NOT] value.
+expr(A) ::= terminal(B) IS notop(C) value(D).
+{
+    $expr = new OpExpr(B, ExprOp::IS, D);
+    $expr->not(C);
+    A=$expr;
+}
+
+// field DOES [NOT] CONTAIN value.
+expr(A) ::= terminal(B) DOES notop(C) CONTAIN value(D).
+{
+    $expr = new OpExpr(B, ExprOp::CONTAINS, D);
+    $expr->not(C);
+    A=$expr;
+}
+
+// field COLON value: builds the same CONTAINS expression, never negated.
+expr(A) ::= terminal(B) COLON value(C).
+{
+    A = new OpExpr(B, ExprOp::CONTAINS, C);
+}
+
+
+// Optional NOT: the empty alternative yields false ...
+notop(A) ::= .
+{
+    A = false;
+}
+
+// ... and an explicit NOT token yields true.
+notop(A) ::= NOT.
+{
+    A = true;
+}
+
+// Two bracketed values in a row are resolved through
+// ExprFieldRegistry::resolveMetadataField(first, second).
+terminal(A) ::= SQUARE_OPEN value(B) SQUARE_CLOSE SQUARE_OPEN value(C) SQUARE_CLOSE.
+{
+    $registry = ExprFieldRegistry::getRegistry();
+    $field = $registry->resolveMetadataField(B, C);
+    A = $field;
+}
+
+// A bare TERMINAL token is resolved as a field alias.
+terminal(A) ::= TERMINAL(B).
+{
+    $registry = ExprFieldRegistry::getRegistry();
+    $field=$registry->resolveAlias(B);
+    A = $field;
+}
+
+// A single VALUE token is passed through as-is.
+value(A) ::= VALUE(B).
+{
+    A = B;
+}
+
+// A parenthesised list evaluates to the ValueListExpr built below.
+value(A) ::= PAR_OPEN valuelist(B) PAR_CLOSE.
+{
+    A = B;
+}
+
+// Right-recursive list: the tail is built first, then the head value is
+// added to that same list object.
+valuelist(A) ::= VALUE(B) COMMA valuelist(C).
+{
+    C->addValue(B);
+    A = C;
+}
+
+// Last (or only) element: starts a new ValueListExpr.
+valuelist(A) ::= VALUE(B).
+{
+    A = new ValueListExpr(B);
+}
+
+// Each operator rule maps its token(s) to the matching ExprOp constant, which
+// the "terminal operator value" rule then hands to OpExpr.
+operator(A) ::= CONTAINS.
+{
+    A = ExprOp::CONTAINS;
+}
+
+operator(A) ::= LT.
+{
+    A = ExprOp::LESS_THAN;
+}
+
+operator(A) ::= GT.
+{
+    A = ExprOp::GREATER_THAN;
+}
+
+operator(A) ::= LE.
+{
+    A = ExprOp::LESS_THAN_EQUAL;
+}
+
+operator(A) ::= GE.
+{
+    A = ExprOp::GREATER_THAN_EQUAL;
+}
+
+// Two-token operators: START WITH and END WITH.
+operator(A) ::= START WITH.
+{
+    A = ExprOp::STARTS_WITH;
+}
+
+operator(A) ::= END WITH.
+{
+    A = ExprOp::ENDS_WITH;
+}
+
+// IS_NOT is turned into a negated IS by the rule that consumes it.
+operator(A) ::= IS_NOT.
+{
+    A = ExprOp::IS_NOT;
+}
diff --git a/search2/search/bin/cronSavedSearch.php b/search2/search/bin/cronSavedSearch.php
new file mode 100644
index 0000000..11a2420
--- /dev/null
+++ b/search2/search/bin/cronSavedSearch.php
@@ -0,0 +1,21 @@
+
+
+// Resolve the bootstrap from this file's own directory. A bare relative path
+// is resolved against the current working directory, so realpath() returned
+// false whenever the script was started from anywhere else.
+require_once(realpath(dirname(__FILE__) . '/../../../config/dmsDefaults.php'));
+//require_once('indexing/indexerCore.inc.php');
+
+// TODO!! Saved-search processing is not implemented yet; the script stops below.
+//$changed_docs = SearchHelper::getSavedSearchEvents();
+
+die('todo');
+/*
+
+how this works -
+
+a saved search is created.
+
+1) any changes - ie new docs, checkins, metadata updates, etc are logged to the saved_search_events table
+2) periodically, iterate through all documents - do search, and mail user results. remove the event indication.
+
+
+*/
+?>
\ No newline at end of file
diff --git a/search2/search/expr.inc.php b/search2/search/expr.inc.php
new file mode 100755
index 0000000..60c69dd
--- /dev/null
+++ b/search2/search/expr.inc.php
@@ -0,0 +1,2182 @@
+dbfields=array();
+ $sql = "SELECT groupname, itemname, ranking, type FROM search_ranking";
+ $rs = DBUtil::getResultArray($sql);
+ foreach($rs as $item)
+ {
+ switch ($item['type'])
+ {
+ case 'T':
+ $this->db[$item['groupname']][$item['itemname']] = $item['ranking']+0;
+ break;
+ case 'M':
+ $this->metadata[$item['groupname']][$item['itemname']] = $item['ranking']+0;
+ break;
+ case 'S':
+ switch($item['groupname'])
+ {
+ case 'Discussion':
+ $this->discussion = $item['ranking']+0;
+ break;
+ case 'DocumentText':
+ $this->text = $item['ranking']+0;
+ break;
+ }
+ break;
+ }
+ }
+ }
+
+ /**
+     * Returns the shared RankManager instance, creating it on first use.
+ *
+ * @return RankManager
+ */
+    public static function get()
+    {
+        // Function-static slot: survives between calls, so the manager is
+        // only constructed the first time through.
+        static $instance = null;
+        if ($instance !== null)
+        {
+            return $instance;
+        }
+        return $instance = new RankManager();
+    }
+
+    public function scoreField($groupname, $type='T', $itemname='')
+    {
+        // Looks up the ranking loaded for a field. $type selects the store:
+        // 'T' and 'M' are keyed by group name and then item name, 'S' holds a
+        // single score for the 'Discussion' group and one for 'DocumentText'.
+        // Anything that was never loaded ranks 0 -- the value the unknown
+        // type/group branches already returned -- instead of raising an
+        // undefined-index notice and handing back null.
+        switch($type)
+        {
+            case 'T':
+                return isset($this->db[$groupname][$itemname]) ? $this->db[$groupname][$itemname] : 0;
+            case 'M':
+                return isset($this->metadata[$groupname][$itemname]) ? $this->metadata[$groupname][$itemname] : 0;
+            case 'S':
+                switch($groupname)
+                {
+                    case 'Discussion':
+                        return isset($this->discussion) ? $this->discussion : 0;
+                    case 'DocumentText':
+                        return isset($this->text) ? $this->text : 0;
+                    default:
+                        return 0;
+                }
+            default:
+                return 0;
+        }
+    }
+}
+
+
+/**
+ * Base class for every node of a parsed search expression.
+ *
+ * Each node receives a sequential id from a class-wide counter and may hold a
+ * reference to its parent node. The is*() helpers are instanceof checks
+ * against the concrete node classes.
+ */
+class Expr
+{
+    /**
+     * The parent expression
+     *
+     * @var Expr
+     */
+    protected $parent;
+
+    /**
+     * Next id to hand out; shared by all nodes
+     *
+     * @var int
+     */
+    protected static $node_id = 0;
+
+    /**
+     * Id assigned to this node by the constructor
+     *
+     * @var int
+     */
+    protected $expr_id;
+
+    /**
+     * Assigns the next free node id to this node.
+     */
+    public function __construct()
+    {
+        $this->expr_id = Expr::$node_id++;
+    }
+
+    /**
+     * Returns the id assigned to this node at construction
+     *
+     * @return int
+     */
+    public function getExprId()
+    {
+        return $this->expr_id;
+    }
+
+    /**
+     * Converts the expression to a string
+     *
+     * The base implementation only throws; subclasses provide the conversion.
+     *
+     * @return string
+     */
+    public function __toString()
+    {
+        throw new Exception('Not yet implemented in ' . get_class($this));
+    }
+
+    /**
+     * Reference to the parent expression
+     *
+     * @return Expr
+     */
+    public function &getParent()
+    {
+        return $this->parent;
+    }
+
+    /**
+     * Sets the parent expression
+     *
+     * @param Expr $parent
+     */
+    public function setParent(&$parent)
+    {
+        $this->parent = &$parent;
+    }
+
+    /**
+     * Is the expression valid
+     *
+     * @return boolean
+     */
+    public function is_valid()
+    {
+        return true;
+    }
+
+    /**
+     * NOTE(review): performs the same OpExpr check as isOpExpr(), so a plain
+     * field or value node answers false here -- confirm that this is intended.
+     *
+     * @return boolean
+     */
+    public function isExpr()
+    {
+        return $this instanceof OpExpr;
+    }
+
+    /**
+     * @return boolean true when this node is an OpExpr
+     */
+    public function isOpExpr()
+    {
+        return $this instanceof OpExpr;
+    }
+
+    /**
+     * @return boolean true when this node is a ValueExpr
+     */
+    public function isValueExpr()
+    {
+        return $this instanceof ValueExpr;
+    }
+
+    /**
+     * @return boolean true when this node is a ValueListExpr
+     */
+    public function isValueListExpr()
+    {
+        return $this instanceof ValueListExpr;
+    }
+
+    /**
+     * @return boolean true when this node is a DBFieldExpr
+     */
+    public function isDbExpr()
+    {
+        return $this instanceof DBFieldExpr;
+    }
+
+    /**
+     * @return boolean true when this node is a FieldExpr
+     */
+    public function isFieldExpr()
+    {
+        return $this instanceof FieldExpr;
+    }
+
+    /**
+     * @return boolean true when this node is a SearchableText
+     */
+    public function isSearchableText()
+    {
+        return $this instanceof SearchableText ;
+    }
+
+    /**
+     * @return boolean true when this node is a MetadataField
+     */
+    public function isMetadataField()
+    {
+        return $this instanceof MetadataField;
+    }
+
+    /**
+     * Appends this node's part of the graph description to $str.
+     *
+     * toVizGraph() calls this once with $phase 0 and once with $phase 1. The
+     * base implementation only throws; subclasses provide the output.
+     *
+     * @param string $str graph text, extended in place
+     * @param int $phase pass number
+     */
+    public function toViz(&$str, $phase)
+    {
+        // The class name used to be glued straight onto the message text.
+        throw new Exception('To be implemented in ' . get_class($this));
+    }
+
+    /**
+     * Builds a Graphviz "digraph" description of the expression tree.
+     *
+     * Recognised options:
+     *  - 'left-to-right': when true, lay the graph out left to right
+     *  - 'tofile': path the description is written to; "dot" is then run to
+     *    render a .jpg with the same base name next to it
+     *  - 'view': when true (and 'tofile' is set), open the image with "eog"
+     *
+     * @param array $options
+     * @return string the graph description
+     */
+    public function toVizGraph($options=array())
+    {
+        $str = "digraph tree {\n";
+        if (isset($options['left-to-right']) && $options['left-to-right'])
+        {
+            $str .= "rankdir=LR\n";
+        }
+
+        $this->toViz($str, 0);
+        $this->toViz($str, 1);
+
+        $str .= "}\n";
+
+        if (isset($options['tofile']))
+        {
+            $path = dirname($options['tofile']);
+            $filename = basename($options['tofile']);
+            $ext = (string) pathinfo($filename, PATHINFO_EXTENSION);
+
+            // Base name without ".ext". With no extension the previous
+            // substr() call still removed one character, which truncated the
+            // name; only a trailing dot is dropped in that case now.
+            if ($ext === '')
+            {
+                $base = rtrim($filename, '.');
+            }
+            else
+            {
+                $base = substr($filename, 0, -strlen($ext) - 1);
+            }
+
+            $dotfile = "$path/$filename";
+            $jpgfile = "$path/$base.jpg";
+
+            $fp = fopen($dotfile,'wt');
+            if ($fp === false)
+            {
+                // Nothing to render; the description itself is still returned.
+                return $str;
+            }
+            fwrite($fp, $str);
+            fclose($fp);
+
+            // Quote both paths so that spaces or shell metacharacters in
+            // 'tofile' cannot break (or extend) the command line.
+            system('dot -Tjpg -o' . escapeshellarg($jpgfile) . ' ' . escapeshellarg($dotfile));
+
+            if (isset($options['view']) && $options['view'])
+            {
+                system('eog ' . escapeshellarg($jpgfile));
+            }
+        }
+
+        return $str;
+    }
+}
+
+/**
+ * Expression node that names a searchable field.
+ */
+class FieldExpr extends Expr
+{
+    /**
+     * Name of the field
+     *
+     * @var string
+     */
+    protected $field;
+
+    /**
+     * Alias of the field; starts out as the name of the concrete class
+     *
+     * @var string
+     */
+    protected $alias;
+
+    /**
+     * Display label; the class name when none was supplied
+     *
+     * @var string
+     */
+    protected $display;
+
+    /**
+     * Stores the field name and derives the display label and the alias.
+     *
+     * @param string $field
+     * @param string $display optional label, defaults to the class name
+     */
+    public function __construct($field, $display=null)
+    {
+        parent::__construct();
+
+        $className = get_class($this);
+
+        $this->field = $field;
+        $this->display = is_null($display) ? $className : $display;
+        $this->setAlias($className);
+    }
+
+    /**
+     * Replaces the alias
+     *
+     * @param string $alias
+     */
+    public function setAlias($alias)
+    {
+        $this->alias = $alias;
+    }
+
+    /**
+     * Returns the display label
+     *
+     * @return string
+     */
+    public function getDisplay()
+    {
+        return $this->display;
+    }
+
+    /**
+     * Returns the alias
+     *
+     * @return string
+     */
+    public function getAlias()
+    {
+        return $this->alias;
+    }
+
+    /**
+     * Returns the alias and the field name joined by a dot
+     *
+     * @return string
+     */
+    public function getFullName()
+    {
+        return $this->alias . '.' . $this->field;
+    }
+
+    /**
+     * Returns the field name
+     *
+     * @return string
+     */
+    public function getField()
+    {
+        return $this->field;
+    }
+
+    /**
+     * The string form of a field is its alias
+     *
+     * @return string
+     */
+    public function __toString()
+    {
+        return $this->alias;
+    }
+
+    /**
+     * Emits the node declaration for this field; only phase 0 produces output.
+     *
+     * @param string $str graph text, extended in place
+     * @param int $phase pass number
+     */
+    public function toViz(&$str, $phase)
+    {
+        if ($phase != 0)
+        {
+            return;
+        }
+
+        $id = $this->getExprId();
+        $str .= 'struct' . $id . ' [style=rounded, label="' . $id . ': FIELD[' . $this->alias . "]\"]\n";
+    }
+
+    /**
+     * Adjusts a comparison whose left side takes full-text input.
+     *
+     * Left sides with any other input type are left alone. A right-hand value
+     * wrapped in single quotes is replaced by a ValueExpr holding the trimmed
+     * text between the quotes; any other value is handed to
+     * OpExpr::rewriteString().
+     *
+     * @param Expr $left
+     * @param mixed $op
+     * @param mixed $right replaced in place for quoted values
+     * @param boolean $not
+     */
+    public function rewrite(&$left, &$op, &$right, $not=false)
+    {
+        $requirements = $left->getInputRequirements();
+        if ($requirements['value']['type'] != FieldInputType::FULLTEXT)
+        {
+            return;
+        }
+
+        $value = $right->isValueExpr() ? $right->getValue() : $right;
+
+        $isQuoted = (substr($value,0,1) == '\'' && substr($value,-1) == '\'');
+        if ($isQuoted)
+        {
+            $right = new ValueExpr(trim(substr($value,1,-1)));
+            return;
+        }
+
+        OpExpr::rewriteString($left, $op, $right, $not);
+    }
+}
+
+ /**
+  * Field expression tied to a database table, with optional join
+  * information and a flag controlling whether compared values are quoted.
+  */
+ class DBFieldExpr extends FieldExpr
+ {
+     /**
+      * The table the field is associated with
+      *
+      * @var string
+      */
+     protected $table;
+
+     /** @var string|null table set through joinTo() */
+     protected $jointable;
+
+     /** @var string|null field set through joinTo() */
+     protected $joinfield;
+
+     /** @var string|null field set through matchField() */
+     protected $matchfield;
+
+     /** @var boolean whether values are quoted; true by default */
+     protected $quotedvalue;
+
+     /**
+      * Creates the database field expression.
+      *
+      * @param string      $field   name of the field
+      * @param string      $table   name of the table
+      * @param string|null $display display label; the class name is used when null
+      */
+     public function __construct($field, $table, $display=null)
+     {
+         $label = is_null($display) ? get_class($this) : $display;
+         parent::__construct($field, $label);
+
+         $this->table = $table;
+         $this->jointable = $this->joinfield = $this->matchfield = null;
+         $this->quotedvalue = true;
+     }
+
+     /**
+      * @return string the table name
+      */
+     public function getTable()
+     {
+         return $this->table;
+     }
+
+     /**
+      * Records the table and field to join to.
+      *
+      * @param string $table
+      * @param string $field
+      */
+     public function joinTo($table, $field)
+     {
+         $this->jointable = $table;
+         $this->joinfield = $field;
+     }
+
+     /**
+      * Records the matching field.
+      *
+      * @param string $field
+      */
+     public function matchField($field)
+     {
+         $this->matchfield = $field;
+     }
+
+     /**
+      * Hook for adjusting a field name; this implementation returns it unchanged.
+      *
+      * @param string $name
+      * @return string
+      */
+     public function modifyName($name)
+     {
+         return $name;
+     }
+
+     /**
+      * Hook for adjusting a value; this implementation returns it unchanged.
+      *
+      * @param mixed $value
+      * @return mixed
+      */
+     public function modifyValue($value)
+     {
+         return $value;
+     }
+
+     /**
+      * @return string|null the join table
+      */
+     public function getJoinTable()
+     {
+         return $this->jointable;
+     }
+
+     /**
+      * @return string|null the join field
+      */
+     public function getJoinField()
+     {
+         return $this->joinfield;
+     }
+
+     /**
+      * @return string|null the matching field
+      */
+     public function getMatchingField()
+     {
+         return $this->matchfield;
+     }
+
+     /**
+      * Combined getter/setter for the quoting flag.
+      *
+      * @param boolean|null $quotedvalue new flag value; null leaves it untouched
+      * @return boolean the flag after any update
+      */
+     public function isValueQuoted($quotedvalue = null)
+     {
+         if (!is_null($quotedvalue))
+         {
+             $this->quotedvalue = $quotedvalue;
+         }
+
+         return $this->quotedvalue;
+     }
+ }
+
+ /**
+  * Database field expression for a metadata field; always bound to the
+  * 'document_fields_link' table.
+  */
+ class MetadataField extends DBFieldExpr
+ {
+     /** @var mixed fieldset value given to the constructor */
+     protected $fieldset;
+
+     /** @var mixed field id given to the constructor */
+     protected $fieldid;
+
+     /** @var mixed fieldset id given to the constructor */
+     protected $fieldsetid;
+
+     /**
+      * Creates the metadata field expression.
+      *
+      * @param mixed  $fieldset
+      * @param string $field
+      * @param mixed  $fieldsetid
+      * @param mixed  $fieldid
+      */
+     public function __construct($fieldset, $field, $fieldsetid, $fieldid)
+     {
+         parent::__construct($field, 'document_fields_link');
+
+         $this->fieldsetid = $fieldsetid;
+         $this->fieldid = $fieldid;
+         $this->fieldset = $fieldset;
+     }
+
+     /**
+      * @return mixed the fieldset
+      */
+     public function getFieldSet()
+     {
+         return $this->fieldset;
+     }
+
+     /**
+      * @return mixed the field id
+      */
+     public function getFieldId()
+     {
+         return $this->fieldid;
+     }
+
+     /**
+      * @return mixed the fieldset id
+      */
+     public function getFieldSetId()
+     {
+         return $this->fieldsetid;
+     }
+
+     /**
+      * Describes the expected input: a single 'value' entry of type TEXT.
+      *
+      * @return array
+      */
+     public function getInputRequirements()
+     {
+         $valueSpec = array('type'=>FieldInputType::TEXT);
+         return array('value'=>$valueSpec);
+     }
+
+     /**
+      * Converts the expression to a string of the form
+      * METADATA[fieldset][field].
+      *
+      * @return string
+      */
+     public function __toString()
+     {
+         return 'METADATA[' . $this->fieldset . '][' . $this->field . ']';
+     }
+ }
+
+ /**
+  * Field expression subclass that adds no members of its own.
+  *
+  * Instances are recognised through Expr::isSearchableText().
+  */
+ class SearchableText extends FieldExpr
+ {
+ }
+
+ /**
+  * Expression node holding a single literal value.
+  */
+ class ValueExpr extends Expr
+ {
+     /**
+      * The value
+      *
+      * @var mixed
+      */
+     protected $value;
+
+     /**
+      * Constructor for the value expression
+      *
+      * @param mixed $value
+      */
+     public function __construct($value)
+     {
+         parent::__construct();
+         $this->value=$value;
+     }
+
+     /**
+      * @return mixed the stored value
+      */
+     public function getValue()
+     {
+         return $this->value;
+     }
+
+     /**
+      * Converts the value to a string, wrapped in double quotes.
+      *
+      * @return string
+      */
+     public function __toString()
+     {
+         return (string) "\"$this->value\"";
+     }
+
+     /**
+      * Emits the Graphviz node for this value. Only phase 0 produces output.
+      *
+      * @param string $str   dot source being built (passed by reference)
+      * @param int    $phase rendering pass number
+      */
+     public function toViz(&$str, $phase)
+     {
+         if ($phase == 0)
+         {
+             $expr_id = $this->getExprId();
+             $value = addslashes($this->value);
+             $str .= "struct$expr_id [style=ellipse, label=\"$expr_id: \\\"$value\\\"\"]\n";
+         }
+     }
+
+     /**
+      * Builds the SQL condition comparing $fieldname with this value.
+      *
+      * @param object  $field     field object providing modifyValue() and isValueQuoted()
+      * @param string  $fieldname text used verbatim as the left-hand side
+      * @param mixed   $op        one of the ExprOp constants handled below
+      * @param boolean $not       when true the condition is wrapped in "not (...)"
+      * @return string the SQL fragment
+      * @throws Exception when $op is not a supported operator
+      */
+     public function getSQL($field, $fieldname, $op, $not=false)
+     {
+         $val = $field->modifyValue($this->getValue());
+
+         // The LIKE patterns always sit inside single quotes, so they must be
+         // escaped even when the field says its values are not quoted.
+         $escaped = addslashes($val);
+
+         // Literal used by the comparison operators: quoted and escaped, or
+         // the raw value when the field opts out of quoting.
+         // NOTE(review): unquoted values are inserted verbatim - confirm that
+         // fields disabling quoting only ever supply safe values.
+         if ($field->isValueQuoted())
+         {
+             $literal = '\'' . $escaped . '\'';
+         }
+         else
+         {
+             $literal = $val;
+         }
+
+         switch($op)
+         {
+             case ExprOp::CONTAINS:
+                 $sql = "$fieldname LIKE '%$escaped%'";
+                 break;
+             case ExprOp::STARTS_WITH:
+                 $sql = "$fieldname LIKE '$escaped%'";
+                 break;
+             case ExprOp::ENDS_WITH:
+                 $sql = "$fieldname LIKE '%$escaped'";
+                 break;
+             case ExprOp::IS:
+                 $sql = "$fieldname = $literal";
+                 break;
+             case ExprOp::GREATER_THAN :
+                 $sql = "$fieldname > $literal";
+                 break;
+             case ExprOp::GREATER_THAN_EQUAL :
+                 $sql = "$fieldname >= $literal";
+                 break;
+             case ExprOp::LESS_THAN :
+                 $sql = "$fieldname < $literal";
+                 break;
+             case ExprOp::LESS_THAN_EQUAL :
+                 $sql = "$fieldname <= $literal";
+                 break;
+             default:
+                 throw new Exception('Unknown op: ' . $op);
+         }
+
+         if ($not)
+         {
+             $sql = "not ($sql)";
+         }
+
+         return $sql;
+     }
+
+ }
+
+class ValueListExpr extends Expr
+{
+ /**
+ * The values held by this list
+ *
+ * @var array
+ */
+ protected $values;
+
+ /**
+  * Creates a value list containing one initial value.
+  *
+  * @param mixed $value first entry of the list; it is also passed on to
+  *                     the parent constructor
+  */
+ public function __construct($value)
+ {
+     parent::__construct($value);
+
+     $this->values = array();
+     $this->values[] = $value;
+ }
+
+ /**
+  * Appends another value to the end of the list.
+  *
+  * @param mixed $value
+  */
+ public function addValue($value)
+ {
+     array_push($this->values, $value);
+ }
+
+
+ /**
+  * Returns one entry of the list, or the whole list rendered as text.
+  *
+  * @param int|string|null $param key of the entry to return; when omitted
+  *                                the complete list is rendered
+  * @return mixed the selected entry, or a string in which every value is
+  *               wrapped in double quotes and the values are separated by
+  *               commas
+  */
+ public function getValue($param=null)
+ {
+     // empty() alone treats 0 and '0' as "no key given", which made the
+     // first entry (index 0) unreachable; numeric keys are accepted
+     // explicitly.
+     if (is_numeric($param) || !empty($param))
+     {
+         return $this->values[$param];
+     }
+
+     $quoted = array();
+     foreach($this->values as $value)
+     {
+         $quoted[] = "\"$value\"";
+     }
+
+     return implode(',', $quoted);
+ }
+
+ /**
+  * Converts the list to a string by delegating to getValue() without a key.
+  *
+  * @return string
+  */
+ public function __toString()
+ {
+     $rendered = $this->getValue();
+     return $rendered;
+ }
+
+ /**
+  * Emits the Graphviz node for this value list. Only phase 0 produces
+  * output; the label lists every value, escaped with addslashes() and
+  * wrapped in escaped double quotes, separated by commas.
+  *
+  * @param string $str   dot source being built (passed by reference)
+  * @param int    $phase rendering pass number
+  */
+ public function toViz(&$str, $phase)
+ {
+     if ($phase != 0)
+     {
+         return;
+     }
+
+     $nodeId = $this->getExprId();
+
+     $labels = array();
+     foreach($this->values as $item)
+     {
+         $labels[] = '\\"' . addslashes($item) . '\\"';
+     }
+
+     $str .= 'struct' . $nodeId . ' [style=ellipse, label="' . $nodeId . ': ' . implode(',', $labels) . "\"]\n";
+ }
+
+
+
+ public function rewrite(&$left, &$op, &$right, &$not)
+ {
+ if (count($this->values) == 1)
+ {
+ $right = new ValueExpr($this->values[0]);
+ return;
+ }
+ $newops = array();
+ foreach($this->values as $value)
+ {
+ $classname = get_class($left);
+ $class = new $classname;
+ $newop = new OpExpr($class, $op, $value);
+ $newops[] = $newop;
+ }
+
+ $result = $newops[0];
+ for($i=1;$i