Commit 68530cc61b32ec4e204fddcf975fc0ef17dbb71a
1 parent
ddef42e6
- add phrase splitter
- add test harness for phrase splitter
- use boolean + and phrases in searchabletext criterion.

git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@5170 c91229c3-7414-0410-bfa2-8a42b809f60b
Showing
3 changed files
with
82 additions
and
1 deletions
lib/browse/Criteria.inc
| @@ -609,7 +609,20 @@ class SearchableTextCriterion extends BrowseCriterion { | @@ -609,7 +609,20 @@ class SearchableTextCriterion extends BrowseCriterion { | ||
| 609 | 609 | ||
| 610 | $p = array(); | 610 | $p = array(); |
| 611 | $p[0] = "MATCH(DST.document_text) AGAINST (? $boolean_mode)"; | 611 | $p[0] = "MATCH(DST.document_text) AGAINST (? $boolean_mode)"; |
| 612 | - $p[1] = $aRequest[$this->getWidgetBase()]; | 612 | + |
| 613 | + $RESTRICTING_SEARCH = true; | ||
| 614 | + | ||
| 615 | + if ($RESTRICTING_SEARCH) { | ||
| 616 | + // FIXME use a more reasonable approach for splitting this... | ||
| 617 | + $q_set = KTUtil::phraseSplit($aRequest[$this->getWidgetBase()]); | ||
| 618 | + $temp = $q_set; | ||
| 619 | + foreach ($temp as $k => $v) { | ||
| 620 | + $q_set[$k] = sprintf('+"%s"', $v); | ||
| 621 | + } | ||
| 622 | + $p[1] = implode(' ',$q_set); | ||
| 623 | + } else { | ||
| 624 | + $p[1] = $aRequest[$this->getWidgetBase()]; | ||
| 625 | + } | ||
| 613 | return $p; | 626 | return $p; |
| 614 | } | 627 | } |
| 615 | 628 |
lib/util/ktutil.inc
| @@ -540,6 +540,34 @@ class KTUtil { | @@ -540,6 +540,34 @@ class KTUtil { | ||
| 540 | return ((float) $microtime_simple[1] + (float) $microtime_simple[0]); | 540 | return ((float) $microtime_simple[1] + (float) $microtime_simple[0]); |
| 541 | } | 541 | } |
| 542 | 542 | ||
/**
 * Split a search string into quoted phrases and bare words.
 *
 * The string is cut at every double-quote character. Odd-numbered
 * segments (the text between an opening and a closing quote) are
 * returned verbatim as phrases; even-numbered segments (the text outside
 * quotes) are broken on whitespace into individual words. A trailing
 * unterminated quote is treated leniently: the text after it is returned
 * as a phrase.
 *
 * Empty or whitespace-only phrases (e.g. from `""`) are dropped, and the
 * word "0" is kept (a plain empty() check would discard it).
 *
 * @param string $sSearchString Raw search text as typed by the user.
 * @return array array('words' => list of strings, 'phrases' => list of strings),
 *               each list in order of appearance.
 */
function phraseSplit($sSearchString) {
    $segments = explode('"', (string) $sSearchString);

    $words = array();
    $phrases = array();

    foreach ($segments as $index => $segment) {
        if ($index % 2 === 1) {
            // Inside a pair of quotes: keep the phrase as typed, but do not
            // emit phrases that contain nothing searchable.
            if (trim($segment) !== '') {
                $phrases[] = $segment;
            }
            continue;
        }

        // Outside quotes: split on any run of whitespace. PREG_SPLIT_NO_EMPTY
        // removes only zero-length pieces, so a literal "0" survives.
        $pieces = preg_split('/\s+/', $segment, -1, PREG_SPLIT_NO_EMPTY);
        if ($pieces === false) {
            continue;
        }
        foreach ($pieces as $word) {
            $words[] = $word;
        }
    }

    return array(
        'words' => $words,
        'phrases' => $phrases,
    );
}
| 570 | + | ||
| 543 | } | 571 | } |
| 544 | 572 | ||
| 545 | ?> | 573 | ?> |
tests/util/ktutil/testPhraseSplit.php
0 → 100644
<?php

/**
 * Manual test harness for KTUtil::phraseSplit().
 *
 * Runs each case in $tests through KTUtil::phraseSplit() and prints a
 * "Passed"/"failed" report inside a single <pre> block. Expected and
 * actual results are compared as arrays with ===, so element boundaries
 * and types must match exactly; the comma-joined strings are used for
 * display only.
 */

// NOTE(review): KT_LIB_DIR is presumably defined by dmsDefaults.php - confirm.
require_once('../../../config/dmsDefaults.php');
require_once(KT_LIB_DIR . '/util/ktutil.inc');

$tests = array(
    // (string, phrases, words)
    array('a"b c"d', array('b c'), array('a','d')),
    array('foo bar', array(), array('foo', 'bar')),
    array('"one two" three "four five"', array('one two', 'four five'), array('three')),
    array('', array(), array()),
);

// Open the <pre> once for the whole report and close it after the loop.
print '<pre>';

foreach ($tests as $t) {
    $test = $t[0];
    $phrases = $t[1];
    $words = $t[2];

    $res = KTUtil::phraseSplit($test);

    // Compare the arrays themselves: joined strings cannot distinguish
    // array('a, b') from array('a', 'b').
    if (($res['words'] === $words) && ($res['phrases'] === $phrases)) {
        print sprintf("Passed: %s\n", $test);
    } else {
        $p_expect = implode(', ', $phrases);
        $w_expect = implode(', ', $words);
        $p_got = implode(', ', $res['phrases']);
        $w_got = implode(', ', $res['words']);

        print "--------\n";
        print sprintf("failed: %s\n", $test);
        print sprintf("Phrases - got \"%s\", expected \"%s\"\n", $p_got, $p_expect);
        print sprintf("Words - got \"%s\", expected \"%s\"\n", $w_got, $w_expect);
        print "--------\n";
    }
}

print '</pre>';

?>
| 0 | \ No newline at end of file | 41 | \ No newline at end of file |