Commit f7cbf861637094acda07d418e60f56c5e68e2b21
1 parent
bd981125
- add phrase splitter
- add test harness for phrase splitter
- use boolean + and phrases in searchabletext criterion.
git-svn-id: https://kt-dms.svn.sourceforge.net/svnroot/kt-dms/trunk@5170 c91229c3-7414-0410-bfa2-8a42b809f60b
Showing
3 changed files
with
82 additions
and
1 deletions
lib/browse/Criteria.inc
| ... | ... | @@ -609,7 +609,20 @@ class SearchableTextCriterion extends BrowseCriterion { |
| 609 | 609 | |
| 610 | 610 | $p = array(); |
| 611 | 611 | $p[0] = "MATCH(DST.document_text) AGAINST (? $boolean_mode)"; |
| 612 | - $p[1] = $aRequest[$this->getWidgetBase()]; | |
| 612 | + | |
| 613 | + $RESTRICTING_SEARCH = true; | |
| 614 | + | |
| 615 | + if ($RESTRICTING_SEARCH) { | |
| 616 | + // FIXME use a more reasonable approach for splitting this... | |
| 617 | + $q_set = KTUtil::phraseSplit($aRequest[$this->getWidgetBase()]); | |
| 618 | + $temp = $q_set; | |
| 619 | + foreach ($temp as $k => $v) { | |
| 620 | + $q_set[$k] = sprintf('+"%s"', $v); | |
| 621 | + } | |
| 622 | + $p[1] = implode(' ',$q_set); | |
| 623 | + } else { | |
| 624 | + $p[1] = $aRequest[$this->getWidgetBase()]; | |
| 625 | + } | |
| 613 | 626 | return $p; |
| 614 | 627 | } |
| 615 | 628 | ... | ... |
lib/util/ktutil.inc
| ... | ... | @@ -540,6 +540,34 @@ class KTUtil { |
| 540 | 540 | return ((float) $microtime_simple[1] + (float) $microtime_simple[0]); |
| 541 | 541 | } |
| 542 | 542 | |
/**
 * Split a search string into quoted phrases and bare words.
 *
 * Text enclosed in a pair of double quotes is kept intact as one phrase;
 * text outside quotes is broken on whitespace into individual words.  An
 * unterminated trailing quote opens a phrase that runs to the end of the
 * string.
 *
 * The result is a map holding two lists, not a flat list of terms: a
 * caller that wants a single list must merge 'words' and 'phrases' itself.
 *
 * @param string $sSearchString Raw search input, e.g. 'a "b c" d'.
 * @return array array('words' => array of strings, 'phrases' => array of strings),
 *               each list in order of appearance in the input.
 */
function phraseSplit($sSearchString) {
    // Cutting on the quote character leaves unquoted text at the even
    // offsets and quoted text at the odd offsets.
    $aSegments = explode('"', (string) $sSearchString);

    $aWords = array();
    $aPhrases = array();
    foreach ($aSegments as $iOffset => $sSegment) {
        if ($iOffset % 2 == 1) {
            // A blank phrase (from `""` or `"   "`) carries no search
            // term, so it is dropped rather than returned as ''.
            if (trim($sSegment) !== '') {
                $aPhrases[] = $sSegment;
            }
            continue;
        }

        // Split on any run of whitespace (spaces, tabs, newlines).
        // PREG_SPLIT_NO_EMPTY discards only zero-length pieces, so the
        // word "0" survives - an empty() check would throw it away.
        foreach (preg_split('/\s+/', $sSegment, -1, PREG_SPLIT_NO_EMPTY) as $sWord) {
            $aWords[] = $sWord;
        }
    }

    return array(
        'words' => $aWords,
        'phrases' => $aPhrases,
    );
}
| 570 | + | |
| 543 | 571 | } |
| 544 | 572 | |
| 545 | 573 | ?> | ... | ... |
tests/util/ktutil/testPhraseSplit.php
0 → 100644
| 1 | +<?php | |
| 2 | + | |
| 3 | +require_once('../../../config/dmsDefaults.php'); | |
| 4 | +require_once(KT_LIB_DIR . '/util/ktutil.inc'); | |
| 5 | + | |
// Each case is (input string, expected phrases, expected words).
$tests = array(
    array('a"b c"d', array('b c'), array('a','d')),
    array('hello world', array(), array('hello', 'world')),
    array('"a b" c "d e"', array('a b', 'd e'), array('c')),
);

// Open the <pre> once for the whole report; it is closed after the loop.
print '<pre>';

foreach ($tests as $t) {
    $test = $t[0];
    $phrases = $t[1];
    $words = $t[2];

    $res = KTUtil::phraseSplit($test);

    // Compare the arrays themselves: joining them with ', ' first would
    // make array('a, b') indistinguishable from array('a', 'b').
    if (($res['words'] === $words) && ($res['phrases'] === $phrases)) {
        print sprintf("Passed: %s\n", $test);
        continue;
    }

    // The joined form is only used to make the failure report readable.
    $p_expect = implode(', ', $phrases);
    $w_expect = implode(', ', $words);
    $p_got = implode(', ', $res['phrases']);
    $w_got = implode(', ', $res['words']);

    print "--------\n";
    print sprintf("failed: %s\n", $test);
    print sprintf("Phrases - got \"%s\", expected \"%s\"\n", $p_got, $p_expect);
    print sprintf("Words - got \"%s\", expected \"%s\"\n", $w_got, $w_expect);
    print "--------\n";
}

print '</pre>';
| 39 | + | |
| 40 | +?> | |
| 0 | 41 | \ No newline at end of file | ... | ... |