Overview

Namespaces

  • PHP
  • Sastrawi
    • Dictionary
    • Morphology
      • Disambiguator
    • Specification
    • Stemmer
      • Cache
      • ConfixStripping
      • Context
        • Visitor
      • Filter
    • StopWordRemover

Classes

  • CachedStemmer
  • Stemmer
  • StemmerFactory

Interfaces

  • StemmerInterface
  • Overview
  • Namespace
  • Class
  • Tree
  1: <?php
  2: /**
  3:  * Sastrawi (https://github.com/sastrawi/sastrawi)
  4:  *
  5:  * @link      http://github.com/sastrawi/sastrawi for the canonical source repository
  6:  * @license   https://github.com/sastrawi/sastrawi/blob/master/LICENSE The MIT License (MIT)
  7:  */
  8: 
  9: namespace Sastrawi\Stemmer;
 10: 
 11: use Sastrawi\Dictionary\DictionaryInterface;
 12: use Sastrawi\Stemmer\Context\Context;
 13: use Sastrawi\Stemmer\Context\Visitor\VisitorProvider;
 14: 
 15: /**
 16:  * Indonesian Stemmer.
 17:  * Nazief & Adriani, CS Stemmer, ECS Stemmer, Improved ECS.
 18:  *
 19:  * @link https://github.com/sastrawi/sastrawi/wiki/Resources
 20:  */
 21: class Stemmer implements StemmerInterface
 22: {
 23:     /**
 24:      * The dictionary containing root words
 25:      *
 26:      * @var \Sastrawi\Dictionary\DictionaryInterface
 27:      */
 28:     protected $dictionary;
 29: 
 30:     /**
 31:      * Visitor provider
 32:      *
 33:      * @var \Sastrawi\Stemmer\Context\Visitor\VisitorProvider
 34:      */
 35:     protected $visitorProvider;
 36: 
 37:     public function __construct(DictionaryInterface $dictionary)
 38:     {
 39:         $this->dictionary = $dictionary;
 40:         $this->visitorProvider = new VisitorProvider;
 41:     }
 42: 
 43:     /**
 44:      * @return \Sastrawi\Dictionary\DictionaryInterface
 45:      */
 46:     public function getDictionary()
 47:     {
 48:         return $this->dictionary;
 49:     }
 50: 
 51:     /**
 52:      * Stem a text string to its common stem form.
 53:      *
 54:      * @param  string $text the text string to stem, e.g : memberdayakan pembangunan
 55:      * @return string common stem form, e.g : daya bangun
 56:      */
 57:     public function stem($text)
 58:     {
 59:         $normalizedText = Filter\TextNormalizer::normalizeText($text);
 60: 
 61:         $words = explode(' ', $normalizedText);
 62:         $stems = array();
 63: 
 64:         foreach ($words as $word) {
 65:             $stems[] = $this->stemWord($word);
 66:         }
 67: 
 68:         return implode(' ', $stems);
 69:     }
 70: 
 71:     /**
 72:      * Stem a word to its common stem form.
 73:      *
 74:      * @param  string $word the word to stem, e.g : memberdayakan
 75:      * @return string common stem form, e.g : daya
 76:      */
 77:     protected function stemWord($word)
 78:     {
 79:         if ($this->isPlural($word)) {
 80:             return $this->stemPluralWord($word);
 81:         } else {
 82:             return $this->stemSingularWord($word);
 83:         }
 84:     }
 85: 
 86:     /**
 87:      * @param  string  $word
 88:      * @return boolean
 89:      */
 90:     protected function isPlural($word)
 91:     {
 92:         return strpos($word, '-') !== false;
 93:     }
 94: 
 95:     /**
 96:      * Stem a plural word to its common stem form.
 97:      * Asian J. (2007) “Effective Techniques for Indonesian Text Retrieval” page 76-77.
 98:      *
 99:      * @param  string $plural the word to stem, e.g : bersama-sama
100:      * @return string common stem form, e.g : sama
101:      * @link   http://researchbank.rmit.edu.au/eserv/rmit:6312/Asian.pdf
102:      */
103:     protected function stemPluralWord($plural)
104:     {
105:         preg_match('/^(.*)-(.*)$/', $plural, $words);
106: 
107:         if (!isset($words[1]) || !isset($words[2])) {
108:             return $plural;
109:         }
110: 
111:         $rootWord1 = $this->stemSingularWord($words[1]);
112:         $rootWord2 = $this->stemSingularWord($words[2]);
113: 
114:         if ($rootWord1 == $rootWord2) {
115:             return $rootWord1;
116:         } else {
117:             return $plural;
118:         }
119:     }
120: 
121:     /**
122:      * Stem a singular word to its common stem form.
123:      *
124:      * @param  string $word the word to stem, e.g : mengalahkan
125:      * @return string common stem form, e.g : kalah
126:      */
127:     protected function stemSingularWord($word)
128:     {
129:         $context = new Context($word, $this->dictionary, $this->visitorProvider);
130:         $context->execute();
131: 
132:         return $context->getResult();
133:     }
134: }
135: 
API documentation generated by ApiGen 2.8.0