Overview

Namespaces

  • PHP
  • Sastrawi
    • Dictionary
    • Morphology
      • Disambiguator
    • Specification
    • Stemmer
      • Cache
      • ConfixStripping
      • Context
        • Visitor
      • Filter
    • StopWordRemover

Classes

  • Context
  • Removal

Interfaces

  • ContextInterface
  • RemovalInterface
  • Overview
  • Namespace
  • Class
  • Tree
  1: <?php
  2: /**
  3:  * Sastrawi (https://github.com/sastrawi/sastrawi)
  4:  *
  5:  * @link      http://github.com/sastrawi/sastrawi for the canonical source repository
  6:  * @license   https://github.com/sastrawi/sastrawi/blob/master/LICENSE The MIT License (MIT)
  7:  */
  8: 
  9: namespace Sastrawi\Stemmer\Context;
 10: 
 11: use Sastrawi\Dictionary\DictionaryInterface;
 12: use Sastrawi\Stemmer\Context\Visitor\VisitorInterface;
 13: use Sastrawi\Stemmer\Context\Visitor\VisitableInterface;
 14: use Sastrawi\Stemmer\ConfixStripping;
 15: 
 16: /**
 17:  * Stemming Context using Nazief and Adriani, CS, ECS, Improved ECS
 18:  */
 19: class Context implements ContextInterface, VisitableInterface
 20: {
 21:     /**
 22:      * @var string
 23:      */
 24:     protected $originalWord;
 25: 
 26:     /**
 27:      * @var string
 28:      */
 29:     protected $currentWord;
 30: 
 31:     /**
 32:      * @var boolean
 33:      */
 34:     protected $processIsStopped = false;
 35: 
 36:     /**
 37:      * @var \Sastrawi\Stemmer\Context\RemovalInterface[]
 38:      */
 39:     protected $removals = array();
 40: 
 41:     /**
 42:      * @var \Sastrawi\Dictionary\DictionaryInterface
 43:      */
 44:     protected $dictionary;
 45: 
 46:     /**
 47:      * @var \Sastrawi\Stemmer\Context\Visitor\VisitorProvider
 48:      */
 49:     protected $visitorProvider;
 50: 
 51:     /**
 52:      * @var \Sastrawi\Stemmer\Context\Visitor\VisitorInterface[]
 53:      */
 54:     protected $visitors = array();
 55: 
 56:     /**
 57:      * @var \Sastrawi\Stemmer\Context\Visitor\VisitorInterface[]
 58:      */
 59:     protected $suffixVisitors = array();
 60: 
 61:     /**
 62:      * @var \Sastrawi\Stemmer\Context\Visitor\VisitorInterface[]
 63:      */
 64:     protected $prefixVisitors = array();
 65: 
 66:     /**
 67:      * @var string
 68:      */
 69:     protected $result;
 70: 
 71:     /**
 72:      * @param string                                            $originalWord
 73:      * @param \Sastrawi\Dictionary\DictionaryInterface          $dictionary
 74:      * @param \Sastrawi\Stemmer\Context\Visitor\VisitorProvider $visitorProvider
 75:      */
 76:     public function __construct(
 77:         $originalWord,
 78:         DictionaryInterface $dictionary,
 79:         Visitor\VisitorProvider $visitorProvider
 80:     ) {
 81:         $this->originalWord = $originalWord;
 82:         $this->currentWord  = $this->originalWord;
 83:         $this->dictionary   = $dictionary;
 84:         $this->visitorProvider = $visitorProvider;
 85: 
 86:         $this->initVisitors();
 87:     }
 88: 
 89:     protected function initVisitors()
 90:     {
 91:         $this->visitors       = $this->visitorProvider->getVisitors();
 92:         $this->suffixVisitors = $this->visitorProvider->getSuffixVisitors();
 93:         $this->prefixVisitors = $this->visitorProvider->getPrefixVisitors();
 94:     }
 95: 
 96:     public function setDictionary(DictionaryInterface $dictionary)
 97:     {
 98:         $this->dictionary = $dictionary;
 99:     }
100: 
101:     public function getDictionary()
102:     {
103:         return $this->dictionary;
104:     }
105: 
106:     public function getOriginalWord()
107:     {
108:         return $this->originalWord;
109:     }
110: 
111:     public function setCurrentWord($word)
112:     {
113:         $this->currentWord = $word;
114:     }
115: 
116:     public function getCurrentWord()
117:     {
118:         return $this->currentWord;
119:     }
120: 
121:     public function stopProcess()
122:     {
123:         $this->processIsStopped = true;
124:     }
125: 
126:     public function processIsStopped()
127:     {
128:         return $this->processIsStopped;
129:     }
130: 
131:     public function addRemoval(RemovalInterface $removal)
132:     {
133:         $this->removals[] = $removal;
134:     }
135: 
136:     public function getRemovals()
137:     {
138:         return $this->removals;
139:     }
140: 
141:     public function getResult()
142:     {
143:         return $this->result;
144:     }
145: 
146:     /**
147:      * Execute stemming process; the result can be retrieved with getResult()
148:      *
149:      * @return void
150:      */
151:     public function execute()
152:     {
153:         // step 1 - 5
154:         $this->startStemmingProcess();
155: 
156:         // step 6
157:         if ($this->dictionary->contains($this->getCurrentWord())) {
158:             $this->result = $this->getCurrentWord();
159:         } else {
160:             $this->result = $this->originalWord;
161:         }
162:     }
163: 
164:     /**
165:      * @return void
166:      */
167:     protected function startStemmingProcess()
168:     {
169:         // step 1
170:         if ($this->dictionary->contains($this->getCurrentWord())) {
171:             return;
172:         }
173: 
174:         $this->acceptVisitors($this->visitors);
175: 
176:         if ($this->dictionary->contains($this->getCurrentWord())) {
177:             return;
178:         }
179: 
180:         $csPrecedenceAdjustmentSpecification = new ConfixStripping\PrecedenceAdjustmentSpecification();
181: 
182:         /*
183:          * Confix Stripping
184:          * Try to remove prefix before suffix if the specification is met
185:          */
186:         if ($csPrecedenceAdjustmentSpecification->isSatisfiedBy($this->getOriginalWord())) {
187: 
188:             // step 4, 5
189:             $this->removePrefixes();
190:             if ($this->dictionary->contains($this->getCurrentWord())) {
191:                 return;
192:             }
193: 
194:             // step 2, 3
195:             $this->removeSuffixes();
196:             if ($this->dictionary->contains($this->getCurrentWord())) {
197:                 return;
198:             } else {
199:                 // if the trial is failed, restore the original word
200:                 // and continue to normal rule precedence (suffix first, prefix afterwards)
201:                 $this->setCurrentWord($this->originalWord);
202:                 $this->removals = array();
203:             }
204:         }
205: 
206:         // step 2, 3
207:         $this->removeSuffixes();
208:         if ($this->dictionary->contains($this->getCurrentWord())) {
209:             return;
210:         }
211: 
212:         // step 4, 5
213:         $this->removePrefixes();
214:         if ($this->dictionary->contains($this->getCurrentWord())) {
215:             return;
216:         }
217: 
218:         // ECS loop pengembalian akhiran
219:         $this->loopPengembalianAkhiran();
220:     }
221: 
222:     protected function removePrefixes()
223:     {
224:         for ($i = 0; $i < 3; $i++) {
225:             $this->acceptPrefixVisitors($this->prefixVisitors);
226:             if ($this->dictionary->contains($this->getCurrentWord())) {
227:                 return;
228:             }
229:         }
230:     }
231: 
232:     protected function removeSuffixes()
233:     {
234:         $this->acceptVisitors($this->suffixVisitors);
235:     }
236: 
237:     public function accept(VisitorInterface $visitor)
238:     {
239:         $visitor->visit($this);
240:     }
241: 
242:     protected function acceptVisitors(array $visitors)
243:     {
244:         foreach ($visitors as $visitor) {
245: 
246:             $this->accept($visitor);
247: 
248:             if ($this->getDictionary()->contains($this->getCurrentWord())) {
249:                 return $this->getCurrentWord();
250:             }
251: 
252:             if ($this->processIsStopped()) {
253:                 return $this->getCurrentWord();
254:             }
255:         }
256:     }
257: 
258:     protected function acceptPrefixVisitors(array $visitors)
259:     {
260:         $removalCount = count($this->removals);
261:         foreach ($visitors as $visitor) {
262: 
263:             $this->accept($visitor);
264: 
265:             if ($this->getDictionary()->contains($this->getCurrentWord())) {
266:                 return $this->getCurrentWord();
267:             }
268: 
269:             if ($this->processIsStopped()) {
270:                 return $this->getCurrentWord();
271:             }
272: 
273:             if (count($this->removals) > $removalCount) {
274:                 return;
275:             }
276:         }
277:     }
278: 
279:     /**
280:      * ECS Loop Pengembalian Akhiran
281:      */
282:     public function loopPengembalianAkhiran()
283:     {
284:         // restore prefix to form [DP+[DP+[DP]]] + Root word
285:         $this->restorePrefix();
286: 
287:         $removals = $this->removals;
288:         $reversedRemovals = array_reverse($removals);
289:         $currentWord = $this->getCurrentWord();
290: 
291:         foreach ($reversedRemovals as $removal) {
292:             if (!$this->isSuffixRemoval($removal)) {
293:                 continue;
294:             }
295: 
296:             if ($removal->getRemovedPart() == 'kan') {
297:                 $this->setCurrentWord($removal->getResult() . 'k');
298: 
299:                 // step 4, 5
300:                 $this->removePrefixes();
301:                 if ($this->dictionary->contains($this->getCurrentWord())) {
302:                     return;
303:                 }
304: 
305:                 $this->setCurrentWord($removal->getResult() . 'kan');
306:             } else {
307:                 $this->setCurrentWord($removal->getSubject());
308:             }
309: 
310:             // step 4, 5
311:             $this->removePrefixes();
312:             if ($this->dictionary->contains($this->getCurrentWord())) {
313:                 return;
314:             }
315: 
316:             $this->removals = $removals;
317:             $this->setCurrentWord($currentWord);
318:         }
319:     }
320: 
321:     /**
322:      * Check wether the removed part is a suffix
323:      *
324:      * @param  \Sastrawi\Stemmer\Context\RemovalInterface $removal
325:      * @return boolean
326:      */
327:     protected function isSuffixRemoval(RemovalInterface $removal)
328:     {
329:         return $removal->getAffixType() == 'DS'
330:             || $removal->getAffixType() == 'PP'
331:             || $removal->getAffixType() == 'P';
332:     }
333: 
334:     /**
335:      * Restore prefix to proceed with ECS loop pengembalian akhiran
336:      *
337:      * @return void
338:      */
339:     public function restorePrefix()
340:     {
341:         foreach ($this->removals as $i => $removal) {
342:             if ($removal->getAffixType() == 'DP') {
343:                 // return the word before precoding (the subject of first prefix removal)
344:                 $this->setCurrentWord($removal->getSubject());
345:                 break;
346:             }
347:         }
348: 
349:         foreach ($this->removals as $i => $removal) {
350:             if ($removal->getAffixType() == 'DP') {
351:                 unset($this->removals[$i]);
352:             }
353:         }
354:     }
355: }
356: 
API documentation generated by ApiGen 2.8.0