1: <?php
2: /**
3: * Sastrawi (https://github.com/sastrawi/sastrawi)
4: *
5: * @link http://github.com/sastrawi/sastrawi for the canonical source repository
6: * @license https://github.com/sastrawi/sastrawi/blob/master/LICENSE The MIT License (MIT)
7: */
8:
9: namespace Sastrawi\Stemmer\Context;
10:
11: use Sastrawi\Dictionary\DictionaryInterface;
12: use Sastrawi\Stemmer\Context\Visitor\VisitorInterface;
13: use Sastrawi\Stemmer\Context\Visitor\VisitableInterface;
14: use Sastrawi\Stemmer\ConfixStripping;
15:
/**
 * Stemming context for a single word, using Nazief and Adriani, CS, ECS and
 * Improved ECS.
 *
 * The context owns the word being stemmed, the dictionary used to recognise
 * root words, the visitors that strip affixes and the list of removals those
 * visitors record. Call execute() and then read the outcome with getResult().
 */
class Context implements ContextInterface, VisitableInterface
{
    /**
     * Number of prefix-stripping passes performed by removePrefixes().
     * The ECS restore step describes the word as [DP+[DP+[DP]]] + root word,
     * i.e. at most three stacked prefixes.
     */
    const MAX_PREFIX_REMOVALS = 3;

    /**
     * The word exactly as it was handed to the constructor.
     *
     * @var string
     */
    protected $originalWord;

    /**
     * The working copy of the word; visitors rewrite it as affixes are removed.
     *
     * @var string
     */
    protected $currentWord;

    /**
     * Set by stopProcess(); once true no further visitor is run.
     *
     * @var bool
     */
    protected $processIsStopped = false;

    /**
     * Removals recorded so far through addRemoval().
     *
     * @var \Sastrawi\Stemmer\Context\RemovalInterface[]
     */
    protected $removals = array();

    /**
     * @var \Sastrawi\Dictionary\DictionaryInterface
     */
    protected $dictionary;

    /**
     * @var \Sastrawi\Stemmer\Context\Visitor\VisitorProvider
     */
    protected $visitorProvider;

    /**
     * General visitors, run once before any affix stripping.
     *
     * @var \Sastrawi\Stemmer\Context\Visitor\VisitorInterface[]
     */
    protected $visitors = array();

    /**
     * @var \Sastrawi\Stemmer\Context\Visitor\VisitorInterface[]
     */
    protected $suffixVisitors = array();

    /**
     * @var \Sastrawi\Stemmer\Context\Visitor\VisitorInterface[]
     */
    protected $prefixVisitors = array();

    /**
     * Outcome of execute(); null until execute() has been called.
     *
     * @var string|null
     */
    protected $result;

    /**
     * @param string                                            $originalWord
     * @param \Sastrawi\Dictionary\DictionaryInterface          $dictionary
     * @param \Sastrawi\Stemmer\Context\Visitor\VisitorProvider $visitorProvider
     */
    public function __construct(
        $originalWord,
        DictionaryInterface $dictionary,
        Visitor\VisitorProvider $visitorProvider
    ) {
        $this->originalWord    = $originalWord;
        $this->currentWord     = $this->originalWord;
        $this->dictionary      = $dictionary;
        $this->visitorProvider = $visitorProvider;

        $this->initVisitors();
    }

    /**
     * Copy the three visitor lists out of the visitor provider.
     *
     * @return void
     */
    protected function initVisitors()
    {
        $this->visitors       = $this->visitorProvider->getVisitors();
        $this->suffixVisitors = $this->visitorProvider->getSuffixVisitors();
        $this->prefixVisitors = $this->visitorProvider->getPrefixVisitors();
    }

    /**
     * @param \Sastrawi\Dictionary\DictionaryInterface $dictionary
     * @return void
     */
    public function setDictionary(DictionaryInterface $dictionary)
    {
        $this->dictionary = $dictionary;
    }

    /**
     * @return \Sastrawi\Dictionary\DictionaryInterface
     */
    public function getDictionary()
    {
        return $this->dictionary;
    }

    /**
     * @return string
     */
    public function getOriginalWord()
    {
        return $this->originalWord;
    }

    /**
     * @param string $word
     * @return void
     */
    public function setCurrentWord($word)
    {
        $this->currentWord = $word;
    }

    /**
     * @return string
     */
    public function getCurrentWord()
    {
        return $this->currentWord;
    }

    /**
     * Ask the context to stop; no visitor is run after this has been called.
     *
     * @return void
     */
    public function stopProcess()
    {
        $this->processIsStopped = true;
    }

    /**
     * @return bool
     */
    public function processIsStopped()
    {
        return $this->processIsStopped;
    }

    /**
     * Record an affix removal performed by a visitor.
     *
     * @param \Sastrawi\Stemmer\Context\RemovalInterface $removal
     * @return void
     */
    public function addRemoval(RemovalInterface $removal)
    {
        $this->removals[] = $removal;
    }

    /**
     * @return \Sastrawi\Stemmer\Context\RemovalInterface[]
     */
    public function getRemovals()
    {
        return $this->removals;
    }

    /**
     * @return string|null the stemming result, or null if execute() has not run yet
     */
    public function getResult()
    {
        return $this->result;
    }

    /**
     * Execute stemming process; the result can be retrieved with getResult()
     *
     * @return void
     */
    public function execute()
    {
        // step 1 - 5
        $this->startStemmingProcess();

        // step 6: the candidate is only accepted when the dictionary knows it,
        // otherwise the original word is given back untouched
        if ($this->dictionary->contains($this->getCurrentWord())) {
            $this->result = $this->getCurrentWord();
        } else {
            $this->result = $this->originalWord;
        }
    }

    /**
     * Run steps 1 - 5. Returns as soon as the current word is found in the
     * dictionary or a visitor has stopped the process.
     *
     * @return void
     */
    protected function startStemmingProcess()
    {
        // step 1
        if ($this->dictionary->contains($this->getCurrentWord())) {
            return;
        }

        $this->acceptVisitors($this->visitors);

        if ($this->dictionary->contains($this->getCurrentWord())) {
            return;
        }

        // a general visitor vetoed stemming: leave the word alone instead of
        // carrying on with affix stripping
        if ($this->processIsStopped()) {
            return;
        }

        $csPrecedenceAdjustmentSpecification = new ConfixStripping\PrecedenceAdjustmentSpecification();

        /*
         * Confix Stripping
         * Try to remove prefix before suffix if the specification is met
         */
        if ($csPrecedenceAdjustmentSpecification->isSatisfiedBy($this->getOriginalWord())) {
            // step 4, 5
            $this->removePrefixes();
            if ($this->dictionary->contains($this->getCurrentWord())) {
                return;
            }

            // step 2, 3
            $this->removeSuffixes();
            if ($this->dictionary->contains($this->getCurrentWord())) {
                return;
            }

            // if the trial is failed, restore the original word
            // and continue to normal rule precedence (suffix first, prefix afterwards)
            $this->setCurrentWord($this->originalWord);
            $this->removals = array();
        }

        // step 2, 3
        $this->removeSuffixes();
        if ($this->dictionary->contains($this->getCurrentWord())) {
            return;
        }

        // step 4, 5
        $this->removePrefixes();
        if ($this->dictionary->contains($this->getCurrentWord())) {
            return;
        }

        // ECS suffix restoration loop
        $this->loopPengembalianAkhiran();
    }

    /**
     * Run the prefix visitors up to MAX_PREFIX_REMOVALS times, stopping early
     * once the current word is a dictionary word or the process was stopped.
     *
     * @return void
     */
    protected function removePrefixes()
    {
        for ($i = 0; $i < self::MAX_PREFIX_REMOVALS; $i++) {
            $this->acceptPrefixVisitors($this->prefixVisitors);

            if ($this->dictionary->contains($this->getCurrentWord())) {
                return;
            }

            if ($this->processIsStopped()) {
                return;
            }
        }
    }

    /**
     * Run the suffix visitors once.
     *
     * @return void
     */
    protected function removeSuffixes()
    {
        $this->acceptVisitors($this->suffixVisitors);
    }

    /**
     * Let a single visitor visit this context.
     *
     * @param \Sastrawi\Stemmer\Context\Visitor\VisitorInterface $visitor
     * @return void
     */
    public function accept(VisitorInterface $visitor)
    {
        $visitor->visit($this);
    }

    /**
     * Apply the visitors in order until the current word is a dictionary word
     * or the process is stopped. No visitor is run when the process has
     * already been stopped.
     *
     * @param \Sastrawi\Stemmer\Context\Visitor\VisitorInterface[] $visitors
     * @return string|null the current word when the loop ended early, null
     *                     when every visitor was applied
     */
    protected function acceptVisitors(array $visitors)
    {
        if ($this->processIsStopped()) {
            return $this->getCurrentWord();
        }

        foreach ($visitors as $visitor) {
            $this->accept($visitor);

            if ($this->getDictionary()->contains($this->getCurrentWord())) {
                return $this->getCurrentWord();
            }

            if ($this->processIsStopped()) {
                return $this->getCurrentWord();
            }
        }

        return null;
    }

    /**
     * Same as acceptVisitors(), but additionally ends as soon as one visitor
     * has recorded a new removal, so a single call strips at most one prefix.
     *
     * @param \Sastrawi\Stemmer\Context\Visitor\VisitorInterface[] $visitors
     * @return string|null the current word when a dictionary word was reached
     *                     or the process was stopped, null otherwise
     */
    protected function acceptPrefixVisitors(array $visitors)
    {
        if ($this->processIsStopped()) {
            return $this->getCurrentWord();
        }

        $removalCount = count($this->removals);

        foreach ($visitors as $visitor) {
            $this->accept($visitor);

            if ($this->getDictionary()->contains($this->getCurrentWord())) {
                return $this->getCurrentWord();
            }

            if ($this->processIsStopped()) {
                return $this->getCurrentWord();
            }

            // a removal was recorded by this visitor: one prefix per call
            if (count($this->removals) > $removalCount) {
                return null;
            }
        }

        return null;
    }

    /**
     * ECS suffix restoration loop ("loop pengembalian akhiran").
     *
     * Puts the removed prefixes back, then walks the recorded suffix removals
     * from the most recent to the oldest. For each one the word is rewound to
     * the state before that suffix was removed and prefix stripping is retried.
     * The method returns as soon as a dictionary word is reached; after a
     * failed attempt the word and the removal list are reset to what they were
     * when the loop started.
     *
     * @return void
     */
    public function loopPengembalianAkhiran()
    {
        // restore prefix to form [DP+[DP+[DP]]] + Root word
        $this->restorePrefix();

        $removals    = $this->removals;
        $currentWord = $this->getCurrentWord();

        foreach (array_reverse($removals) as $removal) {
            if (!$this->isSuffixRemoval($removal)) {
                continue;
            }

            if ($removal->getRemovedPart() === 'kan') {
                // first give back only the "k" of "kan" ...
                $this->setCurrentWord($removal->getResult() . 'k');

                // step 4, 5
                $this->removePrefixes();
                if ($this->dictionary->contains($this->getCurrentWord())) {
                    return;
                }

                // ... then the whole "kan"; drop what the failed "k" attempt
                // recorded so it does not leak into getRemovals()
                $this->removals = $removals;
                $this->setCurrentWord($removal->getResult() . 'kan');
            } else {
                $this->setCurrentWord($removal->getSubject());
            }

            // step 4, 5
            $this->removePrefixes();
            if ($this->dictionary->contains($this->getCurrentWord())) {
                return;
            }

            // attempt failed: rewind before trying the next (older) suffix
            $this->removals = $removals;
            $this->setCurrentWord($currentWord);
        }
    }

    /**
     * Check whether the removed part is a suffix
     *
     * The affix types treated as suffixes are 'DS', 'PP' and 'P'
     * (presumably derivational suffix, possessive pronoun and particle;
     * confirm against the visitors that create the removals).
     *
     * @param \Sastrawi\Stemmer\Context\RemovalInterface $removal
     * @return bool
     */
    protected function isSuffixRemoval(RemovalInterface $removal)
    {
        return in_array($removal->getAffixType(), array('DS', 'PP', 'P'), true);
    }

    /**
     * Restore prefix to proceed with the ECS suffix restoration loop
     *
     * Sets the current word back to the subject of the first 'DP' removal
     * (the word as it was before any prefix was stripped) and drops every
     * 'DP' removal from the list. The remaining removals keep their order
     * and are re-indexed from zero.
     *
     * @return void
     */
    public function restorePrefix()
    {
        $remainingRemovals = array();
        $prefixRestored    = false;

        foreach ($this->removals as $removal) {
            if ($removal->getAffixType() !== 'DP') {
                $remainingRemovals[] = $removal;
                continue;
            }

            if (!$prefixRestored) {
                // return the word before precoding (the subject of first prefix removal)
                $this->setCurrentWord($removal->getSubject());
                $prefixRestored = true;
            }
        }

        $this->removals = $remainingRemovals;
    }
}
356: