# RemoveInflectionalParticle.py
# Committed by Sartika Aritonang
import re
from Sastrawi.Stemmer.Context.Removal import Removal

class RemoveInflectionalParticle(object):
    """Context visitor that strips an inflectional particle from a word.

    The particles handled are the suffixes "lah", "kah", "tah" and "pun",
    optionally preceded by one or more hyphens.

    Asian J. (2007) "Effective Techniques for Indonesian Text Retrieval". page 60

    @link http://researchbank.rmit.edu.au/eserv/rmit:6312/Asian.pdf
    """

    def visit(self, context):
        """Strip the particle from ``context.current_word`` and record it.

        When :meth:`remove` changes the word, a ``Removal`` is registered
        through ``context.add_removal`` and ``context.current_word`` is
        replaced by the stripped form. When nothing is stripped the context
        is left untouched.

        :param context: object exposing a ``current_word`` attribute and an
            ``add_removal`` method.
        """
        original = context.current_word
        result = self.remove(original)
        if result == original:
            return

        # Literal replacement on purpose: ``result`` is plain text, so it
        # must not be interpreted as a regular expression (metacharacters in
        # the word would otherwise raise re.error or match the wrong span).
        removed_part = original.replace(result, '', 1)

        # 'P' is the affix-type tag handed to Removal (presumably "particle"
        # -- confirm against the Removal class).
        removal = Removal(self, original, result, removed_part, 'P')

        context.add_removal(removal)
        context.current_word = result

    def remove(self, word):
        """Remove inflectional particle : lah|kah|tah|pun

        Only a particle at the very end of ``word`` is stripped, together
        with any hyphens directly in front of it.

        :param word: the word to strip.
        :return: ``word`` without its trailing particle, or ``word``
            unchanged when it does not end in one of the particles.
        """
        return re.sub(r'-*(lah|kah|tah|pun)$', '', word, 1)