AI-based Text Summarizer (Python, NLP)

👤 Sharing: AI
```python
from collections import Counter

import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize, sent_tokenize

class TextSummarizer:
    """Extractive, frequency-based text summarizer built on NLTK.

    Words are tokenized, filtered against a stop-word list and stemmed.
    Each sentence is then scored by the summed frequencies of the stems it
    contains, and the best-scoring sentences form the summary.
    """

    def __init__(self, language='english'):
        """Fetch the NLTK resources the summarizer needs and build helpers.

        Args:
            language: Name of the NLTK stop-word list to load. The
                tokenizers themselves are called with their default
                language setting.
        """
        nltk.download('stopwords', quiet=True)
        nltk.download('punkt', quiet=True)
        # Recent NLTK releases load the 'punkt_tab' tables instead of the
        # pickled 'punkt' model; fetching both keeps old and new versions
        # working.
        nltk.download('punkt_tab', quiet=True)
        self.stop_words = set(stopwords.words(language))
        self.stemmer = PorterStemmer()

    def preprocess_text(self, text):
        """Turn raw text into a list of stemmed content words.

        The text is word-tokenized; tokens that are not purely alphanumeric
        (punctuation, hyphenated forms, ...) are dropped, the rest are
        lower-cased, stop words are removed and what remains is stemmed.

        Args:
            text: The raw text to process.

        Returns:
            A list of stems, in order of appearance (duplicates kept).
        """
        lowered = (
            token.lower() for token in word_tokenize(text) if token.isalnum()
        )
        return [
            self.stemmer.stem(word)
            for word in lowered
            if word not in self.stop_words
        ]

    def calculate_word_frequency(self, words):
        """Count how often each word occurs.

        Args:
            words: Iterable of (already preprocessed) words.

        Returns:
            A plain ``dict`` mapping each word to its number of occurrences,
            keyed in order of first appearance.
        """
        # Convert back to dict so callers keep getting KeyError (not 0) for
        # unknown words, exactly as with a hand-built dictionary.
        return dict(Counter(words))

    def calculate_sentence_scores(self, sentences, word_frequency, max_words=30):
        """Score sentences by the frequencies of the stems they contain.

        Args:
            sentences: Iterable of sentence strings.
            word_frequency: Mapping of stem -> frequency, as returned by
                ``calculate_word_frequency``.
            max_words: Sentences with this many whitespace-separated words
                or more are excluded from scoring altogether (they never
                appear in the result). Defaults to 30.

        Returns:
            A dict mapping each scored sentence to its score. Sentences
            that are too long, or that contain no known stem, are absent.
        """
        sentence_scores = {}
        # dict.fromkeys() removes repeats while keeping order: the result is
        # keyed by sentence text, so scoring a repeated sentence again would
        # silently inflate its score.
        for sentence in dict.fromkeys(sentences):
            # The length check does not depend on individual tokens, so do
            # it once per sentence. split() (no argument) also ignores runs
            # of spaces and newlines when counting words.
            if len(sentence.split()) >= max_words:
                continue
            stems = (
                self.stemmer.stem(token)
                for token in word_tokenize(sentence.lower())
            )
            matched = [word_frequency[stem] for stem in stems if stem in word_frequency]
            if matched:
                sentence_scores[sentence] = sum(matched)
        return sentence_scores

    def summarize(self, text, num_sentences=5):
        """Build an extractive summary of ``text``.

        Steps: split into sentences, preprocess the words, count stem
        frequencies, score the sentences, keep the ``num_sentences`` best.

        Args:
            text: The text to summarize.
            num_sentences: Maximum number of sentences in the summary.
                Values of zero or less yield an empty summary.

        Returns:
            The selected sentences joined by single spaces, in the order
            they appear in ``text``. Empty string when there is nothing to
            summarize.
        """
        # Guard first: a negative count would otherwise slice from the wrong
        # end and return almost every sentence.
        if num_sentences <= 0 or not text.strip():
            return ''

        sentences = sent_tokenize(text)
        processed_words = self.preprocess_text(text)
        word_frequency = self.calculate_word_frequency(processed_words)
        sentence_scores = self.calculate_sentence_scores(sentences, word_frequency)

        # Highest score first; sorted() is stable, so ties keep text order.
        ranked_sentences = sorted(
            sentence_scores, key=sentence_scores.get, reverse=True
        )
        selected = set(ranked_sentences[:num_sentences])

        # Emit the winners in their original reading order so the summary
        # stays coherent instead of jumping around by score.
        summary_sentences = [
            sentence for sentence in dict.fromkeys(sentences) if sentence in selected
        ]
        return ' '.join(summary_sentences)

if __name__ == '__main__':
    # Demo run: condense a short passage about AI to its three
    # highest-scoring sentences and print both versions.
    sample_text = """
    Artificial intelligence (AI) is revolutionizing various industries, from healthcare to finance. 
    AI algorithms can analyze vast amounts of data to identify patterns and make predictions with greater accuracy than humans. 
    This has led to significant improvements in medical diagnoses, fraud detection, and personalized customer experiences.
    However, the rise of AI also raises ethical concerns, such as job displacement and algorithmic bias. 
    It is crucial to address these challenges proactively to ensure that AI benefits society as a whole.
    Machine learning, a subset of AI, is particularly effective at learning from data without explicit programming. 
    Deep learning, a further subset, uses artificial neural networks with multiple layers to extract complex features from data.
    These techniques are powering breakthroughs in areas like natural language processing and computer vision.
    The future of AI is uncertain, but its potential to transform our lives is undeniable.
    """

    # Build the summarizer and ask for a three-sentence summary in one go.
    condensed = TextSummarizer().summarize(sample_text, num_sentences=3)

    print("Original Text:\n", sample_text)
    print("\nSummary:\n", condensed)
```
👁️ Viewed: 9

Comments