AI-based Text Summarizer (Python, NLP)
👤 Sharing: AI
```python
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import PorterStemmer
class TextSummarizer:
    """Extractive text summarizer: ranks sentences by summed word frequencies.

    Pipeline: tokenize the document, drop stop words and punctuation, stem,
    count stem frequencies, score each sentence by the frequencies of its
    stems, and return the top-scoring sentences.
    """

    # Sentences with this many (or more) whitespace-separated words are
    # skipped entirely when scoring, so they can never appear in the summary.
    # Promoted from a hard-coded literal so subclasses/instances can tune it.
    MAX_SENTENCE_WORDS = 30

    def __init__(self, language='english'):
        """Fetch required NLTK data and build the stop-word set and stemmer.

        Args:
            language: Stop-word language for ``nltk.corpus.stopwords``
                (default ``'english'``).
        """
        # quiet=True suppresses download progress output; downloads are
        # no-ops when the corpora are already present locally.
        nltk.download('stopwords', quiet=True)
        nltk.download('punkt', quiet=True)
        self.stop_words = set(stopwords.words(language))
        self.stemmer = PorterStemmer()

    def preprocess_text(self, text):
        """Tokenize, filter, and stem the document.

        Keeps only alphanumeric tokens that are not stop words; each kept
        token is lowercased and stemmed.

        Args:
            text: Raw document string.

        Returns:
            list[str]: Processed (stemmed, lowercase) tokens.
        """
        words = word_tokenize(text)
        return [
            self.stemmer.stem(word.lower())
            for word in words
            if word.isalnum() and word.lower() not in self.stop_words
        ]

    def calculate_word_frequency(self, words):
        """Count occurrences of each processed word.

        Args:
            words: Iterable of processed tokens.

        Returns:
            dict[str, int]: Token -> occurrence count.
        """
        word_frequency = {}
        for word in words:
            # dict.get with a default replaces the manual if/else branch.
            word_frequency[word] = word_frequency.get(word, 0) + 1
        return word_frequency

    def calculate_sentence_scores(self, sentences, word_frequency):
        """Score each sentence by the summed frequencies of its stems.

        Sentences with ``MAX_SENTENCE_WORDS`` or more words are skipped
        outright (they get no score and are thus excluded from summaries).

        Args:
            sentences: List of sentence strings.
            word_frequency: Token -> count mapping from
                :meth:`calculate_word_frequency`.

        Returns:
            dict[str, int]: Sentence -> score; only scored sentences appear.
        """
        sentence_scores = {}
        for sentence in sentences:
            # Hoisted out of the inner loop: the original re-checked the
            # sentence length once per word. Same resulting dict, less work.
            if len(sentence.split(' ')) >= self.MAX_SENTENCE_WORDS:
                continue
            for word in word_tokenize(sentence.lower()):
                # Stem once per token; the original stemmed the same token
                # up to three times (membership test plus both branches).
                stemmed = self.stemmer.stem(word)
                if stemmed in word_frequency:
                    sentence_scores[sentence] = (
                        sentence_scores.get(sentence, 0) + word_frequency[stemmed]
                    )
        return sentence_scores

    def summarize(self, text, num_sentences=5):
        """Produce an extractive summary of *text*.

        Args:
            text: Document to summarize.
            num_sentences: Maximum number of sentences to include.

        Returns:
            str: Top-scoring sentences joined by single spaces, in
            descending score order. May contain fewer than
            ``num_sentences`` sentences (or be empty for empty input).
        """
        sentences = sent_tokenize(text)
        processed_words = self.preprocess_text(text)
        word_frequency = self.calculate_word_frequency(processed_words)
        sentence_scores = self.calculate_sentence_scores(sentences, word_frequency)
        # Rank sentences by score, best first; slicing tolerates
        # num_sentences exceeding the number of scored sentences.
        ranked_sentences = sorted(sentence_scores, key=sentence_scores.get, reverse=True)
        return ' '.join(ranked_sentences[:num_sentences])
if __name__ == '__main__':
    # Demo: condense a short passage about AI down to three sentences.
    sample_text = """
Artificial intelligence (AI) is revolutionizing various industries, from healthcare to finance.
AI algorithms can analyze vast amounts of data to identify patterns and make predictions with greater accuracy than humans.
This has led to significant improvements in medical diagnoses, fraud detection, and personalized customer experiences.
However, the rise of AI also raises ethical concerns, such as job displacement and algorithmic bias.
It is crucial to address these challenges proactively to ensure that AI benefits society as a whole.
Machine learning, a subset of AI, is particularly effective at learning from data without explicit programming.
Deep learning, a further subset, uses artificial neural networks with multiple layers to extract complex features from data.
These techniques are powering breakthroughs in areas like natural language processing and computer vision.
The future of AI is uncertain, but its potential to transform our lives is undeniable.
"""
    engine = TextSummarizer()
    result = engine.summarize(sample_text, num_sentences=3)
    print("Original Text:\n", sample_text)
    print("\nSummary:\n", result)
```
👁️ Viewed: 9
Comments