AI-Powered Chat Moderator (Python, NLP)
```python
import nltk
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib # For saving and loading models
# Download necessary NLTK resources (run this once)
# nltk.download('punkt')
# nltk.download('stopwords')
# nltk.download('wordnet')
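# nltk.download('punkt_tab')  # may also be required by word_tokenize on newer NLTK releases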
class ChatModerator:
def __init__(self, toxicity_threshold=0.7, similarity_threshold=0.8,
profanity_file="profanity.txt", model_path="toxicity_model.joblib",
vectorizer_path="tfidf_vectorizer.joblib"):
"""
Initializes the ChatModerator.
Args:
toxicity_threshold (float): Threshold for classifying text as toxic. Higher = stricter.
similarity_threshold (float): Threshold for detecting similar messages. Higher = stricter.
profanity_file (str): Path to a text file containing a list of profane words (one word per line).
model_path (str): Path to save/load the toxicity model.
vectorizer_path (str): Path to save/load the TF-IDF vectorizer.
"""
self.toxicity_threshold = toxicity_threshold
self.similarity_threshold = similarity_threshold
self.profanity = self.load_profanity(profanity_file)
self.message_history = [] # Store processed messages for similarity checking
# Load or train the toxicity model and vectorizer
try:
self.model = joblib.load(model_path)
self.vectorizer = joblib.load(vectorizer_path)
print("Loaded existing toxicity model and vectorizer.")
except FileNotFoundError:
print("Training a new toxicity model and vectorizer...")
self.model, self.vectorizer = self.train_toxicity_model()
joblib.dump(self.model, model_path) # Save the model
joblib.dump(self.vectorizer, vectorizer_path) # Save the vectorizer
print("Toxicity model and vectorizer trained and saved.")
def load_profanity(self, file_path):
"""Loads a list of profane words from a file."""
try:
with open(file_path, 'r') as f:
return set(line.strip().lower() for line in f)
except FileNotFoundError:
print(f"Warning: Profanity file not found at {file_path}. Profanity check will be disabled.")
return set()
def preprocess_text(self, text):
"""
Preprocesses the input text: lowercasing, removing punctuation, tokenization, stop word removal, and lemmatization.
"""
text = text.lower()
text = re.sub(r'[^\w\s]', '', text) # Remove punctuation
tokens = nltk.word_tokenize(text)
stop_words = set(nltk.corpus.stopwords.words('english'))
        tokens = [w for w in tokens if w not in stop_words]
lemmatizer = nltk.stem.WordNetLemmatizer()
tokens = [lemmatizer.lemmatize(w) for w in tokens]
return " ".join(tokens)
def train_toxicity_model(self, training_data="toxic_comments.csv", max_features=5000):
"""
Trains a simple toxicity detection model using TF-IDF and Logistic Regression.
Args:
training_data (str): Path to a CSV file containing training data with 'comment_text' and 'toxic' columns.
max_features (int): Maximum number of features to use for TF-IDF.
Returns:
tuple: A tuple containing the trained model and the TF-IDF vectorizer.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
        def _placeholder_model():
            # Fit a trivial stand-in so later transform()/predict_proba() calls do not
            # raise NotFittedError when no training data is available; it has no real predictive power.
            vec = TfidfVectorizer()
            clf = LogisticRegression()
            clf.fit(vec.fit_transform(["placeholder benign", "placeholder toxic"]), [0, 1])
            return clf, vec
        try:
            df = pd.read_csv(training_data)
        except FileNotFoundError:
            print(f"Error: Training data file not found at {training_data}. Using a placeholder model.")
            return _placeholder_model()
# Simple adaptation for a CSV where toxicity is a separate column.
if 'toxic' in df.columns:
X = df['comment_text']
y = df['toxic']
else:
            # Otherwise, combine any columns whose names contain 'toxic' (e.g. 'toxic', 'severe_toxic')
toxicity_cols = [col for col in df.columns if 'toxic' in col.lower()]
if not toxicity_cols:
print("Error: No 'toxic' column found in the dataframe. Returning dummy model")
return LogisticRegression(), TfidfVectorizer()
y = df[toxicity_cols].any(axis=1).astype(int)
X = df['comment_text']
X = X.fillna('') # Handle missing text data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
vectorizer = TfidfVectorizer(max_features=max_features)
X_train_vectors = vectorizer.fit_transform(X_train)
X_test_vectors = vectorizer.transform(X_test)
model = LogisticRegression(solver='liblinear', random_state=42)
model.fit(X_train_vectors, y_train)
y_pred = model.predict(X_test_vectors)
print("Toxicity Model Training Report:")
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print(classification_report(y_test, y_pred))
return model, vectorizer
def is_toxic(self, text):
"""
Detects if the text is toxic using the trained model.
Args:
text (str): The text to analyze.
Returns:
bool: True if the text is considered toxic, False otherwise.
"""
# Handle empty input gracefully
if not text:
return False
text = self.preprocess_text(text)
text_vector = self.vectorizer.transform([text])
probability = self.model.predict_proba(text_vector)[0][1] # Probability of being toxic
return probability > self.toxicity_threshold
def contains_profanity(self, text):
"""Checks if the text contains any profane words."""
        text = text.lower()
        # Extract word tokens so trailing punctuation (e.g. "word!") still matches list entries.
        words = re.findall(r"\w+", text)
        return any(word in self.profanity for word in words)
def is_repetitive(self, text):
"""
Checks if the text is too similar to previous messages.
Args:
text (str): The text to analyze.
Returns:
bool: True if the text is similar to a previous message, False otherwise.
"""
if not self.message_history:
return False
text = self.preprocess_text(text)
vector = self.vectorizer.transform([text])
similarities = cosine_similarity(vector, self.vectorizer.transform(self.message_history))
max_similarity = similarities.max()
return max_similarity > self.similarity_threshold
def moderate(self, text):
"""
Moderates the given text based on toxicity, profanity, and repetitiveness.
Args:
text (str): The text to moderate.
Returns:
str: "Approved" if the text passes moderation, or a message explaining why it was rejected.
"""
if self.is_toxic(text):
return "Rejected: Message flagged as potentially toxic."
if self.contains_profanity(text):
return "Rejected: Message contains profanity."
if self.is_repetitive(text):
return "Rejected: Message is too similar to previous messages."
processed_text = self.preprocess_text(text)
self.message_history.append(processed_text) # Store for future similarity checks
return "Approved"
# Example Usage:
if __name__ == '__main__':
moderator = ChatModerator(toxicity_threshold=0.6, similarity_threshold=0.7)
# Example messages
message1 = "This is a friendly message."
message2 = "This is a very similar message."
message3 = "This is a horrible message, I hate you all!"
    message4 = "This is a f***ing terrible message."  # Profanity test (flags only words present in profanity.txt)
message5 = "This is a slightly changed version of a previous message."
print(f"Message 1: {message1} - {moderator.moderate(message1)}")
print(f"Message 2: {message2} - {moderator.moderate(message2)}")
print(f"Message 3: {message3} - {moderator.moderate(message3)}")
print(f"Message 4: {message4} - {moderator.moderate(message4)}")
print(f"Message 5: {message5} - {moderator.moderate(message5)}") # Might be rejected due to similarity to message2
# To run this code, you'll need:
# 1. Install the required libraries: `pip install nltk scikit-learn pandas joblib`
# 2. Create a file named `profanity.txt` with a list of profane words, one word per line.
# 3. Optionally, create a `toxic_comments.csv` file (columns `comment_text` and `toxic`) to train the toxicity model.
#    If this file is missing, the code still runs but falls back to a placeholder model fitted on two dummy sentences,
#    so toxicity detection will be effectively useless; in that case you might also drop the `is_toxic` check.
#    Example formats for both input files are sketched below.
# 4. Run the Python script. The first time, it will train and save the model. Subsequent runs will load the saved model.
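#
# Illustrative file formats (the entries below are made-up placeholders, not real data):
#
#    profanity.txt (one word per line):
#        badword1
#        badword2
#
#    toxic_comments.csv (header row, then one labelled comment per line):
#        comment_text,toxic
#        "Have a great day everyone!",0
#        "You are all idiots, shut up.",1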
```
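To see the class in context, here is a minimal sketch of wiring `ChatModerator` into an interactive console loop. It assumes the code above is saved as `chat_moderator.py` (a filename chosen here for illustration) alongside `profanity.txt` and, optionally, `toxic_comments.csv`.
```python
# Minimal interactive moderation loop (illustrative sketch).
from chat_moderator import ChatModerator  # assumes the class above lives in chat_moderator.py

def run_chat():
    moderator = ChatModerator(toxicity_threshold=0.6, similarity_threshold=0.7)
    print("Type a message (or 'quit' to exit):")
    while True:
        message = input("> ").strip()
        if message.lower() == "quit":
            break
        verdict = moderator.moderate(message)
        if verdict == "Approved":
            print(f"[chat] {message}")        # an approved message would be broadcast to the room
        else:
            print(f"[moderator] {verdict}")   # rejection reason shown only to the sender

if __name__ == "__main__":
    run_chat()
```
In a real deployment the same `moderate()` call would sit behind whatever transport the chat uses (a WebSocket handler, a bot callback, and so on), with the returned rejection string mapped to a user-facing warning instead of printed to the console.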