AI-Powered Chat Moderator (Python, NLP)

```python
import nltk
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib  # For saving and loading models

# Download necessary NLTK resources (run this once)
# nltk.download('punkt')
# nltk.download('stopwords')
# nltk.download('wordnet')
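# nltk.download('punkt_tab')  # assumption: may also be required for word_tokenize on newer NLTK releases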

class ChatModerator:
    def __init__(self, toxicity_threshold=0.7, similarity_threshold=0.8,
                 profanity_file="profanity.txt", model_path="toxicity_model.joblib",
                 vectorizer_path="tfidf_vectorizer.joblib"):
        """
        Initializes the ChatModerator.

        Args:
            toxicity_threshold (float): Threshold for classifying text as toxic. Higher = stricter.
            similarity_threshold (float): Threshold for detecting similar messages. Higher = stricter.
            profanity_file (str): Path to a text file containing a list of profane words (one word per line).
            model_path (str): Path to save/load the toxicity model.
            vectorizer_path (str): Path to save/load the TF-IDF vectorizer.
        """

        self.toxicity_threshold = toxicity_threshold
        self.similarity_threshold = similarity_threshold
        self.profanity = self.load_profanity(profanity_file)
        self.message_history = [] # Store processed messages for similarity checking

        # Load or train the toxicity model and vectorizer
        try:
            self.model = joblib.load(model_path)
            self.vectorizer = joblib.load(vectorizer_path)
            print("Loaded existing toxicity model and vectorizer.")
        except FileNotFoundError:
            print("Training a new toxicity model and vectorizer...")
            self.model, self.vectorizer = self.train_toxicity_model()
            joblib.dump(self.model, model_path)  # Save the model
            joblib.dump(self.vectorizer, vectorizer_path) # Save the vectorizer
            print("Toxicity model and vectorizer trained and saved.")


    def load_profanity(self, file_path):
        """Loads a list of profane words from a file."""
        try:
            with open(file_path, 'r') as f:
                return set(line.strip().lower() for line in f)
        except FileNotFoundError:
            print(f"Warning: Profanity file not found at {file_path}.  Profanity check will be disabled.")
            return set()

    def preprocess_text(self, text):
        """
        Preprocesses the input text: lowercasing, removing punctuation, tokenization, stop word removal, and lemmatization.
        """
        text = text.lower()
        text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
        tokens = nltk.word_tokenize(text)
        stop_words = set(nltk.corpus.stopwords.words('english'))
        tokens = [w for w in tokens if w not in stop_words]

        lemmatizer = nltk.stem.WordNetLemmatizer()
        tokens = [lemmatizer.lemmatize(w) for w in tokens]

        return " ".join(tokens)

    def train_toxicity_model(self, training_data="toxic_comments.csv", max_features=5000):
        """
        Trains a simple toxicity detection model using TF-IDF and Logistic Regression.

        Args:
            training_data (str): Path to a CSV file containing training data with 'comment_text' and 'toxic' columns.
            max_features (int): Maximum number of features to use for TF-IDF.

        Returns:
            tuple: A tuple containing the trained model and the TF-IDF vectorizer.
        """
        import pandas as pd
        from sklearn.model_selection import train_test_split
        from sklearn.linear_model import LogisticRegression
        from sklearn.metrics import accuracy_score, classification_report

        def placeholder_model_and_vectorizer():
            # Fit on tiny dummy data so later calls to transform()/predict_proba()
            # don't raise NotFittedError; the resulting predictions are meaningless.
            vec = TfidfVectorizer()
            X_dummy = vec.fit_transform(["placeholder toxic text", "placeholder clean text"])
            return LogisticRegression().fit(X_dummy, [1, 0]), vec

        try:
            df = pd.read_csv(training_data)
        except FileNotFoundError:
            print(f"Error: Training data file not found at {training_data}. Using a placeholder model.")
            return placeholder_model_and_vectorizer()


        # If the CSV has a single 'toxic' label column, use it directly.
        if 'toxic' in df.columns:
            X = df['comment_text']
            y = df['toxic']
        else:
            # Otherwise combine any columns whose names contain 'toxic'
            # (e.g. toxic, severe_toxic, ...) into a single binary label.
            toxicity_cols = [col for col in df.columns if 'toxic' in col.lower()]
            if not toxicity_cols:
                print("Error: No 'toxic' column found in the dataframe. Using a placeholder model.")
                return placeholder_model_and_vectorizer()

            y = df[toxicity_cols].any(axis=1).astype(int)
            X = df['comment_text']

        X = X.fillna('')  # Handle missing text data

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        vectorizer = TfidfVectorizer(max_features=max_features)
        X_train_vectors = vectorizer.fit_transform(X_train)
        X_test_vectors = vectorizer.transform(X_test)

        model = LogisticRegression(solver='liblinear', random_state=42)
        model.fit(X_train_vectors, y_train)

        y_pred = model.predict(X_test_vectors)
        print("Toxicity Model Training Report:")
        print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
        print(classification_report(y_test, y_pred))

        return model, vectorizer

    def is_toxic(self, text):
        """
        Detects if the text is toxic using the trained model.

        Args:
            text (str): The text to analyze.

        Returns:
            bool: True if the text is considered toxic, False otherwise.
        """

        # Handle empty input gracefully
        if not text:
            return False

        text = self.preprocess_text(text)
        text_vector = self.vectorizer.transform([text])
        probability = self.model.predict_proba(text_vector)[0][1]  # Probability of being toxic

        return probability > self.toxicity_threshold

    def contains_profanity(self, text):
        """Checks if the text contains any profane words."""
        # Extract word-like tokens so punctuation attached to a word (e.g. "word!") still matches.
        words = re.findall(r"[\w*]+", text.lower())
        return any(word in self.profanity for word in words)

    def is_repetitive(self, text):
        """
        Checks if the text is too similar to previous messages.

        Args:
            text (str): The text to analyze.

        Returns:
            bool: True if the text is similar to a previous message, False otherwise.
        """
        if not self.message_history:
            return False

        text = self.preprocess_text(text)
        vector = self.vectorizer.transform([text])

        similarities = cosine_similarity(vector, self.vectorizer.transform(self.message_history))
        max_similarity = similarities.max()

        return max_similarity > self.similarity_threshold

    def moderate(self, text):
        """
        Moderates the given text based on toxicity, profanity, and repetitiveness.

        Args:
            text (str): The text to moderate.

        Returns:
            str:  "Approved" if the text passes moderation, or a message explaining why it was rejected.
        """

        if self.is_toxic(text):
            return "Rejected: Message flagged as potentially toxic."

        if self.contains_profanity(text):
            return "Rejected: Message contains profanity."

        if self.is_repetitive(text):
            return "Rejected: Message is too similar to previous messages."

        processed_text = self.preprocess_text(text)
        self.message_history.append(processed_text)  # Store for future similarity checks
        return "Approved"

# Example Usage:
if __name__ == '__main__':
    moderator = ChatModerator(toxicity_threshold=0.6, similarity_threshold=0.7)

    # Example messages
    message1 = "This is a friendly message."
    message2 = "This is a very similar message."
    message3 = "This is a horrible message, I hate you all!"
    message4 = "This is a f***ing terrible message." #Profanity test
    message5 = "This is a slightly changed version of a previous message."

    print(f"Message 1: {message1} - {moderator.moderate(message1)}")
    print(f"Message 2: {message2} - {moderator.moderate(message2)}")
    print(f"Message 3: {message3} - {moderator.moderate(message3)}")
    print(f"Message 4: {message4} - {moderator.moderate(message4)}")
    print(f"Message 5: {message5} - {moderator.moderate(message5)}")  # Might be rejected due to similarity to message2


#  To run this code, you'll need to:
#  1.  Install the required libraries: `pip install nltk scikit-learn pandas joblib`
#  2.  Create a file named `profanity.txt` containing a list of profane words, one word per line.
#  3.  Optionally, create a `toxic_comments.csv` file for training the toxicity model. If this file is missing,
#      the code will still run but will fall back to a placeholder model, so toxicity detection will be useless.
#      If you don't want to provide training data, you can remove the call to `is_toxic` in `moderate`.
#      (A minimal sketch for creating both data files follows this code block.)
#  4.  Run the Python script. The first time, it will train and save the model; subsequent runs will load the saved model.
```
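
If `profanity.txt` and `toxic_comments.csv` don't exist yet, the sketch below creates tiny placeholder versions of both so the script can be smoke-tested end to end. The file names match the defaults used above; the specific words and comments are invented placeholders, not part of the moderator itself.

```python
# Minimal, illustrative data files for smoke-testing the ChatModerator above.
# Use a real profanity list and a real labeled dataset for meaningful results.
import csv

# profanity.txt: one lowercase word per line, as expected by load_profanity().
with open("profanity.txt", "w") as f:
    f.write("\n".join(["darn", "heck"]))

# toxic_comments.csv: 'comment_text' and 'toxic' columns, as expected by train_toxicity_model().
rows = [
    ("have a wonderful day", 0),
    ("thanks for the help everyone", 0),
    ("that was a really useful answer", 0),
    ("welcome to the channel, glad you joined", 0),
    ("great game last night", 0),
    ("you are an idiot and everyone hates you", 1),
    ("shut up, nobody wants you here", 1),
    ("get lost, you worthless troll", 1),
    ("you are disgusting and stupid", 1),
    ("i hate you, leave this chat", 1),
]
with open("toxic_comments.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["comment_text", "toxic"])
    writer.writerows(rows)
```

With ten labeled rows, the 80/20 split in `train_toxicity_model` still sees both classes, so training runs end to end, but accuracy figures on a toy set like this are meaningless.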