AI-based Learning Assistant (Python)

👤 Sharing: AI
```python
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import random
import json  # For loading knowledge base from JSON file

# Download necessary NLTK data (if not already downloaded)
# Download required NLTK data packages on first run. nltk.data.find() is a
# cheap local lookup, so this only hits the network when a package is missing.
for package, data_path in [
    ('punkt', 'tokenizers/punkt'),
    ('wordnet', 'corpora/wordnet'),
    ('stopwords', 'corpora/stopwords'),
]:
    try:
        nltk.data.find(data_path)
    except LookupError:
        nltk.download(package)

from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

# Module-level NLP helpers shared by LemNormalize().
lemmatizer = WordNetLemmatizer()  # reduces tokens to their dictionary base form
stop_words = set(stopwords.words('english'))  # common English words to discard

def LemNormalize(text):
    """Lowercase, tokenize, strip stop words and punctuation, and lemmatize.

    Returns the surviving lemmatized tokens rejoined into one
    space-separated string.
    """
    words = nltk.word_tokenize(text.lower())
    kept = (w for w in words if w not in stop_words and w.isalnum())
    return " ".join(lemmatizer.lemmatize(w) for w in kept)

# Load knowledge base from JSON file
def load_knowledge_base(filepath):
    """Read a question -> answer mapping from a JSON file.

    Prints a diagnostic and returns an empty dict when the file is missing
    or contains invalid JSON, so callers can detect the failure by truthiness.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as handle:
            return json.load(handle)
    except FileNotFoundError:
        print(f"Error: Knowledge base file not found at {filepath}")
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in {filepath}")
    return {}

# Sample Knowledge Base (Example - replace with your actual knowledge)
# This is now a placeholder;  the real data will come from the JSON file.
# Keys are question strings, values are the answer strings returned to the user.
knowledge_base = {}

# Function to find the best matching response
# Function to find the best matching response
def respond(user_input, knowledge_base, vectorizer=None, tfidf_matrix=None):
    """Return the best-matching answer from *knowledge_base* for *user_input*.

    Parameters
    ----------
    user_input : str
        Raw text typed by the user.
    knowledge_base : dict
        Mapping of question text -> answer text.
    vectorizer, tfidf_matrix :
        Accepted for backward compatibility but unused. The previous
        incremental-update path refit the vectorizer and then sliced the
        matrix down to a single row, leaving nothing to compare against and
        crashing the similarity step, so a fresh model is fitted per call.

    Returns
    -------
    str
        The answer whose question is most similar to the input, or a
        fallback message when nothing matches or the knowledge base is empty.
    """
    fallback = ("I am sorry, I don't understand.  Please be more specific, "
                "or rephrase your query.")
    if not knowledge_base:
        # Guard: an empty corpus would make TfidfVectorizer raise.
        return fallback

    questions = list(knowledge_base.keys())
    # Normalize both sides identically; the original normalized only the
    # user input, which penalized matches against un-lemmatized keys.
    corpus = [LemNormalize(q) for q in questions] + [LemNormalize(user_input)]

    # Default tokenizer/stop-word handling: normalization above already
    # removed stop words, so passing stop_words='english' again is redundant.
    tfidf = TfidfVectorizer().fit_transform(corpus)

    # Compare the user's row (last) against every question row.
    scores = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()
    best = int(scores.argmax())
    if scores[best] == 0:
        return fallback
    return knowledge_base[questions[best]]


def run_learning_assistant(knowledge_filepath="knowledge_base.json"):
    """Run the interactive AI learning assistant loop.

    Parameters
    ----------
    knowledge_filepath : str
        Path to the JSON file mapping question strings to answer strings.
    """
    global knowledge_base  # Module-level store shared with respond() callers
    knowledge_base = load_knowledge_base(knowledge_filepath)

    if not knowledge_base:
        print("Knowledge base is empty. Please populate it with data.")
        return

    print("AI Learning Assistant: Hi! How can I help you learn today? (Type 'exit' to end)")

    while True:
        user_input = input("You: ")
        if user_input.lower() == 'exit':
            print("AI Learning Assistant: Goodbye!")
            break

        # No vectorizer state is threaded through the loop: the old
        # vectorizer/tfidf_matrix locals were initialized to None and never
        # reassigned, so every call took the fresh-fit path anyway.
        response = respond(user_input, knowledge_base)
        print("AI Learning Assistant:", response)


# Example usage (Make sure you have a 'knowledge_base.json' file)
# The JSON file should look like this:
# {
#     "what is python?": "Python is a high-level, general-purpose programming language.",
#     "what is machine learning?": "Machine learning is a subfield of artificial intelligence...",
#     "how do I print in python?": "You can use the print() function to display output."
# }


if __name__ == "__main__":
    # Entry point: start the interactive assistant with the default JSON path.
    run_learning_assistant()
```
👁️ Viewed: 9

Comments