AI-based Job Matching Tool (Python, NLP)

👤 Sharing: AI
```python
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string

# Make sure the NLTK data this script relies on is installed, downloading any
# missing package the first time the script runs. Each entry pairs the path
# that nltk.data.find() looks up with the package id nltk.download() fetches.
#   - wordnet   : needed by WordNetLemmatizer
#   - stopwords : needed by stopwords.words('english')
#   - punkt     : tokenizer models loaded by older NLTK releases
#   - punkt_tab : tokenizer tables that newer NLTK releases load instead of
#                 'punkt'; without it nltk.word_tokenize raises LookupError
_NLTK_RESOURCES = (
    ('corpora/wordnet', 'wordnet'),
    ('corpora/stopwords', 'stopwords'),
    ('tokenizers/punkt', 'punkt'),
    ('tokenizers/punkt_tab', 'punkt_tab'),
)

for _resource_path, _package_id in _NLTK_RESOURCES:
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        nltk.download(_package_id)


def preprocess_text(text):
    """
    Normalize raw text into a space-separated string of lemmatized tokens.

    The text is lower-cased and split with NLTK's word tokenizer. English
    stop words, punctuation tokens, and any token that is not purely
    alphanumeric are discarded; every remaining token is lemmatized with
    the WordNet lemmatizer.

    Args:
        text: The input text string.

    Returns:
        The surviving lemmas joined by single spaces.
    """
    raw_tokens = nltk.word_tokenize(text.lower())

    english_stop_words = set(stopwords.words('english'))
    punctuation_marks = set(string.punctuation)
    wordnet = WordNetLemmatizer()

    lemmas = []
    for token in raw_tokens:
        # Skip filler words and stand-alone punctuation.
        if token in english_stop_words or token in punctuation_marks:
            continue
        # Skip anything containing non-alphanumeric characters.
        if not token.isalnum():
            continue
        lemmas.append(wordnet.lemmatize(token))

    return " ".join(lemmas)


def calculate_similarity(job_description, resume_text):
    """
    Calculates the cosine similarity between a job description and a resume.

    Both texts are run through preprocess_text, turned into TF-IDF vectors
    fitted on just these two documents, and compared with cosine similarity.

    Args:
        job_description: The text of the job description.
        resume_text: The text of the resume.

    Returns:
        The cosine similarity score (float) between the job description and
        resume. Returns 0.0 when either text has no usable tokens left after
        preprocessing, instead of letting the vectorizer fail.
    """

    job_description = preprocess_text(job_description)
    resume_text = preprocess_text(resume_text)

    # A document with no tokens cannot be similar to anything.
    if not job_description or not resume_text:
        return 0.0

    vectorizer = TfidfVectorizer()
    try:
        vectors = vectorizer.fit_transform([job_description, resume_text])
    except ValueError:
        # TfidfVectorizer raises ValueError ("empty vocabulary") when no token
        # survives its own tokenization, e.g. when only single-character
        # words remain; treat that as "no overlap" rather than crashing.
        return 0.0

    similarity_matrix = cosine_similarity(vectors)
    # Entry [0][1] compares document 0 (job) with document 1 (resume);
    # convert the NumPy scalar to a plain Python float.
    return float(similarity_matrix[0][1])



def match_jobs(job_descriptions, resume_text, top_n=5):
    """
    Rank job descriptions by how closely each one matches a resume.

    Every job description is scored against the resume with
    calculate_similarity, and the best-scoring entries are returned.

    Args:
        job_descriptions: A list of job description texts.
        resume_text: The text of the resume.
        top_n: The number of top matches to return.

    Returns:
        A list of (index, score) tuples, where index is the position of the
        job description in job_descriptions and score is its similarity to
        the resume, ordered from highest to lowest score.
    """
    scored = [
        (position, calculate_similarity(posting, resume_text))
        for position, posting in enumerate(job_descriptions)
    ]

    # Highest similarity first; the sort is stable, so ties keep input order.
    scored.sort(key=lambda pair: pair[1], reverse=True)

    return scored[:top_n]


if __name__ == '__main__':
    # Demo: rank five sample job postings against one sample resume.
    job_descriptions = [
        "We are looking for a Python developer with experience in NLP.  Experience with TensorFlow or PyTorch is a plus.  Must have experience in designing, developing, and deploying machine learning models.  Strong communication skills required.",
        "Seeking a data scientist to analyze large datasets.  Proficiency in SQL and statistical modeling is required.  Experience with Python is a plus.",
        "Software engineer needed to build web applications.  Experience with JavaScript, React, and Node.js is required.  Knowledge of Python is a plus.",
        "Machine learning engineer to develop and deploy AI models. Experience with Python, TensorFlow, and cloud platforms is required.",
        "Full-stack developer with experience in Python and JavaScript.  Experience with Django and React is preferred.",
    ]

    resume_text = """
    I am a highly motivated data scientist with 5+ years of experience in machine learning and natural language processing.
    I have extensive experience in Python, TensorFlow, and PyTorch.  I have built and deployed several machine learning models in production.
    I also have experience with SQL and data analysis. I have strong communication and problem-solving skills. My key skills include Python, NLP, Machine Learning, Deep Learning, TensorFlow, PyTorch, SQL, Data Analysis, Communication.
    """

    top_matches = match_jobs(job_descriptions, resume_text)

    print("Top Job Matches:")
    for idx, score in top_matches:
        # Job numbers are shown 1-based; idx is the 0-based list position.
        print(
            f"Job {idx + 1}: Similarity = {score:.4f}",
            f"Job Description: {job_descriptions[idx]}",
            "---",
            sep="\n",
        )
```
👁️ Viewed: 8

Comments