AI-based Job Matching Tool (Python, NLP)
👤 Sharing: AI
```python
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string
# Make sure every NLTK resource this script relies on is installed, and only
# download the ones that are missing so repeated runs stay offline.
# Each entry is (path probed with nltk.data.find, package id for nltk.download).
# 'punkt_tab' is listed alongside 'punkt' because recent NLTK releases load
# 'punkt_tab' inside nltk.word_tokenize, while older releases still load 'punkt'.
_REQUIRED_NLTK_RESOURCES = (
    ('corpora/wordnet', 'wordnet'),
    ('corpora/stopwords', 'stopwords'),
    ('tokenizers/punkt', 'punkt'),
    ('tokenizers/punkt_tab', 'punkt_tab'),
)
for _resource_path, _package_name in _REQUIRED_NLTK_RESOURCES:
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        # Resource not found in any NLTK data directory: fetch it.
        nltk.download(_package_name)
def preprocess_text(text):
    """
    Normalize raw text into a space-separated string of lemmatized tokens.

    The text is lower-cased and tokenized with NLTK. English stop words,
    punctuation tokens, and any token that is not purely alphanumeric are
    discarded; every remaining token is lemmatized with WordNet.

    Args:
        text: The input text string.

    Returns:
        The kept, lemmatized tokens joined by single spaces.
    """
    lowered = text.lower()
    raw_tokens = nltk.word_tokenize(lowered)

    english_stop_words = set(stopwords.words('english'))
    punctuation_marks = set(string.punctuation)
    wordnet = WordNetLemmatizer()

    kept = []
    for token in raw_tokens:
        # Skip noise tokens first, then anything containing non-alphanumerics.
        if token in english_stop_words or token in punctuation_marks:
            continue
        if not token.isalnum():
            continue
        kept.append(wordnet.lemmatize(token))
    return " ".join(kept)
def calculate_similarity(job_description, resume_text):
    """
    Calculates the cosine similarity between a job description and a resume.

    Both texts are run through preprocess_text, then a TF-IDF model is fitted
    on just these two documents and the cosine similarity of the two
    resulting vectors is returned.

    Args:
        job_description: The text of the job description.
        resume_text: The text of the resume.

    Returns:
        The cosine similarity score as a plain float. 0.0 is returned when
        neither text contains any term the vectorizer can use (for example
        both are empty or consist only of stop words), since there is
        nothing to compare.
    """
    job_description = preprocess_text(job_description)
    resume_text = preprocess_text(resume_text)

    vectorizer = TfidfVectorizer()
    try:
        vectors = vectorizer.fit_transform([job_description, resume_text])
    except ValueError:
        # scikit-learn raises ValueError for an empty vocabulary, i.e. no
        # usable term in either document. Treat that as "no similarity"
        # instead of crashing the caller's ranking loop.
        return 0.0

    similarity_matrix = cosine_similarity(vectors)
    # Row 0 is the job description, column 1 is the resume.
    return float(similarity_matrix[0][1])
def match_jobs(job_descriptions, resume_text, top_n=5):
    """
    Rank job descriptions by how closely each one matches a resume.

    Every job description is scored against the resume with
    calculate_similarity, and the best-scoring entries are returned.

    Args:
        job_descriptions: A list of job description texts.
        resume_text: The text of the resume.
        top_n: The number of top matches to return.

    Returns:
        A list of (index, similarity) tuples, where index is the position of
        the job description in job_descriptions, ordered from highest to
        lowest similarity and truncated to top_n entries.
    """
    scored = [
        (index, calculate_similarity(description, resume_text))
        for index, description in enumerate(job_descriptions)
    ]
    # Best match first; the sort is stable, so ties keep their input order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:top_n]
if __name__ == '__main__':
    # Demo: rank a few sample job postings against one sample resume and
    # print the best matches with their similarity scores.
    job_descriptions = [
        "We are looking for a Python developer with experience in NLP. Experience with TensorFlow or PyTorch is a plus. Must have experience in designing, developing, and deploying machine learning models. Strong communication skills required.",
        "Seeking a data scientist to analyze large datasets. Proficiency in SQL and statistical modeling is required. Experience with Python is a plus.",
        "Software engineer needed to build web applications. Experience with JavaScript, React, and Node.js is required. Knowledge of Python is a plus.",
        "Machine learning engineer to develop and deploy AI models. Experience with Python, TensorFlow, and cloud platforms is required.",
        "Full-stack developer with experience in Python and JavaScript. Experience with Django and React is preferred."
    ]
    resume_text = """
I am a highly motivated data scientist with 5+ years of experience in machine learning and natural language processing.
I have extensive experience in Python, TensorFlow, and PyTorch. I have built and deployed several machine learning models in production.
I also have experience with SQL and data analysis. I have strong communication and problem-solving skills. My key skills include Python, NLP, Machine Learning, Deep Learning, TensorFlow, PyTorch, SQL, Data Analysis, Communication.
"""
    ranked = match_jobs(job_descriptions, resume_text)

    print("Top Job Matches:")
    for position, score in ranked:
        posting = job_descriptions[position]
        # Jobs are shown 1-based for readability; position is 0-based.
        print(f"Job {position + 1}: Similarity = {score:.4f}")
        print(f"Job Description: {posting}")
        print("---")
```
👁️ Viewed: 8
Comments