AI-Powered Resume Ranking Tool in Python
👤 Sharing: AI
```python
import nltk
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
class ResumeRanker:
    """Rank resumes against a job description by TF-IDF cosine similarity.

    The job description and every resume are normalised with
    ``preprocess_text`` when the ranker is constructed, so ``self.resumes``
    holds the *preprocessed* text rather than the strings the caller passed in.
    Indices returned by ``rank_resumes`` refer to positions in the original
    ``resumes`` list, which lets callers map results back to their own data.
    """

    # NLTK resources shared by every instance. They are loaded on first use
    # instead of once per document, because reading the stop-word corpus for
    # each resume is pure repeated work.
    _stop_words = None
    _lemmatizer = None

    def __init__(self, job_description, resumes):
        """
        Initializes the ResumeRanker with the job description and a list of resumes.

        Args:
            job_description (str): The job description text.
            resumes (list): A list of resume texts.
        """
        self.job_description = self.preprocess_text(job_description)
        self.resumes = [self.preprocess_text(resume) for resume in resumes]
        self.vectorizer = TfidfVectorizer()

    @staticmethod
    def _nltk_resources():
        """Return the cached ``(stop_words, lemmatizer)`` pair, loading it on first use.

        Raises:
            LookupError: If the NLTK stop-word corpus has not been downloaded.
        """
        if ResumeRanker._stop_words is None:
            from nltk.corpus import stopwords
            from nltk.stem import WordNetLemmatizer

            ResumeRanker._lemmatizer = WordNetLemmatizer()
            # Assigned last: if loading the corpus raises, the cache stays
            # empty and the next call tries again.
            ResumeRanker._stop_words = frozenset(stopwords.words('english'))
        return ResumeRanker._stop_words, ResumeRanker._lemmatizer

    def preprocess_text(self, text):
        """
        Preprocesses the input text by:
        - Converting to lowercase
        - Removing punctuation
        - Tokenizing
        - Removing English stop words
        - Lemmatizing

        If a required NLTK data package is missing, a message is printed and
        the lowercased, punctuation-free text is returned without the
        tokenizing, stop-word and lemmatizing steps.

        Args:
            text (str): The input text to preprocess.

        Returns:
            str: The preprocessed text, tokens joined by single spaces.
        """
        text = text.lower()
        text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
        try:
            from nltk.tokenize import word_tokenize

            tokens = word_tokenize(text)
            stop_words, lemmatizer = ResumeRanker._nltk_resources()
            return ' '.join(
                lemmatizer.lemmatize(token)
                for token in tokens
                if token not in stop_words
            )
        except LookupError as e:
            print(f"Error during preprocessing (likely missing NLTK data): {e}. Please download required NLTK data. Try running nltk.download('all') in a Python terminal.")
            # Best-effort fallback: the text is already lowercased and has its
            # punctuation removed at this point.
            return text

    def calculate_similarity(self):
        """
        Calculates the cosine similarity between the job description and each resume.

        Returns:
            list: A list of similarity scores, one for each resume, in the same
            order as ``self.resumes``. Empty when there are no resumes. If the
            computation fails, a message is printed and every score is 0.0.
        """
        if not self.resumes:
            # With nothing to compare, the resume slice of the matrix would
            # have zero rows; the call below would fail and report an error
            # for what is really just an empty input.
            return []
        try:
            documents = [self.job_description] + self.resumes
            tfidf_matrix = self.vectorizer.fit_transform(documents)
            # Row 0 is the job description; the remaining rows are the resumes.
            cosine_similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])
            return cosine_similarities[0].tolist()
        except Exception as e:
            # Deliberately broad: ranking is best-effort, so any failure here
            # degrades to all-zero scores instead of aborting the caller.
            print(f"Error during similarity calculation: {e}")
            return [0.0] * len(self.resumes)

    def rank_resumes(self):
        """
        Ranks the resumes based on their similarity to the job description.

        Returns:
            list: A list of ``(resume_index, similarity_score)`` tuples sorted
            by descending score. ``sorted`` is stable, so resumes with equal
            scores keep their original relative order.
        """
        similarity_scores = self.calculate_similarity()
        return sorted(enumerate(similarity_scores), key=lambda pair: pair[1], reverse=True)

    def get_ranked_resume_texts(self):
        """
        Returns the resumes in ranked order.

        Returns:
            list: The preprocessed resume texts (as stored in ``self.resumes``),
            sorted in descending order of similarity to the job description.
        """
        return [self.resumes[index] for index, _score in self.rank_resumes()]
if __name__ == '__main__':
    # Demo: score three sample resumes against a single job posting.
    # The sample texts are kept flush-left inside their triple quotes so the
    # string contents (and therefore the printed previews) stay unchanged.
    job_description = """
Software Engineer
We are looking for a passionate software engineer to design, develop, and install software solutions.
Software Engineer responsibilities include gathering user requirements, defining system functionality, and writing code in various languages.
Our ideal candidates are familiar with the software development life cycle (SDLC) from preliminary system analysis to testing and deployment.
"""
    resumes = [
        """
John Doe
Software Engineer
Experience in Python, Java, and C++. Developed web applications and worked on the SDLC.
Familiar with user requirements and system analysis.
""",
        """
Jane Smith
Data Scientist
Experience in data analysis and machine learning. Proficient in Python and R.
Worked on various data science projects.
""",
        """
Peter Jones
Web Developer
Experience in HTML, CSS, and JavaScript. Developed responsive websites.
""",
    ]

    ranker = ResumeRanker(job_description, resumes)

    # Part 1: ranking as (index, score) pairs, previewing the original text.
    print("Ranked Resumes:")
    for rank, (resume_index, score) in enumerate(ranker.rank_resumes(), start=1):
        print(f"Rank {rank}: Resume {resume_index+1} - Similarity Score: {score:.4f}")
        preview = resumes[resume_index][:100]  # first 100 chars of the resume
        print(f"Content: {preview}...\n")

    # Part 2: the same ranking, but as the texts returned by the ranker.
    print("\nRanked Resume Texts (First 50 chars):")
    for rank, text in enumerate(ranker.get_ranked_resume_texts(), start=1):
        print(f"Rank {rank}: {text[:50]}...")
```
👁️ Viewed: 8
Comments