AI-Powered Resume Optimization (Python, NLP)
```python
import nltk
import re
from collections import Counter

# Download necessary NLTK resources (if not already downloaded)
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')
try:
    # Note: newer NLTK releases (>= 3.8.2) also look up 'punkt_tab' for word_tokenize
    nltk.data.find('tokenizers/punkt_tab')
except LookupError:
    nltk.download('punkt_tab')
try:
    nltk.data.find('corpora/stopwords')
except LookupError:
    nltk.download('stopwords')
try:
    nltk.data.find('corpora/wordnet')
except LookupError:
    nltk.download('wordnet')

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
class ResumeOptimizer:
    """
    A class for optimizing resumes using NLP techniques.
    """

    def __init__(self, resume_text):
        """
        Initializes the ResumeOptimizer with the resume text.

        Args:
            resume_text (str): The text content of the resume.
        """
        self.resume_text = resume_text
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()

    def clean_text(self):
        """
        Cleans the resume text by removing special characters,
        converting to lowercase, and removing extra whitespace.

        Returns:
            str: The cleaned resume text.
        """
        text = re.sub(r'[^a-zA-Z\s]', '', self.resume_text)  # Remove special characters
        text = text.lower()  # Convert to lowercase
        text = ' '.join(text.split())  # Collapse extra whitespace
        return text

    def tokenize_text(self, text):
        """
        Tokenizes the cleaned resume text into individual words.

        Args:
            text (str): The cleaned resume text.

        Returns:
            list: A list of words (tokens).
        """
        return word_tokenize(text)

    def remove_stopwords(self, tokens):
        """
        Removes common English stop words from the list of tokens.

        Args:
            tokens (list): A list of words (tokens).

        Returns:
            list: A list of tokens with stop words removed.
        """
        return [token for token in tokens if token not in self.stop_words]

    def lemmatize_tokens(self, tokens):
        """
        Lemmatizes the tokens to reduce words to their base form.

        Args:
            tokens (list): A list of words (tokens).

        Returns:
            list: A list of lemmatized tokens.
        """
        return [self.lemmatizer.lemmatize(token) for token in tokens]

    def analyze_word_frequency(self, tokens):
        """
        Analyzes the frequency of words in the processed resume text.

        Args:
            tokens (list): A list of processed tokens.

        Returns:
            collections.Counter: A Counter object containing word frequencies.
        """
        return Counter(tokens)

    def extract_skills(self, skills_list):
        """
        Extracts relevant skills from the resume text based on a given list of skills.

        Args:
            skills_list (list): A list of skills to look for in the resume.

        Returns:
            list: A list of skills found in the resume.
        """
        cleaned_resume = self.clean_text()
        skills_found = [skill for skill in skills_list if skill.lower() in cleaned_resume]
        return skills_found

    def optimize(self, skills_list):
        """
        Performs the complete resume optimization process.

        Args:
            skills_list (list): A list of desired skills.

        Returns:
            tuple: A tuple containing:
                - cleaned_resume (str): The cleaned resume text.
                - word_frequency (collections.Counter): Word frequency analysis.
                - extracted_skills (list): Extracted skills from the resume.
        """
        cleaned_resume = self.clean_text()
        tokens = self.tokenize_text(cleaned_resume)
        tokens = self.remove_stopwords(tokens)
        tokens = self.lemmatize_tokens(tokens)
        word_frequency = self.analyze_word_frequency(tokens)
        extracted_skills = self.extract_skills(skills_list)
        return cleaned_resume, word_frequency, extracted_skills
if __name__ == '__main__':
    # Example usage:
    resume_text = """
    John Doe
    Software Engineer
    john.doe@email.com | (123) 456-7890
    Summary
    A highly motivated and skilled software engineer with 5+ years of experience in developing and implementing software solutions.
    Proficient in Python, Java, and C++. Experienced in using machine learning techniques and natural language processing (NLP).
    Passionate about creating innovative and efficient software applications.
    Skills
    - Python
    - Java
    - C++
    - Machine Learning
    - Natural Language Processing (NLP)
    - Data Analysis
    - SQL
    - Git
    - Agile Development
    Experience
    Software Engineer, ABC Company (2018-Present)
    - Developed and maintained software applications using Python and Java.
    - Implemented machine learning algorithms for data analysis.
    - Collaborated with cross-functional teams to deliver high-quality software products.
    Education
    Bachelor of Science in Computer Science, XYZ University (2014-2018)
    Projects
    - Developed a machine learning model for image classification using Python and TensorFlow.
    - Created a web application using Java and Spring Boot.
    """

    desired_skills = ["Python", "Machine Learning", "Data Analysis", "Cloud Computing", "Project Management"]

    optimizer = ResumeOptimizer(resume_text)
    cleaned_resume, word_frequency, extracted_skills = optimizer.optimize(desired_skills)

    print("Cleaned Resume:\n", cleaned_resume)
    print("\nWord Frequency:\n", word_frequency.most_common(10))  # Print top 10 most frequent words
    print("\nExtracted Skills:\n", extracted_skills)
```
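A note on skill matching: `extract_skills` above does a plain substring check against the cleaned text, so a skill like "Java" would also be counted if the resume only mentioned "JavaScript". If that matters for your use case, a word-boundary match is a small refinement. The sketch below is one way to do it under that assumption; the `extract_skills_strict` helper name is just for illustration and is not part of the class above.

```python
import re

def extract_skills_strict(cleaned_resume, skills_list):
    """Match each skill as a whole word/phrase instead of a raw substring.

    `cleaned_resume` is assumed to be the lowercase, punctuation-free text
    produced by ResumeOptimizer.clean_text().
    """
    found = []
    for skill in skills_list:
        # Clean the skill the same way the resume was cleaned, then require
        # word boundaries so "java" does not match inside "javascript".
        cleaned_skill = ' '.join(re.sub(r'[^a-zA-Z\s]', '', skill.lower()).split())
        if cleaned_skill and re.search(r'\b' + re.escape(cleaned_skill) + r'\b', cleaned_resume):
            found.append(skill)
    return found

# Example (reusing the objects from the script above):
# print(extract_skills_strict(optimizer.clean_text(), desired_skills))
```

Skills that collapse to the same cleaned form (for example, "C++" and "C" both become "c") would still need special handling.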