AI-powered Resume Optimization (Python, NLP)

👤 Sharing: AI
```python
import nltk
import re
from collections import Counter

# Ensure the required NLTK resources are available, downloading any that
# are missing. Driven by a table instead of three copy-pasted try/except
# blocks so adding a resource is a one-line change.
for _resource_path, _resource_name in (
    ('tokenizers/punkt', 'punkt'),
    ('corpora/stopwords', 'stopwords'),
    ('corpora/wordnet', 'wordnet'),
):
    try:
        nltk.data.find(_resource_path)
    except LookupError:
        # Resource not present locally; fetch it once.
        nltk.download(_resource_name)



from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize


class ResumeOptimizer:
    """
    Optimize a resume using lightweight NLP techniques.

    The pipeline cleans the raw text, tokenizes it, removes English stop
    words, lemmatizes the surviving tokens, computes word frequencies, and
    matches a caller-supplied skills list against the resume.
    """

    def __init__(self, resume_text):
        """
        Initializes the ResumeOptimizer with the resume text.

        Args:
            resume_text (str): The text content of the resume.
        """
        self.resume_text = resume_text
        # Stop words kept as a set for O(1) membership tests while filtering.
        self.stop_words = set(stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()

    def clean_text(self):
        """
        Cleans the resume text by removing special characters,
        converting to lowercase, and collapsing whitespace runs.

        Note: digits are removed along with punctuation, so terms like
        "C++" or "2018" do not survive cleaning.

        Returns:
            str: The cleaned resume text.
        """
        text = re.sub(r'[^a-zA-Z\s]', '', self.resume_text)  # Keep letters/whitespace only
        text = text.lower()  # Normalize case
        text = ' '.join(text.split())  # Collapse all whitespace to single spaces
        return text

    def tokenize_text(self, text):
        """
        Tokenizes the cleaned resume text into individual words.

        Args:
            text (str): The cleaned resume text.

        Returns:
            list: A list of words (tokens).
        """
        return word_tokenize(text)

    def remove_stopwords(self, tokens):
        """
        Removes common English stop words from the list of tokens.

        Tokens are expected to be lowercase already (see clean_text), which
        matches the casing of NLTK's stop-word list.

        Args:
            tokens (list): A list of words (tokens).

        Returns:
            list: A list of tokens with stop words removed.
        """
        return [token for token in tokens if token not in self.stop_words]

    def lemmatize_tokens(self, tokens):
        """
        Lemmatizes the tokens to reduce words to their base form.

        Args:
            tokens (list): A list of words (tokens).

        Returns:
            list: A list of lemmatized tokens.
        """
        return [self.lemmatizer.lemmatize(token) for token in tokens]

    def analyze_word_frequency(self, tokens):
        """
        Analyzes the frequency of words in the processed resume text.

        Args:
            tokens (list): A list of processed tokens.

        Returns:
            collections.Counter: A Counter object containing word frequencies.
        """
        return Counter(tokens)

    def extract_skills(self, skills_list):
        """
        Extracts relevant skills from the resume text based on a given list.

        Matching is case-insensitive and anchored so a skill must stand on
        its own word boundaries: "Java" does not match inside "JavaScript",
        and punctuation-bearing skills such as "C++" are matched against the
        original resume text (the cleaned text strips punctuation, which
        previously made such skills impossible to find).

        Args:
            skills_list (list): A list of skills to look for in the resume.

        Returns:
            list: The subset of skills_list found in the resume, in the
                order they were given.
        """
        skills_found = []
        for skill in skills_list:
            # re.escape protects regex metacharacters in skills like "C++".
            # Lookarounds (not \b) are used because \b misbehaves when the
            # skill starts or ends with a non-word character.
            pattern = r'(?<!\w)' + re.escape(skill) + r'(?!\w)'
            if re.search(pattern, self.resume_text, flags=re.IGNORECASE):
                skills_found.append(skill)
        return skills_found

    def optimize(self, skills_list):
        """
        Performs the complete resume optimization process.

        Args:
            skills_list (list): A list of desired skills.

        Returns:
            tuple: A tuple containing:
                - cleaned_resume (str): The cleaned resume text.
                - word_frequency (collections.Counter): Word frequency analysis.
                - extracted_skills (list): Extracted skills from the resume.
        """
        cleaned_resume = self.clean_text()
        tokens = self.tokenize_text(cleaned_resume)
        tokens = self.remove_stopwords(tokens)
        tokens = self.lemmatize_tokens(tokens)
        word_frequency = self.analyze_word_frequency(tokens)
        extracted_skills = self.extract_skills(skills_list)

        return cleaned_resume, word_frequency, extracted_skills


if __name__ == '__main__':
    # Demo: run the full optimization pipeline against a sample resume and
    # report the cleaned text, top word frequencies, and matched skills.
    sample_resume = """
    John Doe
    Software Engineer
    john.doe@email.com | (123) 456-7890

    Summary
    A highly motivated and skilled software engineer with 5+ years of experience in developing and implementing software solutions.
    Proficient in Python, Java, and C++.  Experienced in using machine learning techniques and natural language processing (NLP).
    Passionate about creating innovative and efficient software applications.

    Skills
    - Python
    - Java
    - C++
    - Machine Learning
    - Natural Language Processing (NLP)
    - Data Analysis
    - SQL
    - Git
    - Agile Development

    Experience
    Software Engineer, ABC Company (2018-Present)
    - Developed and maintained software applications using Python and Java.
    - Implemented machine learning algorithms for data analysis.
    - Collaborated with cross-functional teams to deliver high-quality software products.

    Education
    Bachelor of Science in Computer Science, XYZ University (2014-2018)

    Projects
    - Developed a machine learning model for image classification using Python and TensorFlow.
    - Created a web application using Java and Spring Boot.
    """

    target_skills = ["Python", "Machine Learning", "Data Analysis", "Cloud Computing", "Project Management"]

    # Run the pipeline and unpack its three results.
    optimizer = ResumeOptimizer(sample_resume)
    cleaned, frequencies, matched = optimizer.optimize(target_skills)

    print("Cleaned Resume:\n", cleaned)
    print("\nWord Frequency:\n", frequencies.most_common(10))  # Top 10 most frequent words
    print("\nExtracted Skills:\n", matched)
```
👁️ Viewed: 9

Comments