AI-Powered Resume Ranking Tool in Python

👤 Sharing: AI
```python
import nltk
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

class ResumeRanker:
    """Rank resumes by TF-IDF cosine similarity to a job description.

    The job description and every resume are normalised with
    ``preprocess_text`` when the ranker is constructed; all scoring and the
    texts returned by ``get_ranked_resume_texts`` use the normalised strings.
    """

    def __init__(self, job_description, resumes):
        """
        Initializes the ResumeRanker with the job description and a list of resumes.

        Args:
            job_description (str): The job description text.
            resumes (list): A list of resume texts.
        """
        self.job_description = self.preprocess_text(job_description)
        self.resumes = [self.preprocess_text(resume) for resume in resumes]
        self.vectorizer = TfidfVectorizer()

    def preprocess_text(self, text):
        """
        Preprocesses the input text by:
            - Converting to lowercase
            - Replacing punctuation with spaces
            - Tokenizing
            - Removing English stop words
            - Lemmatizing

        Args:
            text (str): The input text to preprocess.

        Returns:
            str: The surviving tokens joined by single spaces. If an NLTK
                resource lookup fails (``LookupError``), a hint is printed and
                the text as normalised so far is returned instead.
        """
        try:
            # Imported lazily so the module can be loaded before NLTK is needed.
            from nltk.corpus import stopwords
            from nltk.stem import WordNetLemmatizer
            from nltk.tokenize import word_tokenize

            text = text.lower()
            # Substitute a space (not an empty string) for punctuation so that
            # words joined only by punctuation, e.g. "python/java" or
            # "full-stack", remain separate tokens instead of being fused.
            text = re.sub(r'[^\w\s]', ' ', text)
            tokens = word_tokenize(text)
            stop_words = set(stopwords.words('english'))
            lemmatizer = WordNetLemmatizer()
            tokens = [lemmatizer.lemmatize(w) for w in tokens if w not in stop_words]
            return ' '.join(tokens)
        except LookupError as e:
            print(f"Error during preprocessing (likely missing NLTK data): {e}. Please download required NLTK data.  Try running nltk.download('all') in a Python terminal.")
            # Best-effort fallback: hand back whatever normalisation succeeded
            # before the failing NLTK call.
            return text

    def calculate_similarity(self):
        """
        Calculates the cosine similarity between the job description and each resume.

        Returns:
            list: A list of similarity scores, one for each resume, in the
                same order as ``self.resumes``. An empty list when there are
                no resumes; all zeros if the calculation raises.
        """
        # With no resumes there is nothing to score; skip vectorisation so an
        # empty input is not reported as a calculation error.
        if not self.resumes:
            return []

        try:
            # Row 0 of the matrix is the job description, rows 1.. the resumes.
            documents = [self.job_description] + self.resumes
            tfidf_matrix = self.vectorizer.fit_transform(documents)
            cosine_similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])
            return cosine_similarities[0].tolist()
        except Exception as e:
            # Deliberate best-effort: report the problem and fall back to
            # neutral scores rather than aborting the ranking.
            print(f"Error during similarity calculation: {e}")
            return [0.0] * len(self.resumes)

    def rank_resumes(self):
        """
        Ranks the resumes based on their similarity to the job description.

        Returns:
            list: A list of ``(resume_index, similarity_score)`` tuples sorted
                in descending order of similarity. ``sorted`` is stable, so
                resumes with equal scores keep their input order.
        """
        similarity_scores = self.calculate_similarity()
        return sorted(enumerate(similarity_scores), key=lambda pair: pair[1], reverse=True)

    def get_ranked_resume_texts(self):
        """
        Returns the resumes in ranked order.

        Returns:
            list: The preprocessed resume texts (as stored in
                ``self.resumes``), sorted in descending order of similarity
                to the job description.
        """
        return [self.resumes[index] for index, _score in self.rank_resumes()]

if __name__ == '__main__':
    # Demo run: score three sample resumes against a software-engineer posting.
    job_description = """
    Software Engineer

    We are looking for a passionate software engineer to design, develop, and install software solutions.
    Software Engineer responsibilities include gathering user requirements, defining system functionality, and writing code in various languages.
    Our ideal candidates are familiar with the software development life cycle (SDLC) from preliminary system analysis to testing and deployment.
    """

    resumes = [
        """
        John Doe
        Software Engineer

        Experience in Python, Java, and C++.  Developed web applications and worked on the SDLC.
        Familiar with user requirements and system analysis.
        """,
        """
        Jane Smith
        Data Scientist

        Experience in data analysis and machine learning.  Proficient in Python and R.
        Worked on various data science projects.
        """,
        """
        Peter Jones
        Web Developer

        Experience in HTML, CSS, and JavaScript.  Developed responsive websites.
        """
    ]

    ranker = ResumeRanker(job_description, resumes)
    ranked_resumes = ranker.rank_resumes()

    # Report each resume's rank, its 1-based position in the input list, its
    # score, and the first 100 characters of the raw (unprocessed) resume.
    print("Ranked Resumes:")
    for rank, (resume_index, score) in enumerate(ranked_resumes, start=1):
        preview = resumes[resume_index][:100]
        print(f"Rank {rank}: Resume {resume_index+1} - Similarity Score: {score:.4f}")
        print(f"Content: {preview}...\n")

    # Same ordering again, this time showing the texts held by the ranker.
    ranked_resume_texts = ranker.get_ranked_resume_texts()
    print("\nRanked Resume Texts (First 50 chars):")
    for rank, text in enumerate(ranked_resume_texts, start=1):
        print(f"Rank {rank}: {text[:50]}...")
```
👁️ Viewed: 8

Comments