-
Notifications
You must be signed in to change notification settings - Fork 0
/
checker.py
82 lines (65 loc) · 3.16 KB
/
checker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import os
import PyPDF2
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from const import position_map, BASE_FOLDER, USER_RESUME_NAME
from colorama import Fore, Style, init
# Initialize colorama. autoreset=True makes colorama reset the color after each
# print, so the colored messages printed by this script do not need to append
# Style.RESET_ALL themselves.
init(autoreset=True)
def extract_text_from_pdf(file_path):
    """Return the lowercased text of every page of the PDF at ``file_path``.

    The text of consecutive pages is separated by a single space, and leading
    and trailing whitespace is removed from the combined result.
    """
    with open(file_path, 'rb') as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Extract while the file is still open: the reader pulls page data
        # from the underlying stream on demand.
        page_texts = [pdf_page.extract_text() for pdf_page in pdf_reader.pages]
    combined = ' '.join(page_texts)
    return combined.strip().lower()
def rank_resumes(cv_folder, keywords):
    """Rank the PDF resumes in a folder by their relevance to a keyword list.

    Every file directly inside ``cv_folder`` whose name ends in ``.pdf``
    (matched case-insensitively) is converted to text with
    ``extract_text_from_pdf``. The texts are vectorized with TF-IDF over a
    vocabulary restricted to the lowercased keywords, and each resume is
    scored by its cosine similarity to a query document built from those
    same keywords.

    Args:
        cv_folder: Path of the directory that contains the resume PDFs.
        keywords: Iterable of keyword strings for the position.

    Returns:
        A list of ``(filename, score)`` tuples sorted by descending score.
        Resumes with equal scores keep alphabetical filename order. The list
        is empty when the folder contains no PDF files.

    Raises:
        OSError: If ``cv_folder`` cannot be listed (for example, it does not
            exist).
    """
    # Lowercase the keywords to match the lowercased resume text.
    # dict.fromkeys drops keywords that collide after lowercasing (a fixed
    # TfidfVectorizer vocabulary rejects duplicate terms) while keeping the
    # first-seen order.
    vocabulary = list(dict.fromkeys(keyword.lower() for keyword in keywords))

    # os.listdir returns entries in arbitrary order; sorting makes the result
    # (including the order of tied scores) deterministic.
    resume_names = sorted(
        filename
        for filename in os.listdir(cv_folder)
        if filename.lower().endswith('.pdf')
    )
    if not resume_names:
        # Nothing to rank; the vectorizer cannot be fitted on zero documents.
        return []

    resumes = [
        extract_text_from_pdf(os.path.join(cv_folder, filename))
        for filename in resume_names
    ]

    # Restrict the TF-IDF space to the keywords only.
    vectorizer = TfidfVectorizer(vocabulary=vocabulary)
    tfidf_matrix = vectorizer.fit_transform(resumes)

    # Score every resume against the keywords themselves. Comparing the
    # resumes with one another would reward resumes for resembling the rest
    # of the pile instead of for matching the position.
    query_vector = vectorizer.transform([' '.join(vocabulary)])
    scores = cosine_similarity(tfidf_matrix, query_vector).ravel()

    # sorted() is stable, so ties stay in alphabetical filename order.
    return sorted(zip(resume_names, scores), key=lambda item: item[1], reverse=True)
def main():
    """Ask the user for a position, rank that position's resumes, and print the result.

    Prints the numbered positions from ``position_map``, reads the user's
    choice, ranks the PDFs found in ``BASE_FOLDER/<position in lowercase>``
    with ``rank_resumes``, shows at most the top five, and finally reports the
    rank and score of ``USER_RESUME_NAME`` if it is among the ranked files.
    """
    print(Fore.CYAN + "Select a position to rank resumes:")
    for key, (position, _) in position_map.items():
        print(Fore.YELLOW + f"{key}. {position}")

    # Get user input. Non-numeric input is treated like any other invalid
    # selection instead of crashing with a ValueError traceback.
    try:
        choice = int(input(Fore.CYAN + "Enter the number corresponding to the position: "))
    except ValueError:
        choice = None

    if choice not in position_map:
        print(Fore.RED + "Invalid selection. Please run the program again and select a valid option.")
        return

    position, keywords = position_map[choice]
    cv_folder = os.path.join(BASE_FOLDER, position.lower())  # Path to the folder
    print(Fore.GREEN + f"Selected Position: {position}")

    if not os.path.isdir(cv_folder):
        # Report a missing folder instead of letting os.listdir raise.
        print(Fore.RED + f"Resume folder '{cv_folder}' was not found.")
        return

    ranked_resumes = rank_resumes(cv_folder, keywords)

    # Output the rankings; slicing already copes with fewer than 5 resumes.
    print(Fore.MAGENTA + "Ranked Resumes:")
    for rank, (resume_name, score) in enumerate(ranked_resumes[:5], start=1):
        print(Fore.WHITE + f"Rank {rank}: {resume_name} with score {score:.2f}")

    # Check for the user's resume rank and score across the full ranking,
    # not only the displayed top entries.
    user_rank_info = next(
        (
            (rank, score)
            for rank, (name, score) in enumerate(ranked_resumes, start=1)
            if name == USER_RESUME_NAME
        ),
        None,
    )
    if user_rank_info is not None:
        user_rank, user_score = user_rank_info
        print(Fore.GREEN + f"Your resume '{USER_RESUME_NAME}' is ranked: {user_rank} with a score of {user_score:.2f}")
    else:
        print(Fore.RED + f"Your resume '{USER_RESUME_NAME}' was not found in the rankings.")


# Run the ranking
if __name__ == "__main__":
    main()