Skip to content

Commit

Permalink
simplify resume improver and fix langchain issue
Browse files Browse the repository at this point in the history
  • Loading branch information
takline committed Oct 7, 2024
1 parent ae83eae commit 454ae9d
Show file tree
Hide file tree
Showing 9 changed files with 178 additions and 180 deletions.
21 changes: 14 additions & 7 deletions models/job_post.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import List, Optional
from ..prompts.prompts import Prompts
from ..services.extractor import ExtractorLLM
from .. import config
from .. import services

# Run prompt initialization at import time: the Field(...) descriptions in the
# JobDescription class below read Prompts.descriptions while the class body executes.
# NOTE(review): presumably initialize() is what populates Prompts.descriptions — confirm.
Prompts.initialize()


class JobDescription(BaseModel):
"""Description of a job posting."""

Expand Down Expand Up @@ -39,20 +41,25 @@ class JobDescription(BaseModel):
None, description=Prompts.descriptions["JOB_DESCRIPTION"]["technical_skills"]
)
non_technical_skills: Optional[List[str]] = Field(
None, description=Prompts.descriptions["JOB_DESCRIPTION"]["non_technical_skills"]
None,
description=Prompts.descriptions["JOB_DESCRIPTION"]["non_technical_skills"],
)


class JobPost:
    """Hold a raw job posting and extract a structured description from it.

    Attributes:
        posting: The raw job posting text supplied by the caller.
        extractor_llm: The model object returned by
            ``services.langchain_helpers.create_llm`` for the configured
            chat model, model name and temperature.
        parsed_job: Dict produced by the most recent ``parse_job_post`` call,
            or ``None`` if the posting has not been parsed yet.
    """

    def __init__(self, posting: str):
        """Initialize JobPost with the job posting string."""
        self.posting = posting
        self.extractor_llm = services.langchain_helpers.create_llm(
            chat_model=config.CHAT_MODEL,
            model_name=config.MODEL_NAME,
            temperature=config.TEMPERATURE,
            cache=True,
        )
        self.parsed_job = None

    def parse_job_post(self, **chain_kwargs) -> dict:
        """Parse the job posting to extract job description and skills.

        Args:
            **chain_kwargs: Accepted so existing callers keep working; the
                values are not forwarded to the model call.

        Returns:
            dict: The extracted ``JobDescription`` converted with ``.dict()``.
                The same dict is stored on ``self.parsed_job``.
        """
        # Bind the JobDescription schema to the LLM so the response comes back
        # as a JobDescription instance instead of free text.
        model = self.extractor_llm.with_structured_output(JobDescription)
        self.parsed_job = model.invoke(self.posting).dict()
        return self.parsed_job
2 changes: 1 addition & 1 deletion prompts/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def _load_prompts(yaml_path: str) -> dict:
sub_data["job_posting_template"]
),
HumanMessagePromptTemplate.from_template(
sub_data.get("master_resume_template", "")
sub_data.get("resume_template", "")
),
HumanMessage(content=sub_data["instruction_message"]),
HumanMessage(content=sub_data["criteria_message"]),
Expand Down
12 changes: 6 additions & 6 deletions prompts/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,22 @@ SECTION_HIGHLIGHTER:
Keywords that may be triggered by Applicant Tracking Systems (ATS) that should be added (if applicable): {ats_keywords}
The ideal candidate has the following skills:{technical_skills}
{non_technical_skills}
master_resume_template: |
<Master Resume>{section}
resume_template: |
<Resume>{section}
instruction_message: |
<Instruction> Identify the relevant portions from the <Master Resume> that match the <Job Posting>, rephrase these relevant portions into highlights, and rate the relevance of each highlight to the <Job Posting> on a scale of 1-5.
<Instruction> Identify the relevant portions from the <Resume> that match the <Job Posting>, rephrase these relevant portions into highlights, and rate the relevance of each highlight to the <Job Posting> on a scale of 1-5.
criteria_message: |
<Criteria>
- Each highlight must be based on what is mentioned in the <Master Resume>.
- In each highlight, include how that experience in the <Master Resume> demonstrates an ability to perform duties mentioned in the <Job Posting>.
- Each highlight must be based on what is mentioned in the <Resume>.
- In each highlight, include how that experience in the <Resume> demonstrates an ability to perform duties mentioned in the <Job Posting>.
- In each highlight, try to include action verbs, give tangible and concrete examples, and include success metrics when available.
- Grammar, spellings, and sentence structure must be correct.
steps_message: |
<Steps>
- Create a <Plan> for following the <Instruction> while meeting all the <Criteria>.
- What <Additional Steps> are needed to follow the <Plan>?
- Follow all steps one by one and show your <Work>.
- Verify that highlights are reflective of the <Master Resume> and not the <Job Posting>. Update if necessary.
- Verify that highlights are reflective of the <Resume> and not the <Job Posting>. Update if necessary.
- Verify that all <Criteria> are met, and update if necessary.
- Provide the answer to the <Instruction> with prefix <Final Answer>.
Expand Down
1 change: 0 additions & 1 deletion services/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from .extractor import *
from .resume_improver import *
from .langchain_helpers import *
from .background_runner import *
25 changes: 0 additions & 25 deletions services/extractor.py

This file was deleted.

126 changes: 123 additions & 3 deletions services/langchain_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import langchain
from langchain_community.cache import InMemoryCache
from .. import config
from .. import utils

# Set up LLM cache: assign an InMemoryCache instance to langchain's
# module-level llm_cache attribute as soon as this module is imported.
# NOTE(review): presumably this lets repeated identical LLM calls be answered
# from memory for the lifetime of the process — confirm against LangChain docs.
langchain.llm_cache = InMemoryCache()
Expand All @@ -26,6 +27,7 @@ def format_list_as_string(lst: list, list_sep: str = "\n- ") -> str:
return str(lst)



def format_prompt_inputs_as_strings(prompt_inputs: list[str], **kwargs):
"""Convert values to string for all keys in kwargs matching list in prompt inputs."""
return {
Expand All @@ -45,16 +47,134 @@ def parse_date(date_str: str) -> datetime:


def datediff_years(start_date: str, end_date: str) -> float:
    """Calculate the difference between two dates in fractional years.

    The result is whole years plus whole months / 12; any leftover days are
    dropped, so the value is floored to the month.

    Args:
        start_date (str): The start date in string format.
        end_date (str): The end date in string format. Can be "Present"
            (any letter case) to use the current date.

    Returns:
        float: The difference in years, including fractional years.
    """
    # "Present" stands for an open-ended range; substitute today's date so it
    # goes through the same parsing path as any other end date.
    if isinstance(end_date, str) and end_date.strip().lower() == "present":
        end_date = datetime.today().strftime("%Y-%m-%d")
    datediff = relativedelta(parse_date(end_date), parse_date(start_date))
    return datediff.years + datediff.months / 12.0


def chain_formatter(format_type: str, input_data) -> str:
    """Format resume/job inputs for inclusion in a runnable sequence.

    Args:
        format_type (str): The type of data to format (e.g., 'experience',
            'projects', 'skills', 'education').
        input_data: The data to be formatted.

    Returns:
        str: The formatted data as a string. For an unrecognized
            ``format_type`` the input is returned unchanged.
    """
    # The list-producing formatters are joined with format_list_as_string.
    # format_prompt_inputs_as_strings is the wrong helper here: it builds a
    # dict from keyword arguments, and none are supplied at this call site.
    if format_type == "experience":
        return format_list_as_string(format_experiences_for_prompt(input_data))
    if format_type == "projects":
        return format_list_as_string(format_projects_for_prompt(input_data))
    if format_type == "skills":
        return format_list_as_string(format_skills_for_prompt(input_data))
    if format_type == "education":
        return format_education_for_resume(input_data)
    # Anything else is passed through untouched.
    return input_data


def format_education_for_resume(education_list: list[dict]) -> str:
    """Format education entries for inclusion in a resume.

    Args:
        education_list (list[dict]): A list of dictionaries containing
            education details. Each entry may have a 'school' string and a
            'degrees' list whose items carry a 'names' list.

    Returns:
        str: One "<school>: <degree>, <degree>" line per entry, joined with
            newlines. Only the first name of each degree is used; a degree
            with no names is rendered as "Degree".
    """
    formatted_education = []
    for entry in education_list or []:
        school = entry.get("school", "")
        first_names = []
        for degree in entry.get("degrees") or []:
            # `or` also covers an empty/None 'names' value, which would
            # otherwise fail on the [0] lookup.
            names = degree.get("names") or ["Degree"]
            first_names.append(names[0])
        formatted_education.append(f"{school}: {', '.join(first_names)}")
    return "\n".join(formatted_education)


def format_skills_for_prompt(input_data) -> list:
    """Format skills for inclusion in a prompt.

    Args:
        input_data (list): Skill categories. Each item is a dict that may
            have a "category" name and a "skills" list of strings.

    Returns:
        list: One string per category that has a "skills" key, shaped like
            "<category>: Proficient in <skill>, <skill>". The category prefix
            is omitted when the category is missing or empty.
    """
    result = []
    for cat in input_data or []:
        # Categories without a "skills" key contribute nothing.
        if "skills" not in cat:
            continue
        prefix = f"{cat['category']}: " if cat.get("category", "") else ""
        skills = cat["skills"] or []
        result.append(f"{prefix}Proficient in {', '.join(skills)}")
    return result

def get_cumulative_time_from_titles(titles) -> int:
    """Calculate the cumulative time from job titles.

    Args:
        titles (list): A list of job title dicts. Only items that have both
            a "startdate" and an "enddate" key are counted. An "enddate" of
            "current" (any letter case) means the role is ongoing.

    Returns:
        int: The summed duration in years, rounded to the nearest integer.
    """
    total_years = 0.0
    for title in titles:
        if "startdate" not in title or "enddate" not in title:
            continue
        end_date = title["enddate"]
        # Compare case-insensitively so "Current" is treated the same as
        # "current", matching how datediff_years treats "Present".
        if isinstance(end_date, str) and end_date.strip().lower() == "current":
            end_date = datetime.today().strftime("%Y-%m-%d")
        total_years += datediff_years(start_date=title["startdate"], end_date=end_date)
    return round(total_years)

def format_experiences_for_prompt(input_data) -> list:
    """Build one prompt-ready text snippet per work experience.

    Args:
        input_data: Iterable of experience dicts. "titles" (optional) is
            passed to get_cumulative_time_from_titles for the year count;
            "highlights" holds the bullet items.

    Returns:
        list: One string per experience that has a "highlights" key: an
            optional "<N> years experience in:" header, the highlights
            rendered as a list, and a trailing newline.
    """
    formatted = []
    for experience in input_data:
        pieces = []
        if "titles" in experience:
            years = get_cumulative_time_from_titles(experience["titles"])
            pieces.append(f"{years} years experience in:")
        if "highlights" not in experience:
            # No highlights: nothing is emitted for this experience.
            continue
        pieces.append(
            format_list_as_string(experience["highlights"], list_sep="\n - ")
        )
        pieces.append("\n")
        formatted.append("".join(pieces))
    return formatted

def format_projects_for_prompt(input_data) -> list:
    """Build one prompt-ready text snippet per side project.

    Args:
        input_data: Iterable of project dicts. "name" (optional) becomes the
            header; "highlights" holds the bullet items.

    Returns:
        list: One string per project that has a "highlights" key: an
            optional "Side Project: <name>" header, the highlights rendered
            as a list, and a trailing newline.
    """
    formatted = []
    for project in input_data:
        header = f"Side Project: {project['name']}" if "name" in project else ""
        if "highlights" in project:
            bullets = format_list_as_string(project["highlights"], list_sep="\n - ")
            formatted.append(header + bullets + "\n")
    return formatted
Loading

0 comments on commit 454ae9d

Please sign in to comment.