simplify resume improver and fix langchain issue

takline · Oct 7, 2024 · 454ae9d · 454ae9d
1 parent ae83eae
commit 454ae9d
Show file tree

Hide file tree

Showing 9 changed files with 178 additions and 180 deletions.
diff --git a/models/job_post.py b/models/job_post.py
@@ -1,10 +1,12 @@
 from langchain_core.pydantic_v1 import BaseModel, Field
 from typing import List, Optional
 from ..prompts.prompts import Prompts
-from ..services.extractor import ExtractorLLM
+from .. import config
+from .. import services
 
 Prompts.initialize()
 
+
 class JobDescription(BaseModel):
     """Description of a job posting."""
 
@@ -39,20 +41,25 @@ class JobDescription(BaseModel):
         None, description=Prompts.descriptions["JOB_DESCRIPTION"]["technical_skills"]
     )
     non_technical_skills: Optional[List[str]] = Field(
-        None, description=Prompts.descriptions["JOB_DESCRIPTION"]["non_technical_skills"]
+        None,
+        description=Prompts.descriptions["JOB_DESCRIPTION"]["non_technical_skills"],
     )
 
 
-class JobPost(ExtractorLLM):
+class JobPost:
+    # Holds a raw job posting string and parses it into a dict of
+    # JobDescription fields using an LLM.
     def __init__(self, posting: str):
         """Initialize JobPost with the job posting string."""
-        super().__init__()
         self.posting = posting
+        # The LLM is now built directly from the package config values
+        # instead of being inherited from the removed ExtractorLLM base class.
+        self.extractor_llm = services.langchain_helpers.create_llm(
+            chat_model=config.CHAT_MODEL,
+            model_name=config.MODEL_NAME,
+            temperature=config.TEMPERATURE,
+            cache=True,
+        )
+        # Stays None until parse_job_post() has been called.
         self.parsed_job = None
 
     def parse_job_post(self, **chain_kwargs) -> dict:
         """Parse the job posting to extract job description and skills."""
-        self.parsed_job = self.extract_from_input(
-            pydantic_object=JobDescription, input=self.posting, **chain_kwargs
-        )
+        # NOTE(review): chain_kwargs is still accepted but is not forwarded to
+        # the model call below -- confirm whether any caller relies on it.
+        model = self.extractor_llm.with_structured_output(JobDescription)
+        # .dict() turns the structured result into a dict, which is stored on
+        # the instance and returned.
+        self.parsed_job = model.invoke(self.posting).dict()
         return self.parsed_job
diff --git a/prompts/prompts.py b/prompts/prompts.py
@@ -39,7 +39,7 @@ def _load_prompts(yaml_path: str) -> dict:
                     sub_data["job_posting_template"]
                 ),
                 HumanMessagePromptTemplate.from_template(
-                    sub_data.get("master_resume_template", "")
+                    sub_data.get("resume_template", "")
                 ),
                 HumanMessage(content=sub_data["instruction_message"]),
                 HumanMessage(content=sub_data["criteria_message"]),

diff --git a/prompts/prompts.yaml b/prompts/prompts.yaml
@@ -9,22 +9,22 @@ SECTION_HIGHLIGHTER:
       Keywords that may be triggered by Applicant Tracking Systems (ATS) that should be added (if applicable): {ats_keywords}
       The ideal candidate has the following skills:{technical_skills}
       {non_technical_skills}
-  master_resume_template: |
-      <Master Resume>{section}
+  resume_template: |
+      <Resume>{section}
   instruction_message: |
-      <Instruction> Identify the relevant portions from the <Master Resume> that match the <Job Posting>, rephrase these relevant portions into highlights, and rate the relevance of each highlight to the <Job Posting> on a scale of 1-5.
+      <Instruction> Identify the relevant portions from the <Resume> that match the <Job Posting>, rephrase these relevant portions into highlights, and rate the relevance of each highlight to the <Job Posting> on a scale of 1-5.
   criteria_message: |
       <Criteria> 
-      - Each highlight must be based on what is mentioned in the <Master Resume>. 
-      - In each highlight, include how that experience in the <Master Resume> demonstrates an ability to perform duties mentioned in the <Job Posting>.
+      - Each highlight must be based on what is mentioned in the <Resume>. 
+      - In each highlight, include how that experience in the <Resume> demonstrates an ability to perform duties mentioned in the <Job Posting>.
       - In each highlight, try to include action verbs, give tangible and concrete examples, and include success metrics when available.
       - Grammar, spellings, and sentence structure must be correct.
   steps_message: |
       <Steps>
       - Create a <Plan> for following the <Instruction> while meeting all the <Criteria>.
       - What <Additional Steps> are needed to follow the <Plan>?
       - Follow all steps one by one and show your <Work>.
-      - Verify that highlights are reflective of the <Master Resume> and not the <Job Posting>. Update if necessary.
+      - Verify that highlights are reflective of the <Resume> and not the <Job Posting>. Update if necessary.
       - Verify that all <Criteria> are met, and update if necessary.
       - Provide the answer to the <Instruction> with prefix <Final Answer>.
 

diff --git a/services/__init__.py b/services/__init__.py
@@ -1,4 +1,3 @@
-from .extractor import *
 from .resume_improver import *
 from .langchain_helpers import *
 from .background_runner import *
diff --git a/services/extractor.py b/services/extractor.py
diff --git a/services/langchain_helpers.py b/services/langchain_helpers.py
@@ -6,6 +6,7 @@
 import langchain
 from langchain_community.cache import InMemoryCache
 from .. import config
+from .. import utils
 
 # Set up LLM cache
 langchain.llm_cache = InMemoryCache()
@@ -26,6 +27,7 @@ def format_list_as_string(lst: list, list_sep: str = "\n- ") -> str:
     return str(lst)
 
 
+
 def format_prompt_inputs_as_strings(prompt_inputs: list[str], **kwargs):
     """Convert values to string for all keys in kwargs matching list in prompt inputs."""
     return {
@@ -45,16 +47,134 @@ def parse_date(date_str: str) -> datetime:
 
 
 def datediff_years(start_date: str, end_date: str) -> float:
-    """Get difference between arbitrarily formatted dates in fractional years to the floor month.
-    
+    """Calculate the difference between two dates in fractional years.
+
+    The result is built from whole years and whole months only; any
+    leftover days are not counted. The "Present" check on ``end_date`` is
+    case-insensitive.
+
     Args:
         start_date (str): The start date in string format.
         end_date (str): The end date in string format. Can be "Present" to use the current date.
-    
+
     Returns:
         float: The difference in years, including fractional years.
     """
     if isinstance(end_date, str) and end_date.lower() == "present":
         end_date = datetime.today().strftime("%Y-%m-%d")
     datediff = relativedelta(parse_date(end_date), parse_date(start_date))
+    # Only the years and months components are used, so the fractional part
+    # moves in steps of 1/12.
     return datediff.years + datediff.months / 12.0
+
+
+def chain_formatter(format_type: str, input_data) -> str:
+    """Format resume/job inputs for inclusion in a runnable sequence.
+
+    Args:
+        format_type (str): The type of data to format. 'experience',
+            'projects', 'skills' and 'education' are recognised.
+        input_data: The data to be formatted.
+
+    Returns:
+        str: The formatted data as a string. For any other ``format_type``
+            the input is returned unchanged.
+    """
+    if format_type == 'education':
+        return format_education_for_resume(input_data)
+    if format_type == 'experience':
+        as_list = format_experiences_for_prompt(input_data)
+    elif format_type == 'projects':
+        as_list = format_projects_for_prompt(input_data)
+    elif format_type == 'skills':
+        as_list = format_skills_for_prompt(input_data)
+    else:
+        return input_data
+    # format_prompt_inputs_as_strings() converts keyword arguments and returns
+    # a dict (per its signature and docstring), so passing it only the list
+    # dropped the formatted entries. Join the list into one string instead.
+    return format_list_as_string(as_list)
+
+
+def format_education_for_resume(education_list: list[dict]) -> str:
+    """Format education entries for inclusion in a resume.
+
+    Each entry becomes one line of the form "<school>: <degree>, <degree>",
+    using the first name listed for every degree.
+
+    Args:
+        education_list (list[dict]): A list of dictionaries containing education details.
+            The 'school' and 'degrees' keys are read; each degree's 'names' list is read.
+
+    Returns:
+        str: A formatted string of education entries, one per line. An empty
+            list yields an empty string.
+    """
+    formatted_education = []
+    for entry in education_list:
+        school = entry.get('school', '')
+        first_names = []
+        for degree in entry.get('degrees') or []:
+            # Fall back to a generic label when 'names' is missing, None or
+            # empty; indexing an empty list used to raise IndexError.
+            names = degree.get('names') or ['Degree']
+            first_names.append(names[0])
+        degrees = ', '.join(first_names)
+        formatted_education.append(f"{school}: {degrees}")
+    return '\n'.join(formatted_education)
+
+
+def format_skills_for_prompt(input_data) -> list:
+    """Turn skill categories into one-line summaries for a prompt.
+
+    Args:
+        input_data: Iterable of skill-category dicts. The "category" and
+            "skills" keys are read.
+
+    Returns:
+        list: One string per category that has a "skills" key, optionally
+            prefixed with "<category>: ". Categories without "skills" are
+            left out.
+    """
+    formatted = []
+    for group in input_data:
+        label = group.get("category", "")
+        prefix = f"{label}: " if label else ""
+        if "skills" not in group:
+            continue
+        joined = ", ".join(group["skills"])
+        formatted.append(prefix + "Proficient in " + joined)
+    return formatted
+
+def get_cumulative_time_from_titles(titles) -> int:
+    """Calculate the cumulative time from job titles.
+
+    Args:
+        titles (list): A list of job titles (dicts) with "startdate" and
+            "enddate" entries. An "enddate" of "current" (any letter case)
+            means today.
+
+    Returns:
+        int: The cumulative time in years, rounded to a whole number.
+    """
+    result = 0.0
+    for t in titles:
+        # A title without both dates has no measurable duration. Previously
+        # the accumulation ran outside this check, so such a title raised
+        # KeyError/UnboundLocalError or reused the previous title's end date.
+        if "startdate" not in t or "enddate" not in t:
+            continue
+        last_date = t["enddate"]
+        if isinstance(last_date, str) and last_date.lower() == "current":
+            last_date = datetime.today().strftime("%Y-%m-%d")
+        result += datediff_years(start_date=t["startdate"], end_date=last_date)
+    return round(result)
+
+def format_experiences_for_prompt(input_data) -> list:
+    """Render work experiences as prompt-ready text blocks.
+
+    Args:
+        input_data: Iterable of experience dicts. The "titles" and
+            "highlights" keys are read.
+
+    Returns:
+        list: One string per experience that has "highlights": an optional
+            "<N> years experience in:" header followed by the bulleted
+            highlights. Experiences without "highlights" are left out.
+    """
+    formatted = []
+    for experience in input_data:
+        header = ""
+        if "titles" in experience:
+            years = get_cumulative_time_from_titles(experience["titles"])
+            header = f"{years} years experience in:"
+        if "highlights" not in experience:
+            continue
+        bullets = format_list_as_string(
+            experience["highlights"], list_sep="\n  - "
+        )
+        formatted.append(header + bullets + "\n")
+    return formatted
+
+def format_projects_for_prompt(input_data) -> list:
+    """Render side projects as prompt-ready text blocks.
+
+    Args:
+        input_data: Iterable of project dicts. The "name" and "highlights"
+            keys are read.
+
+    Returns:
+        list: One string per project that has "highlights": an optional
+            "Side Project: <name>" heading followed by the bulleted
+            highlights. Projects without "highlights" are left out.
+    """
+    formatted = []
+    for project in input_data:
+        heading = f"Side Project: {project['name']}" if "name" in project else ""
+        if "highlights" not in project:
+            continue
+        bullets = format_list_as_string(
+            project["highlights"], list_sep="\n  - "
+        )
+        formatted.append(heading + bullets + "\n")
+    return formatted