Spaces:

astro21
/

resume-revealer

Sleeping

App Files Files Community

astro21 committed on Apr 14

Commit

da7be98

•

1 Parent(s): 35236b5

Upload 3 files

Browse files

Files changed (3) hide show

ResumeStructure.py +15 -0
prompt_template.py +107 -0
utils.py +119 -0

ResumeStructure.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from typing import List, Dict, Union
+from langchain_core.pydantic_v1 import BaseModel, Field
class ResumeStructure(BaseModel):
    # Target schema for the structured (JSON) resume extraction. Each field's
    # ``description`` is free text describing the expected keys.
    # NOTE(review): documented with comments instead of a class docstring on
    # purpose -- pydantic may surface a docstring in the generated schema,
    # which would change what downstream consumers of the schema see.

    # One entry per institution attended.
    education: List[Dict[str, str]] = Field(
        description="List of dictionaries containing 'university' and 'CGPA'",
    )

    # One entry per job; values are strings or lists of strings.
    work: List[Dict[str, Union[str, List[str]]]] = Field(
        description=(
            "List of dictionaries containing 'organization', 'location', "
            "'position', 'duration', 'standardized_job_title', and "
            "'predicted_skills'"
        ),
    )

    # One entry per project; values are strings or lists of strings.
    projects: List[Dict[str, Union[str, List[str]]]] = Field(
        description=(
            "List of dictionaries containing 'project_name', 'start_date', "
            "'end_date', 'description', and 'predicted_skills'"
        ),
    )

    # Skill names grouped by category.
    skills: Dict[str, List[str]] = Field(
        description="Dictionary containing 'technical' and 'non_technical' skills",
    )

    # Single string summarising the candidate's progression.
    career_trajectory: str = Field(
        description="String representing the career progression of the candidate",
    )

prompt_template.py ADDED Viewed

	@@ -0,0 +1,107 @@

# Plain-text extraction prompt. Its only placeholder is {text}, which receives
# the raw resume contents.
# NOTE(review): the "Standardized Job Title" instruction appears twice in a
# row (the second copy has a trailing fragment). It is left untouched here
# because rewording a prompt can change model output -- confirm before removing.
template = """
For the following text, extract the following information:
Warning: Don't greet or write any introduction. Just start with the answer to the prompts. Do as per the instructions given in the prompt. If you don't know the answer, leave that part (keep blank) and move to the next part.
1. Education: Extract the name of the all universities/colleges attended by the candidate with there CGPA.
2. Work: Extract all organization names where he/she has worked along with the position held, and the duration of employment.
            Predicted Skills : Also extract skills based on the work experience.
            Standardized Job Title: Identify the standardized job title for each work experience.
            Standardized Job Title: Identify the standardized job title for each work experience.Skills based on work experience
3. Projects: Extract the details of the projects the candidate has worked on.
                Predicted Skills : Also extract skills based on each project.
4.Skills: Identify the technical and non-technical skills associated with each work experience and project.
5.Career Trajectory: Identify the career progression of the candidate based on their work experience.
Output them in the following format:
Warning: if there is no data for any of the fields, leave it blank.
        "Education: " and separate multiple entries with new line .
        "Work: " Organization Name, Location, Position, Start Date - End Date 'and separate multiple entries with a comma.
            "Job Title: " Identify the  job title for each work experience. Clean and strip them off suffixes, prefixes and seniority.
            " Predicted Skills : " and separate multiple entries with a comma for each work experience.
        Note: Separate each work experience with a new line.
        Warning: Don't print this text - "Organization Name, Location, Position, Start Date - End Date" as it is in the output  .
        "Project Name, Start Date - End Date, Project Description " and separate multiple entries with a comma and a new line for each project. (
            " Predicted Skills : " and separate multiple entries with a comma for each project.
            Note:  Project Description should be in 30 to 40 words
        Note: Separate each project with a new line.
        Warning: Don't print "Project Name, Start Date - End Date, Project Description"  as it is (text)  in the output .
        "Skills: " Skills under the skills section.
                    Classify them as technical and non-technical skills if possible.
        "Career Trajectory: " and separate multiple entries with a -> . Career Trajectory should be in ascending order with respect to date of joining.
                eg1 : "Data Analyst -> Data Scientist -> Senior Data Scientist"
                eg2 : "School Name -> College Name -> University Name -> Job Title -> Job Title"
Resume: {text}
"""

# Same prompt with a trailing {format_instructions} placeholder for the output
# parser. Built from `template` so the two prompts cannot drift apart again
# (the earlier copy-pasted version differed only by a typo in `template`).
template_format_instructions = template + "\n{format_instructions}\n\n"

utils.py ADDED Viewed

	@@ -0,0 +1,119 @@

+import os
+import shutil
+from dedoc import DedocManager
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts import PromptTemplate
+from langchain_core.output_parsers import JsonOutputParser
+from ResumeStructure import ResumeStructure
+from fastapi import UploadFile
+from prompt_template import template_format_instructions, template
+from typing import List
# Directory where uploaded files are stored temporarily while being parsed.
TEMP_DIR = "temp_files"
# exist_ok=True replaces the exists()-then-makedirs() pair, which could raise
# FileExistsError if another process created the directory in between.
os.makedirs(TEMP_DIR, exist_ok=True)
async def process_file_with_dedoc(file: UploadFile):
    """
    Process the file using Dedoc and return the output data.

    The upload is copied into TEMP_DIR, parsed, and the temporary copy is
    always removed afterwards, including when parsing raises.

    Args:
    - file: The UploadedFile object to be processed.
    Returns:
    - Output data (the parsed document's API schema dumped to plain data) if
      the file is processed successfully, None if the file format is not
      supported.
    """
    supported_formats = ('jpg', 'jpeg', 'png', 'docx', 'pdf', 'html', 'doc')
    print(f"Processing file '{file.filename}'...")

    # The filename comes from the client: keep only its final component so a
    # name such as "../../x.pdf" cannot escape TEMP_DIR.
    safe_name = os.path.basename((file.filename or "").replace("\\", "/"))

    # Validate the extension before touching the disk so that rejected uploads
    # never leave a stray file behind.
    file_extension = os.path.splitext(safe_name)[1][1:].lower()
    if file_extension not in supported_formats:
        print(f"Cannot process file '{file.filename}'. Unsupported file format.")
        return None

    file_path = os.path.join(TEMP_DIR, safe_name)
    try:
        # Save the uploaded file to the temporary directory
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)

        # Process the file using Dedoc
        output = DedocManager().parse(file_path)
        return output.to_api_schema().model_dump()
    finally:
        # Remove the temporary file whether or not parsing succeeded.
        if os.path.exists(file_path):
            os.remove(file_path)
async def extract_text_from_all_levels(data):
    """
    Collect the text of every subparagraph found in a parsed document.

    Args:
    - data: Mapping whose data['content']['structure'] node may hold a
      'subparagraphs' list.
    Returns:
    - The concatenated text of all nested subparagraphs, or an empty string
      when the structure node has no 'subparagraphs' key.
    """
    root = data['content']['structure']
    if 'subparagraphs' not in root:
        return ""
    return "" + await extract_text_from_subparagraphs(root['subparagraphs'])
async def extract_text_from_subparagraphs(subparagraphs):
    """
    Recursively extract text from subparagraphs.

    Nodes are visited depth-first in document order: each node's own text
    (followed by a newline) comes before the text of its children.

    Args:
    - subparagraphs: A list of subparagraphs. Each one is a mapping with a
      'text' entry and, optionally, a nested 'subparagraphs' list.
    Returns:
    - A string containing the text from all subparagraphs.
    """
    def _walk(nodes):
        # Plain generator: the traversal does no I/O, so it does not need a
        # coroutine per node.
        for node in nodes:
            yield node['text'] + "\n"
            if 'subparagraphs' in node:
                yield from _walk(node['subparagraphs'])

    # Join once instead of growing a string with += at every level.
    return "".join(_walk(subparagraphs))
def generate_formatted_resume(resume, chat_llm):
    """
    Run the plain-text extraction prompt over a resume.

    Args:
    - resume: Resume text substituted for the prompt's {text} placeholder.
    - chat_llm: Chat model that the prompt is piped into.
    Returns:
    - The `content` attribute of the model's response.
    """
    pipeline = PromptTemplate(template=template, input_variables=["text"]) | chat_llm
    response = pipeline.invoke({"text": resume})
    return response.content
def generate_json_structured_resume(resume, chat_llm):
    """
    Run the extraction prompt and parse the model output as JSON.

    Args:
    - resume: Resume text substituted for the prompt's {text} placeholder.
    - chat_llm: Chat model that the prompt is piped into.
    Returns:
    - Whatever the JSON output parser produces for the model's reply
      (presumably data shaped like ResumeStructure -- confirm against caller).
    """
    json_parser = JsonOutputParser(pydantic_object=ResumeStructure)
    # The parser's format instructions are bound up front; only {text} is
    # supplied at invoke time.
    format_hint = json_parser.get_format_instructions()
    structured_prompt = PromptTemplate(
        template=template_format_instructions,
        input_variables=["text"],
        partial_variables={"format_instructions": format_hint},
    )
    pipeline = structured_prompt | chat_llm | json_parser
    return pipeline.invoke({"text": resume})