Update role extraction to filter roles by slug

This commit is contained in:
2025-05-05 21:13:58 +00:00
parent 2398cb867b
commit dcae438e64
5 changed files with 65 additions and 5 deletions
+2 -1
View File
@@ -29,6 +29,7 @@ def get_roles():
return jsonify({"error": "No file part", "message": "Please upload a file with the key 'document'."}), 400 return jsonify({"error": "No file part", "message": "Please upload a file with the key 'document'."}), 400
file = request.files['document'] file = request.files['document']
role_slug = request.form.get('role_slug')
# If the user does not select a file, the browser may also submit an empty part without filename # If the user does not select a file, the browser may also submit an empty part without filename
if file.filename == '': if file.filename == '':
@@ -48,7 +49,7 @@ def get_roles():
# Generate roles from the docs # Generate roles from the docs
parser = DocumentParser() parser = DocumentParser()
roles = parser.get_roles(docs)["roles"] roles = parser.get_roles_using_slug(docs,role_slug)["roles"]
# Cleanup: Delete all files in the upload directory after processing # Cleanup: Delete all files in the upload directory after processing
delete_all_files_in_directory(upload_folder) delete_all_files_in_directory(upload_folder)
+11 -4
View File
@@ -156,8 +156,11 @@ def get_vision_mission_extraction_from_doc2():
**You must return the response in the exact HTML `<p>` format shown below, including the numbering, lettered sub-points, `<br>` tags for line breaks, and the double `<br><br>` between departments. Adhere to this format precisely.** **You must return the response in the exact HTML `<p>` format shown below, including the numbering, lettered sub-points, `<br>` tags for line breaks, and the double `<br><br>` between departments. Adhere to this format precisely.**
NOTE: IF NO departments are provided, consider all departments found in the documents, do not omit any NOTE: IF NO departments are provided, consider all departments found in the documents, do not omit any
**Example Output Format:** **Example Output Format:** **Example Output Format:** Two texts (one for vision and for goal)
<p>Vision: To be the best in the world</p><p>Company Goals:</p><p>1. Sales<br> a. Brand Awareness: To be the best in the world<br> b. Revenue Growth: Increase annual sales by 20% through strategic partnerships and lead generation.<br><br>2. Marketing<br> a. Digital Presence: Enhance online visibility through SEO and social media campaigns.<br> b. Customer Engagement: Boost engagement via personalized content and email marketing.<br><br>3. Product Development<br> a. Innovation: Launch 3 new features based on user feedback by Q4.<br> b. Quality Assurance: Maintain product bug rate under 2% through improved testing pipelines.<br><br>4. Human Resources<br> a. Talent Acquisition: Recruit top talent and reduce time-to-hire to under 30 days.<br> b. Employee Retention: Increase employee retention by 15% through career development program.<br></p>
<p>Vision: To create a future where our organization is recognized for its contributions and effectiveness in achieving its goals.</p>"
“<p>Company Goals:</p><p>1. Account Management: Manage accounts effectively to enhance customer satisfaction and retention.<brbr>2. Finance<br> a. Financial Stability: Finance the company to ensure long-term sustainability and growth.<br><br>3. Account<br> a. Account Management: Manage accounts effectively to enhance customer satisfaction and retention.<br></p>”
""" """
@@ -196,10 +199,14 @@ def get_vision_mission_extraction_from_questionnaire_executive():
NOTE: If the goal and mission of a can not be gotten from the questionaire response, make it empty. NOTE: If the goal and mission of a can not be gotten from the questionaire response, make it empty.
NOTE: Ensure you extract every piece of information found for the vision and goals from the questionnaire. DO NOT OMIT ANYTHING. NOTE: Ensure you extract every piece of information found for the vision and goals from the questionnaire. DO NOT OMIT ANYTHING.
**Example Output Format:**
NOTE: Group the goals based on the departments found in the questions see example response below pointing to sales, marketing and product develpoment NOTE: Group the goals based on the departments found in the questions see example response below pointing to sales, marketing and product develpoment
NOTE: ADHERE STRICTLY TO THIS OUTPUT FORMAT , DO NOT CHANGE IT PLEASE NOTE: ADHERE STRICTLY TO THIS OUTPUT FORMAT , DO NOT CHANGE IT PLEASE
<p>Vision: To be the best in the world</p><p>Company Goals:</p><p>1. Sales<br> a. Brand Awareness: To be the best in the world<br> b. Revenue Growth: Increase annual sales by 20% through strategic partnerships and lead generation.<br><br>2. Marketing<br> a. Digital Presence: Enhance online visibility through SEO and social media campaigns.<br> b. Customer Engagement: Boost engagement via personalized content and email marketing.<br><br>3. Product Development<br> a. Innovation: Launch 3 new features based on user feedback by Q4.<br> b. Quality Assurance: Maintain product bug rate under 2% through improved testing pipelines.<br><br>4. Human Resources<br> a. Talent Acquisition: Recruit top talent and reduce time-to-hire to under 30 days.<br> b. Employee Retention: Increase employee retention by 15% through career development program.<br></p> **Example Output Format:** Two texts (one for vision and for goal)
<p>Vision: To create a future where our organization is recognized for its contributions and effectiveness in achieving its goals.</p>"
“<p>Company Goals:</p><p>1. Account Management: Manage accounts effectively to enhance customer satisfaction and retention.<brbr>2. Finance<br> a. Financial Stability: Finance the company to ensure long-term sustainability and growth.<br><br>3. Account<br> a. Account Management: Manage accounts effectively to enhance customer satisfaction and retention.<br></p>”
""" """
+52
View File
@@ -200,6 +200,58 @@ class DocumentParser:
temperature=0.1 temperature=0.1
) )
return json.loads(response.choices[0].message.content)
def get_roles_using_slug(self, docs, role_slug):
    """Extract job roles from ``docs`` and keep those related to ``role_slug``.

    The page content of every document is sent to the chat-completions
    ``parse`` endpoint together with a system prompt that asks the model to
    list the job roles it finds and filter them by the slug.

    Args:
        docs: Iterable of document objects exposing a ``page_content``
            attribute.
        role_slug: Keyword or category used to filter the roles. When it is
            ``None``, empty, or only whitespace, no filtering is possible,
            so the call falls back to ``self.get_roles(docs)``.

    Returns:
        The model's message content decoded with ``json.loads``. The route
        touched by this change reads the ``"roles"`` key of the result.
    """
    # The route reads the slug with ``request.form.get``, which yields None
    # when the field is missing. Without this guard the prompt would filter
    # on the literal text "None" instead of returning every role.
    slug = role_slug.strip() if isinstance(role_slug, str) else role_slug
    if not slug:
        return self.get_roles(docs)

    # JSON-encode the slug so embedded quotes or newlines cannot break out
    # of the quoted field in the system prompt. For ordinary slugs the
    # rendered text is unchanged (e.g. data -> "data").
    quoted_slug = json.dumps(str(slug), ensure_ascii=False)

    # NOTE(review): the prompt asks for a bare JSON array while
    # ``response_format`` is ``Roles_response`` and the caller reads
    # ``["roles"]`` -- presumably the schema takes precedence; confirm.
    system_prompt = (
        'You are a specialized role extractor for company documents. '
        'Your task is to identify and extract job roles/positions mentioned in the provided text.\n'
        'TASK:\n'
        '1. Extract ALL job roles/positions mentioned in the text as a list.\n'
        f'2. Filter the extracted roles based on the provided role_slug: {quoted_slug}.\n'
        '3. Return the filtered roles as a JSON list.\n'
        'RULES:\n'
        '- Return an empty list if no matching roles are found.\n'
        '- The role_slug is a keyword or category used to filter relevant roles.\n'
        '- Only include roles that semantically relate to the role_slug.\n'
        '- Be precise in extracting official job titles rather than general descriptions.\n'
        'EXAMPLES:\n'
        'Example 1:\n'
        'Text: "Our company is looking to hire a Senior Data Scientist, Junior Data Analyst, '
        'and Database Administrator for the Analytics department. We also have openings for '
        'Financial Manager and Customer Support Manager."\n'
        'Role_slug: "data"\n'
        'Expected output: ["Senior Data Scientist", "Junior Data Analyst", "Database Administrator"]\n'
        'Example 2:\n'
        'Text: "The restructuring process will affect several departments including the '
        'Financial Analysis team, Customer Relations department, and Sales Management. We are '
        'currently seeking a Regional Sales Manager, Sales Team Supervisor, and Customer '
        'Support Manager."\n'
        'Role_slug: "manager"\n'
        # Only titles that actually occur in the example text are listed;
        # the earlier version also listed "Financial Manager", which the
        # text never mentions.
        'Expected output: ["Regional Sales Manager", "Customer Support Manager"]\n'
        'Provide the result as a valid JSON array of strings.\n'
    )

    # One text part per document, in the order the documents were given.
    user_content = [{"type": "text", "text": doc.page_content} for doc in docs]

    response = self.client.beta.chat.completions.parse(
        model=self.model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_content},
        ],
        response_format=Roles_response,
        max_tokens=4096,
        temperature=0.1,
    )
    return json.loads(response.choices[0].message.content)
'''def extract_departments_and_managers(self, docs): '''def extract_departments_and_managers(self, docs):
Binary file not shown.
Binary file not shown.