refactor: update model initialization to use settings.model across services

2025-11-11 12:02:08 +00:00
parent 2b83ffe00c
commit 8d745c1f8e
5 changed files with 168 additions and 118 deletions
@@ -5,6 +5,7 @@ class Settings(BaseSettings):
    database_url: Optional[str] = None
    secret_key: Optional[str] = None
    api_key: Optional[str] = None
+    model: str = "openai/gpt-oss-120b"
    GROQ_API_KEY: str
    class Config:
        env_file = ".env"
@@ -727,97 +727,101 @@ async def match_specific_receipts(request: MatchSpecificRequest, db: db_dependen
            )
            logger.info(f"Matching completed, got {len(matching_results)} results")

+            # Filter results by confidence threshold (10% minimum)
+            CONFIDENCE_THRESHOLD = 0.10
+            filtered_results = [r for r in matching_results if r.confidence_score >= CONFIDENCE_THRESHOLD]
+            logger.info(f"After filtering by {CONFIDENCE_THRESHOLD*100}% threshold: {len(filtered_results)} matches remain")
+
            # Convert matching results to response format
            match_responses = []
-            for result in matching_results:
+            for result in filtered_results:
                # Get final tax amount from LLM analysis if available, otherwise use receipt's stated tax
-                if result.confidence_score > 0:
-                    final_tax = result.receipt.tax
-                    # if result.tax_analysis and "final_tax_amount" in result.tax_analysis:
-                    #     final_tax = result.tax_analysis["final_tax_amount"]
+                final_tax = result.receipt.tax
+                # if result.tax_analysis and "final_tax_amount" in result.tax_analysis:
+                #     final_tax = result.tax_analysis["final_tax_amount"]

-                    # Extract flag_for_review and auto_approve from tax_analysis if available
-                    flag_for_review = None
-                    auto_approve = None
-                    if result.tax_analysis:
-                        flag_for_review = result.tax_analysis.get("flag_for_review")
-                        auto_approve = result.tax_analysis.get("auto_approve")
+                # Extract flag_for_review and auto_approve from tax_analysis if available
+                flag_for_review = None
+                auto_approve = None
+                if result.tax_analysis:
+                    flag_for_review = result.tax_analysis.get("flag_for_review")
+                    auto_approve = result.tax_analysis.get("auto_approve")

-                    match_response = MatchResponse(
-                        receipt_id=result.receipt.id,
-                        transaction_id=result.transaction.id
-                        if result.transaction
-                        else "no_match",
-                        confidence_score=result.confidence_score * 100,
-                        match_reason=result.match_reason,
-                        receipt_vendor=result.receipt.vendor,
-                        receipt_amount=result.receipt.amount,
-                        receipt_description=result.receipt.description,
-                        receipt_category=result.receipt.category,
-                        receipt_tax_amount=final_tax,
-                        transaction_vendor=result.transaction.vendor
-                        if result.transaction
-                        else "",
-                        transaction_amount=result.transaction.amount
-                        if result.transaction
-                        else 0.0,
-                        tax_analysis=result.tax_analysis,
-                        flag_for_review=flag_for_review,
-                        auto_approve=auto_approve,
-                        # Transaction metadata
-                        transaction_source=result.transaction.source
-                        if result.transaction
-                        else None,
-                        # QuickBooks CSV fields
-                        TxnId=result.transaction.TxnId if result.transaction else None,
-                        AccountType=result.transaction.AccountType
-                        if result.transaction
-                        else None,
-                        AccountNumber=result.transaction.AccountNumber
-                        if result.transaction
-                        else None,
-                        TransactionDate=result.transaction.TransactionDate
-                        if result.transaction
-                        else None,
-                        TransactionType=result.transaction.TransactionType
-                        if result.transaction
-                        else None,
-                        ChequeNumber=result.transaction.ChequeNumber
-                        if result.transaction
-                        else None,
-                        Description1=result.transaction.Description1
-                        if result.transaction
-                        else None,
-                        Description2=result.transaction.Description2
-                        if result.transaction
-                        else None,
-                        VendorId=result.transaction.VendorId
-                        if result.transaction
-                        else None,
-                        VendorName=result.transaction.VendorName
-                        if result.transaction
-                        else None,
-                        AccountId=result.transaction.AccountId
-                        if result.transaction
-                        else None,
-                        AccountName=result.transaction.AccountName
-                        if result.transaction
-                        else None,
-                        Source=result.transaction.source if result.transaction else None,
-                    )
-                    match_responses.append(match_response)
+                match_response = MatchResponse(
+                    receipt_id=result.receipt.id,
+                    transaction_id=result.transaction.id
+                    if result.transaction
+                    else "no_match",
+                    confidence_score=result.confidence_score * 100,
+                    match_reason=result.match_reason,
+                    receipt_vendor=result.receipt.vendor,
+                    receipt_amount=result.receipt.amount,
+                    receipt_description=result.receipt.description,
+                    receipt_category=result.receipt.category,
+                    receipt_tax_amount=final_tax,
+                    transaction_vendor=result.transaction.vendor
+                    if result.transaction
+                    else "",
+                    transaction_amount=result.transaction.amount
+                    if result.transaction
+                    else 0.0,
+                    tax_analysis=result.tax_analysis,
+                    flag_for_review=flag_for_review,
+                    auto_approve=auto_approve,
+                    # Transaction metadata
+                    transaction_source=result.transaction.source
+                    if result.transaction
+                    else None,
+                    # QuickBooks CSV fields
+                    TxnId=result.transaction.TxnId if result.transaction else None,
+                    AccountType=result.transaction.AccountType
+                    if result.transaction
+                    else None,
+                    AccountNumber=result.transaction.AccountNumber
+                    if result.transaction
+                    else None,
+                    TransactionDate=result.transaction.TransactionDate
+                    if result.transaction
+                    else None,
+                    TransactionType=result.transaction.TransactionType
+                    if result.transaction
+                    else None,
+                    ChequeNumber=result.transaction.ChequeNumber
+                    if result.transaction
+                    else None,
+                    Description1=result.transaction.Description1
+                    if result.transaction
+                    else None,
+                    Description2=result.transaction.Description2
+                    if result.transaction
+                    else None,
+                    VendorId=result.transaction.VendorId
+                    if result.transaction
+                    else None,
+                    VendorName=result.transaction.VendorName
+                    if result.transaction
+                    else None,
+                    AccountId=result.transaction.AccountId
+                    if result.transaction
+                    else None,
+                    AccountName=result.transaction.AccountName
+                    if result.transaction
+                    else None,
+                    Source=result.transaction.source if result.transaction else None,
+                )
+                match_responses.append(match_response)

-            # Calculate statistics
+            # Calculate statistics on filtered results
            high_confidence = len(
-                [r for r in matching_results if r.confidence_score >= 0.8]
+                [r for r in filtered_results if r.confidence_score >= 0.8]
            )
            low_confidence = len(
-                [r for r in matching_results if r.confidence_score < 0.5]
+                [r for r in filtered_results if r.confidence_score < 0.5]
            )
            avg_score = (
-                sum(r.confidence_score for r in matching_results)
-                / len(matching_results)
-                if matching_results
+                sum(r.confidence_score for r in filtered_results)
+                / len(filtered_results)
+                if filtered_results
                else 0
            )

@@ -829,6 +833,7 @@ async def match_specific_receipts(request: MatchSpecificRequest, db: db_dependen
            }

            logger.info(f"Generated stats: {stats}")
+            logger.info(f"Match responses: {match_responses}")
            logger.info(
                f"Match-specific completed successfully with {len(match_responses)} matches"
            )
@@ -15,7 +15,7 @@ logger = logging.getLogger(__name__)
 class AIMatcher:
    def __init__(self, use_batch_matching=True):
        self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
-        self.model = "llama-3.1-8b-instant"
+        self.model = settings.model
        self.max_retries = 3
        self.retry_delay = 2  # seconds - increased for rate limiting
        self.rate_limit_delay = 1.0  # seconds between API calls
@@ -116,7 +116,7 @@ class AIMatcher:
        for i, transaction in enumerate(candidates):
            transaction_amount_abs = abs(transaction.amount)
            date_diff = abs((receipt.receipt_date - transaction.transaction_date).days)
-            amount_diff = abs(receipt.amount - transaction_amount_abs)
+            amount_diff = abs(receipt.amount - transaction_amount_abs - receipt.tax)
            amount_percent_diff = (
                (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
            )
@@ -127,10 +127,12 @@ Candidate {i + 1}:
 - Amount: ${transaction.amount} (absolute: ${transaction_amount_abs})
 - Date: {transaction.transaction_date.strftime("%Y-%m-%d")} ({date_diff} days difference)
 - Notes: {transaction.notes}
- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%)
+- Amount difference: ${amount_diff} ({amount_percent_diff:.1f}%) Taking in account receipt tax
 """
+        logger.info(f"\nThis is the receipt: {receipt}\n")
+        logger.info(f"\nCandidate text: {candidates_text}\n")

-        prompt = f"""You are an expert at matching receipts to bank transactions. Analyze the receipt below against ALL the candidate transactions and return the BEST match.
+        prompt = f"""You are an expert at matching receipts to bank transactions. Your PRIMARY goal is to find the candidate with the CLOSEST AMOUNT match.

 RECEIPT TO MATCH:
 - Vendor: {receipt.vendor}
@@ -142,39 +144,52 @@ RECEIPT TO MATCH:
 CANDIDATE TRANSACTIONS:
 {candidates_text}

-SCORING CRITERIA (Amount is the PRIMARY factor):
+CRITICAL INSTRUCTIONS FOR SELECTION:
+1. FIRST: Find the candidate(s) with the SMALLEST amount percentage difference
+2. ONLY if multiple candidates have similar amounts (within 2% of each other), THEN consider vendor/date/notes
+3. USE THE PERCENTAGE DIFFERENCE PROVIDED for each candidate - DO NOT calculate yourself
+4. IGNORE vendor/description matches if amounts are far apart (>20% difference)
+5. The candidate with the closest amount is almost always the correct match

-Amount Similarity (MOST IMPORTANT - 60% weight):
- Exact match or within 1%: Start at 0.9-1.0
- Within 5%: Start at 0.75-0.89
- Within 10%: Start at 0.5-0.74
- Within 20%: Start at 0.3-0.49
- More than 20% difference: Start at 0.0-0.29
+SCORING CRITERIA - AMOUNT DIFFERENCE IS 90% OF THE DECISION:

-Then adjust UP or DOWN based on:
- Vendor similarity (20% weight): Exact or similar name increases score
- Date proximity (15% weight): Within 7 days increases score, within 30 days moderate increase
- Description/notes match (5% weight): Relevant keywords increase score
+Step 1: Calculate BASE SCORE using the provided amount percentage difference:
+- 0-1% difference: Base score = 0.95
+- 1-2% difference: Base score = 0.90
+- 2-3% difference: Base score = 0.85
+- 3-5% difference: Base score = 0.75
+- 5-7% difference: Base score = 0.65
+- 7-10% difference: Base score = 0.55
+- 10-15% difference: Base score = 0.40
+- 15-20% difference: Base score = 0.25
+- 20-30% difference: Base score = 0.15
+- 30-50% difference: Base score = 0.08
+- 50-100% difference: Base score = 0.03
+- >100% difference: Base score = 0.01
+
+Step 2: ADJUST the base score (±0.10 maximum):
+- Vendor exact match: +0.10
+- Vendor similar/partial match: +0.05
+- Date within 7 days: +0.05
+- Date within 30 days: +0.02
+- Description/notes keywords match: +0.02
+- Vendor completely different: -0.05
+- Date >90 days apart: -0.03
+
+Step 3: Ensure final score is between 0.0 and 1.0

-EXAMPLES:
- Amount match + vendor match + close date = 0.95-1.0 (Perfect match)
- Amount match + different vendor + close date = 0.85-0.94 (High confidence)
- Amount match + different vendor + far date = 0.70-0.84 (Medium-high confidence)
- Amount similar (5%) + vendor match = 0.75-0.85 (Medium-high confidence)
- Amount similar (10%) + some matches = 0.50-0.69 (Medium confidence)
- Amount very different (>20%) = 0.0-0.29 regardless of other factors

 CRITICAL: You MUST return valid JSON only. No explanations, no text before or after.

 Return format:
-{{"candidate_number": 1, "confidence_score": 0.87, "reason": "Exact amount match with similar vendor"}}
+{{"candidate_number": 1, "confidence_score": 0.65, "reason": "5.8% amount difference with similar vendor"}}

 Another example:
-{{"candidate_number": 3, "confidence_score": 0.15, "reason": "Poor match but best available"}}
+{{"candidate_number": 2, "confidence_score": 0.01, "reason": "9850% amount difference, extremely poor match"}}

 Return ONLY JSON for the best candidate:"""

-        logger.info(f"This is the prompt: {prompt}")
+        # logger.info(f"This is the prompt: {prompt}")
        for attempt in range(self.max_retries):
            try:
                result = self._call_groq_api_with_timeout(
@@ -192,6 +207,22 @@ Return ONLY JSON for the best candidate:"""

                if 0 <= candidate_num < len(candidates):
                    best_transaction = candidates[candidate_num]
+                    
+                    # Validate the match - catch AI errors with extreme amount differences
+                    transaction_amount_abs = abs(best_transaction.amount)
+                    amount_diff = abs(receipt.amount - transaction_amount_abs)
+                    amount_percent_diff = (
+                        (amount_diff / receipt.amount) * 100 if receipt.amount > 0 else 0
+                    )
+                    
+                    # If amount difference is >100%, force very low score
+                    if amount_percent_diff > 100:
+                        logger.warning(
+                            f"Overriding AI score for extreme mismatch: {receipt.amount} vs {transaction_amount_abs} ({amount_percent_diff:.1f}% diff)"
+                        )
+                        score = min(0.05, score)  # Cap at 0.05 for extreme mismatches
+                        reason = f"{amount_percent_diff:.1f}% amount difference, extreme mismatch"
+                    
                    logger.info(
                        f"AI selected candidate {candidate_num + 1}: {best_transaction.vendor} (score: {score:.3f})"
                    )
@@ -303,7 +334,7 @@ Return ONLY JSON for the best candidate:"""
                logger.warning(f"Fallback parsing also failed: {fallback_error}")

        # Final fallback
-        logger.warning(f"Could not parse single match response: {result}")
+        # logger.warning(f"Could not parse single match response: {result}")
        return -1, 0.0, f"Parse error: {result[:50]}..."

    def _filter_candidates(
@@ -311,18 +342,29 @@ Return ONLY JSON for the best candidate:"""
    ) -> List[Transaction]:
        """Filter transactions to create a reasonable candidate list"""
        candidates = []
-        amount_threshold = receipt.amount * 2.0  # 200% threshold - very inclusive
-
+        
        for transaction in transactions:
            # Use absolute value for transaction amount comparison
            transaction_amount_abs = abs(transaction.amount)
-
-            # Only exclude transactions with obviously different amounts
-            if abs(receipt.amount - transaction_amount_abs) <= amount_threshold:
+            amount_diff = abs(receipt.amount - transaction_amount_abs)
+            
+            # Calculate percentage difference
+            if receipt.amount > 0:
+                percent_diff = (amount_diff / receipt.amount) * 100
+            else:
+                percent_diff = 0
+            
+            # Keep candidates whose amount differs from the receipt by at most 300%;
+            # larger gaps are treated as extreme mismatches and dropped
+            if percent_diff <= 300:
                candidates.append(transaction)
+            else:
+                logger.debug(
+                    f"Filtered out transaction ${transaction_amount_abs} for receipt ${receipt.amount} ({percent_diff:.1f}% difference)"
+                )

        logger.debug(
-            f"Filtered {len(transactions)} transactions to {len(candidates)} candidates"
+            f"Filtered {len(transactions)} transactions to {len(candidates)} candidates for receipt ${receipt.amount}"
        )
        return candidates

@@ -389,7 +431,9 @@ Return ONLY JSON for the best candidate:"""
        
        Consider description and category similarity in your scoring.
        
-        The most important factor to consider is the Amount for both the transaction and the receipt. The closer the amounts, the higher the score. If the amounts are different or not close return a low score (0-0.1) based on other factors.
+        THINGS TO NOTE:
+        The most important factor to consider is the Amount for both the transaction and the receipt, the closer the amounts, the higher the score. 
+        If the amounts are different or not close return a low score (0-0.1) based on other factors.
        
        IMPORTANT: Return ONLY the score and reason separated by a pipe character.
        Format: [score]|[reason]
@@ -405,8 +449,8 @@ Return ONLY JSON for the best candidate:"""
                # Parse the result - handle multiple formats
                score, reason = self._parse_ai_response(result)

-                logger.debug(f"AI Response: {result}")
-                logger.debug(f"Parsed: score={score}, reason={reason}")
+                # logger.debug(f"AI Response: {result}")
+                # logger.debug(f"Parsed: score={score}, reason={reason}")

                return score, reason

@@ -509,7 +553,7 @@ Return ONLY JSON for the best candidate:"""
                        {"role": "user", "content": prompt}
                    ],
                    max_tokens=150,
-                    temperature=0.1,
+                    temperature=0,
                )
                return response.choices[0].message.content.strip()
            except Exception as e:
@@ -19,7 +19,7 @@ class AIRulesMatcher:

    def __init__(self):
        self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
-        self.model = "llama-3.1-8b-instant"
+        self.model = settings.model

    def apply_rules_to_matches(
        self, matches: List[Match], ai_rules: Optional[List[Dict]] = None
@@ -48,7 +48,7 @@ class LLMTaxAnalyzer:

    def __init__(self):
        self.client = groq.Groq(api_key=settings.GROQ_API_KEY)
-        self.model = "llama-3.1-8b-instant"
+        self.model = settings.model
        self.max_retries = 3

    def analyze_and_apply_tax_rules_batch(