import asyncio
import threading
from typing import Dict, Any, List, Optional
import logging
from datetime import datetime
import os
import pymongo
from dotenv import load_dotenv
from bson.codec_options import CodecOptions, DatetimeConversion
from .job_api_service import JobAPIService
from .opensearch_service import OpenSearchService
from .embeddings import Embedder
from .mongo_service import mongo_service
from .ai_service import AIService

# Load environment variables from a local .env file so later os.getenv()
# calls (e.g. JOB_MATCHING_ABSOLUTE_MIN_SCORE) pick up local configuration.
load_dotenv()

# Module-wide logger; basicConfig sets INFO-level output for the process.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class JobMatchingService:
    """Service for job matching with candidate screening and background processing"""
    
    def __init__(self) -> None:
        """Initialize the collaborating services used by the matching pipeline."""
        # Client for the employer job API (source of job postings).
        self.job_api_service = JobAPIService()
        # Keyword-search backend (used for candidate keyword matching).
        self.opensearch_service = OpenSearchService()
        # Embedding/vector-search helper (used for semantic candidate matching).
        self.embedder = Embedder()
        # AI analysis service; presumably generates candidate/job match analyses — TODO confirm usage sites.
        self.ai_service = AIService()
        
    def process_job_matching_async(self, job_emp_id: str) -> Dict[str, Any]:
        """
        Start background job matching process and return immediately
        
        Args:
            job_emp_id: The employer job ID to process
            
        Returns:
            Dict with success status - actual processing happens in background
        """
        try:
            # Start background processing in a separate thread
            thread = threading.Thread(
                target=self._background_job_matching,
                args=(job_emp_id,),
                daemon=True
            )
            thread.start()
            
            return {
                "success": True,
                "message": f"Job matching process started for job ID: {job_emp_id}",
                "job_id": job_emp_id,
                "status": "processing_started"
            }
            
        except Exception as e:
            logger.error(f"Error starting job matching process: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "job_id": job_emp_id
            }
    
    def process_specific_candidates_async(self, job_emp_id: str, jobseeker_ids: List[int]) -> Dict[str, Any]:
        """
        Start background processing for specific candidate analysis
        
        Args:
            job_emp_id: The employer job ID to process
            jobseeker_ids: List of specific jobseeker IDs to analyze
            
        Returns:
            Dict with success status - actual processing happens in background
        """
        try:
            # Start background processing in a separate thread
            thread = threading.Thread(
                target=self._background_specific_candidates_analysis,
                args=(job_emp_id, jobseeker_ids),
                daemon=True
            )
            thread.start()
            
            return {
                "success": True,
                "message": f"Specific candidate analysis started for job ID: {job_emp_id}",
                "job_id": job_emp_id,
                "jobseeker_count": len(jobseeker_ids),
                "status": "processing_started"
            }
            
        except Exception as e:
            logger.error(f"Error starting specific candidate analysis: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "job_id": job_emp_id
            }
    
    def _background_job_matching(self, job_emp_id: str):
        """
        Thread worker for the full matching pipeline.

        Fetches the job, runs the combined candidate search, generates an AI
        analysis per candidate, and persists only results at or above a 50%
        match. Failures on one candidate never abort the batch.

        Args:
            job_emp_id: The employer job ID to process
        """
        try:
            logger.info(f"Starting background job matching for job ID: {job_emp_id}")

            # Bail out early if the job posting cannot be fetched/normalized.
            job_details = self._get_job_details(job_emp_id)
            if not job_details:
                logger.error(f"Could not retrieve job details for job ID: {job_emp_id}")
                return

            # Combined keyword + vector search; empty when either stage finds nothing.
            candidates = self._search_candidates(job_details)
            if not candidates:
                logger.info(f"No matching candidates found for job ID: {job_emp_id}")
                return

            processed_count = 0
            for candidate in candidates:
                try:
                    analysis = self._generate_candidate_analysis(job_details, candidate)
                    if not analysis:
                        continue

                    # Persist only candidates at or above the 50% match threshold.
                    if analysis.get("matching_percentage", 0) >= 50.0:
                        self._store_screening_result(job_emp_id, candidate, analysis, job_details)
                        processed_count += 1
                    else:
                        logger.info(f"Candidate {candidate.get('resume_id')} below 50% threshold: {analysis.get('matching_percentage')}%")
                except Exception as e:
                    # Isolate per-candidate failures; keep processing the rest.
                    logger.error(f"Error processing candidate {candidate.get('resume_id')}: {str(e)}")
                    continue

            logger.info(f"Job matching completed for job ID: {job_emp_id}. Processed {processed_count} candidates.")

        except Exception as e:
            logger.error(f"Error in background job matching for job ID {job_emp_id}: {str(e)}")
    
    def _background_specific_candidates_analysis(self, job_emp_id: str, jobseeker_ids: List[int]):
        """
        Thread worker: analyze an explicit list of jobseekers against one job.

        Ensures each jobseeker is indexed, confirms their details exist, runs
        the AI analysis, and stores every successful result (no minimum-match
        threshold for this flow).

        Args:
            job_emp_id: The employer job ID to process
            jobseeker_ids: List of specific jobseeker IDs to analyze
        """
        try:
            logger.info(f"Starting specific candidate analysis for job ID: {job_emp_id}, candidates: {len(jobseeker_ids)}")

            job_details = self._get_job_details(job_emp_id)
            if not job_details:
                logger.error(f"Could not retrieve job details for job ID: {job_emp_id}")
                return

            # Type 1 analysis ignores the job's country restriction entirely.
            logger.info(f"Type 1 specific analysis - country filtering disabled for job {job_emp_id}")
            if job_details.get('country_header_code'):
                logger.info(f"Original job country: {job_details['country_header_code']} - will be ignored for type 1")
                job_details['country_header_code'] = None  # Remove country filter for type 1

            processed_count = 0
            indexed_count = 0

            for raw_id in jobseeker_ids:
                try:
                    # Normalize IDs to strings for consistent downstream handling.
                    sid = str(raw_id)

                    if not self._ensure_jobseeker_indexed(sid):
                        logger.warning(f"Could not ensure indexing for jobseeker: {sid}")
                        continue
                    indexed_count += 1

                    # Existence check only; the details themselves are not used here.
                    if not self._get_candidate_details(sid):
                        logger.warning(f"Could not get candidate details for jobseeker: {sid}")
                        continue

                    # Direct analysis: search scores do not apply to this flow.
                    candidate = {
                        "resume_id": sid,
                        "keyword_score": 0,
                        "keyword_percentage": 0,
                        "vector_score": 0,
                        "vector_percentage": 0,
                        "combined_score": 0  # Will be set by AI analysis
                    }

                    analysis = self._generate_candidate_analysis(job_details, candidate)
                    if analysis:
                        # Every analyzed candidate is persisted (no threshold).
                        self._store_specific_screening_result(job_emp_id, candidate, analysis, job_details)
                        processed_count += 1

                except Exception as e:
                    logger.error(f"Error processing specific candidate {raw_id}: {str(e)}")
                    continue

            logger.info(f"Specific candidate analysis completed for job ID: {job_emp_id}. Indexed: {indexed_count}, Processed: {processed_count}")

        except Exception as e:
            logger.error(f"Error in background specific candidate analysis for job ID {job_emp_id}: {str(e)}")
    
    def _get_job_details(self, job_emp_id: str) -> Optional[Dict[str, Any]]:
        """
        Get job details from the job API and normalize them into a canonical dict.

        Args:
            job_emp_id: The employer job ID

        Returns:
            Dictionary with job details or None if the job could not be
            fetched, the payload was malformed, or no job title was found.
        """
        try:
            result = self.job_api_service.get_employer_job(job_emp_id)

            if not result.get('success'):
                logger.error(f"Failed to get job details: {result.get('error')}")
                return None

            job_data = result.get('data', {})

            # The API returns an empty list (not a dict) for invalid job IDs.
            if isinstance(job_data, list):
                logger.error(f"Invalid job ID {job_emp_id}: API returned empty list instead of job data")
                return None

            if not isinstance(job_data, dict):
                logger.error(f"Invalid job ID {job_emp_id}: API returned {type(job_data)} instead of dictionary")
                return None

            # Some responses nest the payload under a 'job' key.
            if 'job' in job_data:
                job_data = job_data['job']

            job_details = self._extract_job_fields(job_emp_id, job_data)

            # Resolve the country header code used for candidate country filtering.
            if job_details.get('country_id'):
                job_details['country_header_code'] = self._lookup_country_header_code(job_details['country_id'])

            if job_details.get('country_header_code'):
                logger.info(f"Country filtering enabled for job {job_emp_id}: {job_details['country_header_code']}")
            else:
                logger.warning(f"No country filtering for job {job_emp_id} - will search all countries")

            # A job title is the minimum viable payload; reject anything without one.
            if not job_details.get('job_title'):
                logger.error(f"Invalid job ID {job_emp_id}: No job title found in API response")
                return None

            logger.info(f"Successfully retrieved job details for job ID: {job_emp_id}")
            return job_details

        except Exception as e:
            logger.error(f"Error getting job details for job ID {job_emp_id}: {str(e)}")
            return None

    def _extract_job_fields(self, job_emp_id: str, job_data: Dict[str, Any]) -> Dict[str, Any]:
        """Map the raw API payload onto canonical keys, trying each known field-name variant in order."""
        return {
            'job_emp_id': job_emp_id,
            'job_title': (job_data.get('job_title', '') or
                          job_data.get('title', '') or
                          job_data.get('position', '') or
                          job_data.get('job_position', '')),
            'job_desc': (job_data.get('job_description', '') or
                         job_data.get('job_desc', '') or
                         job_data.get('description', '') or
                         job_data.get('job_details', '')),
            'skill_list': (job_data.get('skill_list', '') or
                           job_data.get('skills', '') or
                           job_data.get('required_skills', '')),
            'skill_list_ar': job_data.get('skill_list_ar', ''),
            'workplace_type': (job_data.get('workplace_type', '') or
                               job_data.get('work_type', '') or
                               job_data.get('remote_type', '')),
            'job_type': (job_data.get('job_type', '') or
                         job_data.get('employment_type', '') or
                         job_data.get('type', '')),
            'country_name': (job_data.get('country_name', '') or
                             job_data.get('country', '') or
                             job_data.get('location', '')),
            'country_id': job_data.get('country_id'),
            'company_id': (job_data.get('company_id') or
                           job_data.get('employer_id') or
                           job_data.get('employer_user_id')),
            'country_header_code': None  # Resolved separately from country_id
        }

    def _lookup_country_header_code(self, country_id) -> Optional[str]:
        """
        Resolve the lowercase header code for a country_id from the
        'country_update' MongoDB collection. Returns None on any failure
        (no fallback mapping - database data only).
        """
        try:
            # Use auto datetime conversion to avoid BSON decoding errors.
            codec_options = CodecOptions(datetime_conversion=DatetimeConversion.DATETIME_AUTO)

            client = mongo_service.client
            db = client[mongo_service.db.name].with_options(codec_options=codec_options)
            country_collection = db['country_update']

            # Project only string fields to sidestep datetime issues.
            country_doc = country_collection.find_one(
                {'id': country_id},
                {'id': 1, 'name': 1, 'header_code': 1}
            )
            if country_doc and country_doc.get('header_code'):
                header_code = country_doc['header_code'].lower()
                logger.info(f"Found country header code: {header_code} for country_id: {country_id}")
                return header_code

            logger.warning(f"No header_code found for country_id: {country_id}")
            return None
        except Exception as e:
            logger.error(f"Error getting country header code: {str(e)}")
            logger.warning(f"Could not determine country_header_code for country_id: {country_id}")
            return None
    
    def _search_candidates_without_country_filter(self, job_details: Dict[str, Any], jobseeker_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """
        Search for candidates without country filtering (for type 1 and type 3)

        Args:
            job_details: Job details dictionary
            jobseeker_ids: Optional list of specific jobseeker IDs to analyze

        Returns:
            List of matching candidates without country restrictions; empty
            list when no candidate passes the minimum quality threshold.
        """
        try:
            # Create search query from job details
            search_query = self._build_search_query(job_details)
            logger.info(f"Search query built without country filtering: '{search_query}'")

            logger.info("Country filtering disabled for type 1 and type 3")

            # Either look up an explicit ID list, or run a full keyword search.
            if jobseeker_ids:
                logger.info(f"Searching specific {len(jobseeker_ids)} candidates: {jobseeker_ids}")
                keyword_results = self.opensearch_service.search_by_ids(jobseeker_ids)
                logger.info(f"OpenSearch returned {len(keyword_results) if keyword_results else 0} specific candidates")
            else:
                keyword_results = self.opensearch_service.search(search_query)
                logger.info(f"OpenSearch returned {len(keyword_results) if keyword_results else 0} results")

            # Quality gate: drop hits below the configured absolute minimum keyword score.
            absolute_min_score = float(os.getenv("JOB_MATCHING_ABSOLUTE_MIN_SCORE", 0.3))
            filtered_keyword_results = [r for r in keyword_results if r.get("keyword_score", 0) >= absolute_min_score] if keyword_results else []
            logger.info(f"OpenSearch results above {absolute_min_score} threshold: {len(filtered_keyword_results)}")

            if not filtered_keyword_results:
                logger.info("No candidates met minimum quality threshold")
                return []

            # Vector-search only the candidates that survived the keyword stage.
            filtered_resume_ids = [r.get("resume_id") for r in filtered_keyword_results if r.get("resume_id")]
            logger.info(f"Performing vector search on {len(filtered_resume_ids)} pre-filtered candidates without country restriction")

            vector_results = self.embedder.search_filtered_candidates(search_query, filtered_resume_ids)
            # Guard against a None return from the embedder: previously this
            # raised AttributeError on .get() and was swallowed by the broad
            # except below, masking the real failure mode.
            vector_hits = (vector_results or {}).get('result', [])
            logger.info(f"Vector search returned {len(vector_hits)} results from filtered candidates")

            # Merge keyword and vector scores into the final ranked list.
            return self._merge_and_rank_results(filtered_keyword_results, vector_hits, job_details)

        except Exception as e:
            logger.error(f"Error searching candidates without country filter: {str(e)}")
            return []

    def _search_candidates_for_specific_analysis(self, job_details: Dict[str, Any], jobseeker_ids: Optional[List[str]] = None) -> List[Dict[str, Any]]:
        """
        Search for candidates for specific analysis without country filtering (type 1)

        Args:
            job_details: Job details dictionary
            jobseeker_ids: Optional list of specific jobseeker IDs to analyze

        Returns:
            List of matching candidates without country restrictions; empty
            list when no candidate passes the minimum quality threshold.
        """
        try:
            # Create search query from job details
            search_query = self._build_search_query(job_details)
            logger.info(f"Search query built for specific analysis: '{search_query}'")

            logger.info("Country filtering disabled for specific analysis (type 1)")

            # Either look up an explicit ID list, or run a full keyword search.
            if jobseeker_ids:
                logger.info(f"Searching specific {len(jobseeker_ids)} candidates: {jobseeker_ids}")
                keyword_results = self.opensearch_service.search_by_ids(jobseeker_ids)
                logger.info(f"OpenSearch returned {len(keyword_results) if keyword_results else 0} specific candidates")
            else:
                keyword_results = self.opensearch_service.search(search_query)
                logger.info(f"OpenSearch returned {len(keyword_results) if keyword_results else 0} results")

            # Quality gate: drop hits below the configured absolute minimum keyword score.
            absolute_min_score = float(os.getenv("JOB_MATCHING_ABSOLUTE_MIN_SCORE", 0.3))
            filtered_keyword_results = [r for r in keyword_results if r.get("keyword_score", 0) >= absolute_min_score] if keyword_results else []
            logger.info(f"OpenSearch results above {absolute_min_score} threshold: {len(filtered_keyword_results)}")

            if not filtered_keyword_results:
                logger.info("No candidates met minimum quality threshold for specific analysis")
                return []

            # Vector-search only the candidates that survived the keyword stage.
            filtered_resume_ids = [r.get("resume_id") for r in filtered_keyword_results if r.get("resume_id")]
            logger.info(f"Performing vector search on {len(filtered_resume_ids)} pre-filtered candidates without country restriction")

            vector_results = self.embedder.search_filtered_candidates(search_query, filtered_resume_ids)
            # Guard against a None return from the embedder: previously this
            # raised AttributeError on .get() and was swallowed by the broad
            # except below, masking the real failure mode.
            vector_hits = (vector_results or {}).get('result', [])
            logger.info(f"Vector search returned {len(vector_hits)} results from filtered candidates")

            # Merge keyword and vector scores into the final ranked list.
            return self._merge_and_rank_results(filtered_keyword_results, vector_hits, job_details)

        except Exception as e:
            logger.error(f"Error searching candidates for specific analysis: {str(e)}")
            return []

    def _search_candidates(self, job_details: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Search for candidates using both OpenSearch and Vector search with country filtering.

        BOTH stages must return results: if either the keyword stage or the
        vector stage comes back empty, an empty list is returned and no
        candidates are processed.

        Args:
            job_details: Job details dictionary

        Returns:
            List of matching candidates with combined scores (possibly empty)
        """
        try:
            search_query = self._build_search_query(job_details)
            logger.info(f"Search query built: '{search_query}'")

            # Optional filters derived from the job posting.
            country_header_code = job_details.get('country_header_code')
            job_title = job_details.get('job_title', '')

            if country_header_code:
                logger.info(f"Country filtering enabled for screening type 2: {country_header_code}")
            else:
                logger.warning("No country_header_code found - search will include all countries")

            if job_title:
                logger.info(f"Job title filtering enabled: '{job_title}' - will prioritize relevant candidates")

            # Stage 1: keyword search, country/job-title restricted when possible.
            if country_header_code:
                keyword_results = self.opensearch_service.search_with_country_filter(
                    search_query,
                    country_header_code,
                    job_title=job_title  # Pass job title for relevance filtering
                )
                logger.info(f"OpenSearch with country filter ({country_header_code}) and job title ('{job_title}') returned {len(keyword_results) if keyword_results else 0} results")
            else:
                keyword_results = self.opensearch_service.search(search_query)
                logger.info(f"OpenSearch returned {len(keyword_results) if keyword_results else 0} results")

            # Audit trail: record keyword-stage results.
            self._log_search_results(
                job_emp_id=job_details.get('job_emp_id'),
                search_type="opensearch",
                results=keyword_results if keyword_results else []
            )

            # Stage 2: quality gate on keyword scores.
            absolute_min_score = float(os.getenv("JOB_MATCHING_ABSOLUTE_MIN_SCORE", 0.3))
            filtered_keyword_results = [r for r in keyword_results if r.get("keyword_score", 0) >= absolute_min_score] if keyword_results else []
            logger.info(f"OpenSearch results above {absolute_min_score} threshold: {len(filtered_keyword_results)}")

            if not filtered_keyword_results:
                # No keyword survivors: log an empty vector stage and stop.
                # (Removed a dead `vector_results = {"result": []}` assignment
                # that could never be read past the return below.)
                logger.info("No candidates passed OpenSearch filtering, skipping vector search")
                self._log_search_results(
                    job_emp_id=job_details.get('job_emp_id'),
                    search_type="vector_search",
                    results=[]
                )
                logger.warning(f"No OpenSearch results - stopping candidate matching for job {job_details.get('job_emp_id')}")
                return []

            # Stage 3: vector search restricted to keyword survivors (double filtering).
            filtered_resume_ids = [r.get("resume_id") for r in filtered_keyword_results if r.get("resume_id")]
            logger.info(f"Performing vector search on {len(filtered_resume_ids)} pre-filtered candidates")

            vector_results = self.embedder.search_filtered_candidates(
                search_query,
                filtered_resume_ids,
                country_code=country_header_code
            )
            logger.info(f"Vector search returned {len(vector_results.get('result', [])) if vector_results else 0} results from {len(filtered_resume_ids)} pre-filtered candidates")

            # Audit trail: record vector-stage results.
            self._log_search_results(
                job_emp_id=job_details.get('job_emp_id'),
                search_type="vector_search",
                results=vector_results.get('result', []) if vector_results else []
            )

            # CRITICAL: BOTH OpenSearch AND Vector search must have results.
            if not vector_results or not vector_results.get('result'):
                logger.warning(f"Vector search returned no results - stopping candidate matching for job {job_details.get('job_emp_id')}")
                logger.warning("BOTH OpenSearch and Vector search must return results for candidate matching")
                return []

            # Combine keyword and vector scores into the final candidate list.
            candidates = self._combine_and_filter_results(filtered_keyword_results, vector_results)

            logger.info(f"Found {len(candidates)} matching candidates for job: {job_details['job_title']}")
            return candidates

        except Exception as e:
            logger.error(f"Error searching candidates: {str(e)}")
            return []
    
    def _detect_job_category(self, job_title: str, job_desc: str) -> str:
        """
        Detect job category from job title and description to filter relevant candidates.

        Matching is case-insensitive substring search over the title plus the
        first 500 characters of the description. Rules are evaluated in order
        and only the FIRST matching category contributes keywords (preserving
        the original if/elif semantics).

        Args:
            job_title: Job title
            job_desc: Job description

        Returns:
            Space-joined category keywords, or "" when no category matched.
        """
        job_title_lower = job_title.lower() if job_title else ""
        job_desc_lower = job_desc.lower() if job_desc else ""
        combined_text = f"{job_title_lower} {job_desc_lower[:500]}"

        # Ordered (label, trigger keywords, category keywords) rules; first match wins.
        category_rules = [
            ("IT/Software Development",
             ['developer', 'programmer', 'software', 'python', 'java', 'javascript', 'react', 'angular', 'node.js', 'php', 'ruby', 'c++', 'c#', '.net', 'ios', 'android', 'flutter', 'kotlin', 'swift', 'backend', 'frontend', 'full stack', 'fullstack', 'mobile app', 'web development', 'software engineer', 'devops', 'cloud engineer', 'aws', 'azure', 'gcp'],
             ['software development', 'programming', 'coding', 'software engineer', 'developer', 'IT professional']),
            ("Data Science/Analytics",
             ['data scientist', 'data analyst', 'machine learning', 'ai engineer', 'deep learning', 'nlp', 'data engineer', 'big data', 'analytics', 'tableau', 'power bi', 'tensorflow', 'pytorch', 'scikit-learn'],
             ['data science', 'data analysis', 'machine learning', 'analytics', 'data engineer']),
            ("Sales/Business Development",
             ['sales', 'business development', 'account manager', 'sales manager', 'sales executive', 'sales representative', 'sales consultant', 'business analyst', 'crm', 'lead generation', 'client acquisition'],
             ['sales', 'business development', 'account management', 'client relationship', 'sales professional']),
            ("Marketing/Digital Marketing",
             ['marketing', 'digital marketing', 'seo', 'sem', 'content marketing', 'social media', 'brand manager', 'marketing manager', 'google ads', 'facebook ads', 'email marketing', 'growth hacking'],
             ['marketing', 'digital marketing', 'brand management', 'advertising', 'marketing professional']),
            ("HR/Recruitment",
             ['hr', 'human resource', 'recruiter', 'recruitment', 'talent acquisition', 'hr manager', 'hr executive', 'employee relations', 'payroll', 'benefits'],
             ['human resources', 'recruitment', 'talent acquisition', 'hr professional', 'people management']),
            ("Finance/Accounting",
             ['accountant', 'finance', 'accounting', 'financial analyst', 'auditor', 'bookkeeping', 'tax', 'cpa', 'chartered accountant', 'financial planning', 'budget'],
             ['finance', 'accounting', 'financial management', 'auditing', 'finance professional']),
            ("Design/Creative",
             ['designer', 'graphic design', 'ui/ux', 'ui designer', 'ux designer', 'web designer', 'creative', 'illustrator', 'photoshop', 'figma', 'sketch', 'adobe'],
             ['design', 'graphic design', 'ui/ux', 'creative professional', 'visual design']),
            ("Customer Service/Support",
             ['customer service', 'customer support', 'technical support', 'help desk', 'call center', 'customer care', 'support engineer', 'support specialist'],
             ['customer service', 'customer support', 'technical support', 'client support']),
            ("Operations/Logistics",
             ['operations', 'logistics', 'supply chain', 'warehouse', 'inventory', 'operations manager', 'logistics manager', 'procurement'],
             ['operations', 'logistics', 'supply chain', 'operations management']),
            ("Healthcare/Medical",
             ['doctor', 'nurse', 'physician', 'healthcare', 'medical', 'hospital', 'clinic', 'pharmacist', 'therapist', 'paramedic'],
             ['healthcare', 'medical', 'clinical', 'healthcare professional']),
            ("Education/Teaching",
             ['teacher', 'instructor', 'professor', 'educator', 'trainer', 'tutor', 'academic', 'education', 'teaching'],
             ['education', 'teaching', 'training', 'academic', 'educator']),
        ]

        category_keywords = []
        for label, triggers, keywords in category_rules:
            if any(trigger in combined_text for trigger in triggers):
                category_keywords.extend(keywords)
                logger.info(f"Detected {label} job category")
                break
        else:
            logger.info("No specific job category detected - will search broadly")

        return " ".join(category_keywords)
    
    def _build_search_query(self, job_details: Dict[str, Any]) -> str:
        """
        Compose a search query string from the job's title, detected
        category keywords, skills, and a snippet of the description.

        Args:
            job_details: Job details dictionary

        Returns:
            Space-joined search query string with category-specific keywords
        """
        parts: List[str] = []

        title = job_details.get('job_title')
        if title:
            parts.append(title)

        # Category keywords sharpen filtering beyond the raw title text.
        category_terms = self._detect_job_category(
            job_details.get('job_title', ''),
            job_details.get('job_desc', '')
        )
        if category_terms:
            parts.append(category_terms)

        skills = job_details.get('skill_list')
        if skills:
            parts.append(skills)

        description = job_details.get('job_desc')
        if description:
            # Only the first 200 characters, to keep the query focused.
            parts.append(description[:200])

        return " ".join(parts)
    
    def _combine_and_filter_results(self, keyword_results: List[Dict], vector_results: Dict) -> List[Dict[str, Any]]:
        """
        Combine pre-filtered OpenSearch and Vector search results, apply percentage threshold

        The vector results are authoritative: only resume IDs returned by the
        vector search can survive. Each source's scores are normalized to a
        percentage of that source's maximum score; candidates present in both
        sources get the average of the two percentages, and anything below the
        JOB_MATCHING_MIN_PERCENTAGE threshold (default 50%) is dropped.

        Args:
            keyword_results: Pre-filtered results from OpenSearch (already above absolute minimum)
            vector_results: Results from Qdrant vector search on filtered candidates

        Returns:
            List of filtered candidates with combined scores, sorted by
            combined_score descending
        """
        # Keyed by resume_id so a candidate appearing in both sources is
        # stored once.
        candidates: Dict[str, Dict[str, Any]] = {}  # Use dict to avoid duplicates
        
        # Process OpenSearch results (store all for reference, but Vector DB will determine final candidates)
        if keyword_results:
            logger.info(f"Processing {len(keyword_results)} OpenSearch results for reference scores")
            
            max_keyword_score = max([r.get("keyword_score", 0) for r in keyword_results]) if keyword_results else 0
            logger.info(f"Max OpenSearch score: {max_keyword_score}")
            
            # Store ALL OpenSearch results without threshold - Vector DB will filter
            for result in keyword_results:
                resume_id = result.get("resume_id")
                score = result.get("keyword_score", 0)
                # Normalize against the best keyword score in this batch.
                match_percentage = (score / max_keyword_score) * 100 if max_keyword_score > 0 else 0
                
                if resume_id:
                    candidates[resume_id] = {
                        "resume_id": resume_id,
                        "keyword_score": score,
                        "keyword_percentage": round(match_percentage, 2),
                        # Vector fields are placeholders until (and unless)
                        # the vector pass below fills them in.
                        "vector_score": 0,
                        "vector_percentage": 0,
                        "combined_score": match_percentage
                    }
            
            logger.info(f"Stored {len(candidates)} OpenSearch candidates for Vector DB filtering")
        else:
            logger.info("No OpenSearch results received")
        
        # Process Vector search results - THESE ARE THE FINAL CANDIDATES that will be screened
        vector_data = vector_results.get("result", []) if vector_results else []
        if vector_data:
            logger.info(f"Processing {len(vector_data)} Vector DB results - THESE WILL BE FINAL SCREENING CANDIDATES")
            
            max_vector_score = max([r.get("score", 0) for r in vector_data])
            logger.info(f"Max vector score: {max_vector_score}")
            
            # Configurable cut-off; env var overrides the 50% default.
            min_threshold = float(os.getenv("JOB_MATCHING_MIN_PERCENTAGE", 50.0))
            final_candidates = {}
            
            for result in vector_data:
                resume_id = result.get("payload", {}).get("resume_id")
                score = result.get("score", 0)
                # Calculate percentage based on max vector score
                vector_match_percentage = (score / max_vector_score) * 100 if max_vector_score > 0 else 0
                
                # Get OpenSearch data if exists
                if resume_id in candidates:
                    # Candidate has both OpenSearch and Vector scores
                    keyword_percentage = candidates[resume_id]["keyword_percentage"]
                    # Combined score is average of keyword and vector percentages
                    combined = round((keyword_percentage + vector_match_percentage) / 2, 2)
                    
                    # Apply 50% threshold on COMBINED score
                    if combined >= min_threshold:
                        final_candidates[resume_id] = {
                            "resume_id": resume_id,
                            "keyword_score": candidates[resume_id]["keyword_score"],
                            "keyword_percentage": keyword_percentage,
                            "vector_score": score,
                            "vector_percentage": round(vector_match_percentage, 2),
                            "combined_score": combined
                        }
                        logger.info(f"✓ Candidate {resume_id}: combined {combined}% (keyword {keyword_percentage}% + vector {round(vector_match_percentage, 2)}%)")
                    else:
                        logger.info(f"✗ Rejected {resume_id}: combined {combined}% below {min_threshold}% threshold")
                else:
                    # Vector result not in OpenSearch - include only if vector percentage meets threshold
                    logger.warning(f"Vector candidate {resume_id} not in OpenSearch - using vector score only")
                    if vector_match_percentage >= min_threshold:
                        final_candidates[resume_id] = {
                            "resume_id": resume_id,
                            "keyword_score": 0,
                            "keyword_percentage": 0,
                            "vector_score": score,
                            "vector_percentage": round(vector_match_percentage, 2),
                            "combined_score": round(vector_match_percentage, 2)
                        }
            
            # Replace candidates dict with only Vector DB filtered candidates
            # (any OpenSearch-only entries are intentionally discarded here).
            candidates = final_candidates
            logger.info(f"✓✓✓ FINAL: {len(candidates)} candidates from Vector DB will proceed to screening ✓✓✓")
        else:
            logger.info("No vector results - clearing all candidates")
            candidates = {}  # No Vector DB results = No screening candidates
        
        # Convert to list and sort by combined score
        candidate_list = list(candidates.values())
        candidate_list.sort(key=lambda x: x["combined_score"], reverse=True)
        
        logger.info(f"After double filtering (absolute + {os.getenv('JOB_MATCHING_MIN_PERCENTAGE', '50.0')}%): {len(candidate_list)} candidates")
        return candidate_list
    
    def _generate_candidate_analysis(self, job_details: Dict[str, Any], candidate: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """
        Produce an AI-driven fit assessment for one candidate, covering
        strengths, weaknesses, and a score justification.

        Args:
            job_details: Job details dictionary
            candidate: Candidate information with scores

        Returns:
            Parsed analysis dictionary, or None when no stored candidate data
            exists (caller falls back to the direct method) or on any error
        """
        try:
            # Pull the candidate's stored profile from the database.
            profile = self._get_candidate_details(candidate["resume_id"])
            if not profile:
                logger.info(f"No database candidate data found for {candidate['resume_id']}, will use direct analysis method")
                return None

            # Build the comparison prompt from job + profile + scores.
            analysis_prompt = self._create_analysis_prompt(job_details, profile, candidate)

            raw_response = self.ai_service.ai_response(
                question=analysis_prompt,
                system_prompt="You are an expert HR analyst. Analyze the candidate's fit for the job and provide detailed, professional assessment.",
                max_tokens=1000,
                company_id=job_details.get('company_id') if job_details else None,
                feature="screening"
            )

            # Convert the free-form AI text into a structured dict, falling
            # back to the pre-computed combined score if parsing fails.
            return self._parse_ai_analysis(raw_response, candidate["combined_score"])

        except Exception as e:
            logger.error(f"Error generating candidate analysis for {candidate.get('resume_id')}: {str(e)}")
            return None
    
    def _get_candidate_details(self, resume_id: str) -> Optional[Dict[str, Any]]:
        """
        Fetch a candidate's full profile from MongoDB.

        Tries the aggregation pipeline first; when it yields nothing usable,
        falls back to querying the individual collections directly.

        Args:
            resume_id: Resume/jobseeker ID

        Returns:
            Candidate details or None if not found / on error
        """
        try:
            # Preferred path: a single aggregation returns everything.
            details = mongo_service.get_jobseeker_complete_data(resume_id)
            if details and details.get('basic_details'):
                return details

            # Fallback path: hit each collection individually.
            logger.info(f"Aggregation returned empty for {resume_id}, trying direct queries")

            seeker_id = int(resume_id)

            seeker = mongo_service.get_collection('jobseekers').find_one(
                {"id": seeker_id}
            )
            if not seeker:
                logger.warning(f"Jobseeker {resume_id} not found or inactive")
                return None

            basic = mongo_service.get_collection('jobseeker_basic_details').find_one(
                {"jobseeker_id": seeker_id}
            )
            employment = list(mongo_service.get_collection('jobseeker_employment_details').find(
                {"jobseeker_id": seeker_id}
            ))
            education = list(mongo_service.get_collection('jobseeker_education_details').find(
                {"jobseeker_id": seeker_id}
            ))

            # Assemble the same shape the aggregation would have produced;
            # certification/course/project lists aren't fetched on this path.
            details = {
                'id': seeker_id,
                'basic_details': basic or {},
                'employment_details': employment,
                'education_details': education,
                'certification_details': [],
                'course_details': [],
                'project_details': [],
                'email': seeker.get('email', ''),
                'mobile_number': seeker.get('mobile_number', ''),
                'country_id': seeker.get('country_id', ''),
                'status': seeker.get('status', 1),
                'indexed': seeker.get('indexed', 0),
            }

            logger.info(f"Retrieved data via direct queries for {resume_id}")
            return details

        except Exception as e:
            logger.error(f"Error getting candidate details for {resume_id}: {str(e)}")
            return None
    
    def _create_analysis_prompt(self, job_details: Dict[str, Any], candidate_data: Dict[str, Any], candidate_scores: Dict[str, Any]) -> str:
        """
        Create prompt for AI analysis
        
        Args:
            job_details: Job requirements
            candidate_data: Candidate information
            candidate_scores: Matching scores
            
        Returns:
            Formatted prompt string
        """
        try:
            # Extract candidate information safely
            basic_details = candidate_data.get("basic_details", {}) if candidate_data else {}
            employment_details = candidate_data.get("employment_details", []) if candidate_data else []
            education_details = candidate_data.get("education_details", []) if candidate_data else []
            
            # logger.info(f"Creating prompt - Basic details keys: {list(basic_details.keys()) if basic_details else 'None'}")
            # logger.info(f"Creating prompt - Employment count: {len(employment_details) if employment_details else 0}")
            # logger.info(f"Creating prompt - Education count: {len(education_details) if education_details else 0}")
            
            # Format candidate experience safely
            experience_text = ""
            if employment_details and isinstance(employment_details, list):
                for emp in employment_details[:3]:  # Top 3 experiences
                    if isinstance(emp, dict) and emp.get("role") and emp.get("company"):
                        experience_text += f"- {emp['role']} at {emp['company']}\n"
                    elif isinstance(emp, dict) and emp.get("designation") and emp.get("company_name"):
                        experience_text += f"- {emp['designation']} at {emp['company_name']}\n"
            
            # Format candidate education safely
            education_text = ""
            if education_details and isinstance(education_details, list):
                for edu in education_details[:2]:  # Top 2 educations
                    if isinstance(edu, dict) and edu.get("degree") and edu.get("institution"):
                        education_text += f"- {edu['degree']} from {edu['institution']}\n"
            
            # Safe access to job details
            job_title = job_details.get('job_title', 'N/A') if job_details else 'N/A'
            skill_list = job_details.get('skill_list', 'N/A') if job_details else 'N/A'
            job_type = job_details.get('job_type', 'N/A') if job_details else 'N/A'
            workplace_type = job_details.get('workplace_type', 'N/A') if job_details else 'N/A'
            country_name = job_details.get('country_name', 'N/A') if job_details else 'N/A'
            job_desc = job_details.get('job_desc', 'N/A') if job_details else 'N/A'
            
            # Safe access to candidate scores
            combined_score = candidate_scores.get('combined_score', 0) if candidate_scores else 0
            
            # Get candidate's current designation for job title comparison
            candidate_current_designation = basic_details.get('current_desigation', '') or basic_details.get('current_designation', 'N/A')
            
            # Calculate total experience years from employment details
            total_experience_years = 0
            if employment_details and isinstance(employment_details, list):
                for emp in employment_details:
                    if isinstance(emp, dict):
                        # Try to extract duration information
                        duration = emp.get('job_experience_duration', 0)
                        if isinstance(duration, (int, float)):
                            total_experience_years += duration
                        elif isinstance(duration, str) and duration.replace('.', '').isdigit():
                            total_experience_years += float(duration)
                        
                        # Build detailed experience text with responsibilities
                        company = emp.get('company_name', emp.get('company', 'N/A'))
                        designation = emp.get('designation', emp.get('role', 'N/A'))
                        job_desc = emp.get('job_desc', emp.get('job_description', 'No description'))[:200]
                        
                        experience_text += f"• {designation} at {company}\n  Responsibilities: {job_desc}...\n\n"
            
            # Extract ALL skills mentioned in candidate's CV for verification
            candidate_skills_list = []
            
            # From key_skills field
            key_skills = basic_details.get('key_skills', '')
            if key_skills:
                if isinstance(key_skills, list):
                    candidate_skills_list.extend([str(s).strip() for s in key_skills if s])
                elif isinstance(key_skills, str):
                    candidate_skills_list.extend([s.strip() for s in key_skills.split(',') if s.strip()])
            
            # Extract from experience descriptions - extract ALL words from job descriptions
            if employment_details and isinstance(employment_details, list):
                for emp in employment_details:
                    if isinstance(emp, dict):
                        # Get designation
                        designation = emp.get('designation', '') or emp.get('role', '')
                        if designation:
                            candidate_skills_list.append(designation)
                        
                        # Get job description text and extract ALL mentioned skills/technologies
                        job_desc_text = emp.get('job_desc', '') or emp.get('job_description', '')
                        
                        # Handle string, list, and nested list formats
                        if isinstance(job_desc_text, list):
                            # Flatten nested lists recursively
                            flat_list = []
                            for item in job_desc_text:
                                if isinstance(item, list):
                                    flat_list.extend([str(subitem) for subitem in item if subitem])
                                else:
                                    flat_list.append(str(item))
                            job_desc_text = ' '.join(flat_list)
                        
                        if isinstance(job_desc_text, str) and job_desc_text:
                            # Extract ALL capitalized words and technical patterns (no static list)
                            # This will capture: Python, Laravel, React.js, Node.js, etc. from actual text
                            words = job_desc_text.replace('(', ' ').replace(')', ' ').replace(',', ' ').split()
                            for word in words:
                                word = word.strip()
                                # Capture capitalized words or words with dots (React.js, Node.js)
                                if word and (word[0].isupper() or '.' in word):
                                    # Remove trailing punctuation
                                    word = word.rstrip('.,;:!?')
                                    if len(word) > 2:  # Skip very short words
                                        candidate_skills_list.append(word)
            
            # Remove duplicates and create comma-separated string
            candidate_skills_list = list(set(candidate_skills_list))
            candidate_skills_str = ", ".join(candidate_skills_list) if candidate_skills_list else "No specific technical skills listed"
            
            # Enhanced prompt with line-by-line comparison approach - scoring based on job description only
            prompt = f"""
Analyze this candidate's CV against the job post below.

Job Post:
Position: {job_title}
Description: {job_desc if job_desc != 'N/A' else 'N/A'}

Candidate CV:
Name: {basic_details.get('first_name', '')} {basic_details.get('last_name', '')}
Current Designation: {candidate_current_designation}
Total Experience: ~{total_experience_years} years
Key Skills: {basic_details.get('key_skills', 'N/A')}
Profile Summary: {basic_details.get('profile_summary', 'N/A')[:300] if basic_details.get('profile_summary') else 'N/A'}

**CANDIDATE'S COMPLETE TECHNICAL SKILLS LIST (Extracted from entire CV):**
{candidate_skills_str}

Work Experience:
{experience_text if experience_text else 'No detailed experience data available'}

Education:
{education_text if education_text else 'No education data available'}

**CRITICAL INSTRUCTION - READ CAREFULLY:**

Step 1: Identify the PRIMARY TECHNICAL SKILL required for this role from the job title and description.
- For "Python Developer" → Python is MANDATORY
- For "Java Developer" → Java is MANDATORY  
- For "React Developer" → React is MANDATORY
- For "Sales Manager" → Sales experience is MANDATORY
- And so on...

Step 2: Check if candidate HAS this primary technical skill in their CV.
**IMPORTANT: Search for the EXACT WORD in the "CANDIDATE'S COMPLETE TECHNICAL SKILLS LIST" provided above.**

**EXACT MATCHING RULES (MANDATORY):**
1. Search for the EXACT skill name (e.g., "Python") in the skills list
2. "Node.js" does NOT contain "Python" - they are completely different
3. "JavaScript" does NOT contain "Java" - they are completely different
4. "React.js" is NOT "React Native" - they are different
5. Job title "Node.js Developer" does NOT mean the person knows Python
6. Backend experience does NOT automatically mean Python knowledge
7. "SQL" or "PL/SQL" does NOT mean "Python" - completely different languages

**HOW TO CHECK:**
- Read the skills list: "{candidate_skills_str}"
- Search for the PRIMARY SKILL word (e.g., "Python") in this exact list
- If "Python" appears as a separate word → FOUND
- If "Python" does NOT appear → NOT FOUND (even if Node.js, JavaScript, Java, SQL appear)
- DO NOT infer, DO NOT assume, DO NOT hallucinate

Step 3: Calculate EXACT score using this formula:

**If PRIMARY SKILL MISSING:**
- Maximum score = 35%
- Calculate based on: (Transferable skills × 20%) + (Education relevance × 15%)
- Example: Good education + some relevant experience = 25-30%
- Example: No relevant background at all = 0-15%

**If PRIMARY SKILL PRESENT:**
Base Score Calculation:
A. PRIMARY SKILL presence = 40 points (mandatory)
B. Experience with PRIMARY SKILL:
   - 0-1 years = +5 points (Total: 45%)
   - 1-2 years = +10 points (Total: 50%)
   - 2-3 years = +15 points (Total: 55%)
   - 3-5 years = +20 points (Total: 60%)
   - 5-7 years = +25 points (Total: 65%)
   - 7-10 years = +30 points (Total: 70%)
   - 10+ years = +35 points (Total: 75%)

C. SECONDARY SKILLS match (calculate percentage):
   - Count how many required secondary skills candidate has
   - 0-25% match = +0 points
   - 25-50% match = +5 points
   - 50-75% match = +10 points
   - 75-100% match = +15 points

D. EDUCATION relevance:
   - Relevant degree (CS, IT, Engineering) = +5 points
   - Other degree or no degree = +0 points

E. PROJECT/WORK quality:
   - Strong relevant projects mentioned = +5 points
   - Basic/no relevant projects = +0 points

**Final Score = A + B + C + D + E** (Maximum 100%)

**CRITICAL:** Each candidate MUST get different scores based on their actual experience years and secondary skills match. DO NOT give everyone the same score!

**COMMON MISTAKES TO AVOID:**
 WRONG: "Python - FOUND (Node.js Developer)" - Node.js is NOT Python
 WRONG: "Python - FOUND (Backend Developer)" - Backend ≠ Python
 WRONG: "Python - FOUND (JavaScript, Node.js)" - Neither is Python
 CORRECT: "Python - NOT FOUND (skills: Node.js, JavaScript, React.js - NO Python)"
 CORRECT: "Python - FOUND (appears as: Python, Django, Flask)"

**SCORING RULES (MANDATORY - DO NOT VIOLATE):**
1. If PRIMARY TECHNICAL SKILL is NOT in "CANDIDATE'S COMPLETE TECHNICAL SKILLS LIST" → MAXIMUM score = 35%
2. DO NOT give credit for "transferable skills" or "similar technologies" when primary skill is missing
3. DO NOT assume candidate can learn the skill - score based on CURRENT skills only
4. Node.js experience does NOT qualify for Python roles (SQL ≠ Python, PL/SQL ≠ Python)
5. Backend experience does NOT qualify for Frontend roles (and vice versa)
6. Sales experience does NOT qualify for IT roles (and vice versa)
7. DO NOT hallucinate - if skill is not in the list, it is NOT FOUND

Compare the CV against the Job Description:
1. **PRIMARY TECHNICAL SKILL** - Does candidate have the exact skill mentioned in job title/description?
2. Secondary technical skills and tools
3. Years of relevant experience  
4. Industry/domain knowledge
5. Education and certifications

**OUTPUT FORMAT - YOU MUST USE THIS EXACT STRUCTURE:**

MATCH_SCORE: [Write only the number, e.g., 75% or 20%]

MATCH_JUSTIFICATION:
PRIMARY SKILL CHECK: [Skill Name] - [FOUND/NOT FOUND]. [Explain the score calculation in 2-3 sentences. For FOUND: Base score 40 + Experience points + Secondary skills match + Education + Projects = Final score. For NOT FOUND: Explain why max 35% and how you calculated the actual score based on transferable skills and education.]

STRENGTHS:
[Point 1 - Specific achievement or skill]
[Point 2 - Specific achievement or skill]
[Point 3 - Specific achievement or skill]
[Point 4 - Specific achievement or skill]

WEAKNESSES:
[Point 1 - If primary skill missing, write: "Missing [Skill] - the primary technical requirement for this role"]
[Point 2 - Specific gap or concern]
[Point 3 - Specific gap or concern]
[Point 4 - Specific gap or concern]

TOP_KEYWORDS:
[Keyword1, Keyword2, Keyword3, Keyword4, Keyword5]

**EXAMPLE OUTPUT FOR 80% MATCH:**
MATCH_SCORE: 80%

MATCH_JUSTIFICATION:
PRIMARY SKILL CHECK: Python - FOUND. Base score: 40 points for Python presence. Experience: 5 years of Python development = +25 points. Secondary skills: Django and Flask present (60% match) = +10 points. Education: B.Tech CS = +5 points. Strong projects mentioned = +0. Final Score: 80%.

STRENGTHS:
Strong Python development experience with 5+ years
Expertise in Django and Flask frameworks
Relevant Computer Science degree
Good understanding of web development concepts

WEAKNESSES:
Limited experience with React.js mentioned in job requirements
No AWS cloud experience mentioned
Could benefit from more DevOps knowledge
Missing Kubernetes expertise

TOP_KEYWORDS:
Python, Django, Flask, Web Development, Backend

**EXAMPLE OUTPUT FOR 25% MATCH (Primary Skill Missing):**
MATCH_SCORE: 25%

MATCH_JUSTIFICATION:
PRIMARY SKILL CHECK: PHP - NOT FOUND. The candidate has customer service and hospitality experience but lacks the primary technical requirement (PHP). Maximum possible score is 35%. Transferable skills (team leadership, customer service) = 20%. No relevant technical education = 0%. Final Score: 20%.

STRENGTHS:
Strong customer service background
Excellent team leadership abilities
Experience in upscale hospitality settings
Good communication and interpersonal skills

WEAKNESSES:
Missing PHP - the primary technical requirement for this role
No programming or technical development experience
No relevant IT or Computer Science education
Career background in hospitality, not software development

TOP_KEYWORDS:
Customer Service, Team Leadership, Hospitality, Bartending, Communication

**CRITICAL REMINDERS:**
- MATCH_SCORE must be on its own line with the header "MATCH_SCORE:"
- MATCH_JUSTIFICATION must be on its own line with the header "MATCH_JUSTIFICATION:"
- STRENGTHS, WEAKNESSES, TOP_KEYWORDS must have their section headers
- If primary skill is MISSING, maximum score is 35%
"""
            return prompt
            
        except Exception as e:
            logger.error(f"Error creating analysis prompt: {str(e)}")
            logger.error(f"Job details: {job_details}")
            logger.error(f"Candidate data: {candidate_data}")
            logger.error(f"Candidate scores: {candidate_scores}")
            raise e
    
    def _parse_ai_analysis(self, analysis_text: str, fallback_percentage: float) -> Dict[str, Any]:
        """
        Parse AI analysis response into structured data
        
        Args:
            analysis_text: Raw AI response
            fallback_percentage: Fallback percentage if parsing fails
            
        Returns:
            Structured analysis dictionary
        """
        try:
            analysis = {
                "strengths": [],
                "weaknesses": [],
                "matching_percentage": fallback_percentage,
                "match_justification": "",
                "top_keywords": []
            }
            
            # Split into sections
            sections = analysis_text.split('\n\n')
            
            for section in sections:
                section = section.strip()
                
                if section.startswith('STRENGTHS:'):
                    strengths_text = section.replace('STRENGTHS:', '').strip()
                    # Convert to array by splitting on line breaks and filtering empty lines
                    strengths_list = [s.strip().lstrip('-•[]').strip() for s in strengths_text.split('\n') if s.strip() and not s.strip().startswith('[') and not s.strip().endswith(']')]
                    analysis["strengths"] = strengths_list
                elif section.startswith('WEAKNESSES:'):
                    weaknesses_text = section.replace('WEAKNESSES:', '').strip()
                    # Convert to array by splitting on line breaks and filtering empty lines
                    weaknesses_list = [w.strip().lstrip('-•[]').strip() for w in weaknesses_text.split('\n') if w.strip() and not w.strip().startswith('[') and not w.strip().endswith(']')]
                    analysis["weaknesses"] = weaknesses_list
                elif section.startswith('MATCH_SCORE:') or section.startswith('MATCHING_PERCENTAGE:'):
                    # Support both old and new format
                    percentage_text = section.replace('MATCH_SCORE:', '').replace('MATCHING_PERCENTAGE:', '').strip()
                    # Extract number from text (including decimals)
                    import re
                    numbers = re.findall(r'\d+\.?\d*', percentage_text)
                    if numbers:
                        analysis["matching_percentage"] = min(100.0, max(0.0, float(numbers[0])))  # Allow 0-100 range as float
                elif section.startswith('MATCH_JUSTIFICATION:'):
                    analysis["match_justification"] = section.replace('MATCH_JUSTIFICATION:', '').strip()
                elif section.startswith('TOP_KEYWORDS:'):
                    keywords_text = section.replace('TOP_KEYWORDS:', '').strip()
                    # Parse keywords - can be comma-separated on one line or multiple lines
                    if ',' in keywords_text:
                        # Comma-separated format: "Keyword1, Keyword2, Keyword3"
                        keywords = [k.strip().lstrip('-•[]').strip() for k in keywords_text.split(',') if k.strip()]
                    else:
                        # Line-separated format
                        keywords = [k.strip().lstrip('-•[]').strip() for k in keywords_text.split('\n') if k.strip() and not k.strip().startswith('[') and not k.strip().endswith(']')]
                    
                    # Take only first 5 keywords
                    analysis["top_keywords"] = keywords[:5] if keywords else []
            
            # Fallback: Try to find keywords in single-line format if TOP_KEYWORDS section not found
            if not analysis["top_keywords"]:
                # Check for single line format without section break
                for line in analysis_text.split('\n'):
                    line = line.strip()
                    if line.startswith('TOP_KEYWORDS:'):
                        keywords_text = line.replace('TOP_KEYWORDS:', '').strip()
                        if ',' in keywords_text:
                            keywords = [k.strip().lstrip('-•[]').strip() for k in keywords_text.split(',') if k.strip()]
                            analysis["top_keywords"] = keywords[:5] if keywords else []
                        break
            
            # Only return analysis if all required fields have meaningful content
            if (not analysis["strengths"] or 
                not analysis["weaknesses"] or 
                not analysis["match_justification"] or
                len(analysis["strengths"]) == 0 or
                len(analysis["weaknesses"]) == 0 or
                analysis["match_justification"].strip() == ""):
                logger.warning("AI analysis incomplete - skipping candidate")
                logger.warning(f"Parsed analysis state: strengths={len(analysis['strengths'])} items, weaknesses={len(analysis['weaknesses'])} items, match_justification={'empty' if not analysis['match_justification'].strip() else 'present'}")
                logger.warning(f"Analysis content: {analysis}")
                return None
            
            return analysis
            
        except Exception as e:
            logger.error(f"Error parsing AI analysis: {str(e)}")
            # Return None instead of fallback data - let the candidate be skipped
            return None
    
    def _store_screening_result(self, job_emp_id: str, candidate: Dict[str, Any], analysis: Dict[str, Any], job_details: Dict[str, Any] = None):
        """
        Store a type-2 (bulk job matching) screening result in the MongoDB
        `screenings` collection.

        Duplicate handling: if a screening already exists for this
        job + candidate + type=2 combination, the record is updated only when
        the new matching percentage differs from the stored value by more than
        2 percentage points; otherwise the existing record is kept as-is and
        the call still counts as a success.

        Args:
            job_emp_id: Job employer ID
            candidate: Candidate information; must contain "resume_id"
            analysis: Parsed AI analysis with "strengths", "weaknesses",
                "matching_percentage", "match_justification" and optional
                "top_keywords"
            job_details: Complete job details dictionary (optional)

        Returns:
            bool: True when the result was inserted, updated, or skipped as a
            duplicate; False when an error occurred.
        """
        try:
            # datetime.utcnow() is deprecated since Python 3.12; produce
            # timezone-aware UTC timestamps instead (PyMongo stores them as UTC).
            from datetime import timezone

            # Get database connection (local import avoids circular imports)
            from services.database import get_database
            db = get_database()
            screenings_collection = db['screenings']
            
            # Check if screening already exists for this job + candidate + type=2
            existing_screening = screenings_collection.find_one({
                "job_id": job_emp_id,
                "jobseeker_id": int(candidate["resume_id"]),
                "type": 2
            })
            
            if existing_screening:
                # Only rewrite the stored analysis when the score moved by more
                # than 2 percentage points; smaller fluctuations are ignored.
                existing_percentage = existing_screening.get("matching_percentage", 0)
                new_percentage = round(analysis["matching_percentage"], 2)
                
                if abs(existing_percentage - new_percentage) > 2:
                    logger.info(f"Updating screening for candidate {candidate['resume_id']} - score changed from {existing_percentage}% to {new_percentage}%")
                    # Update the screening with new analysis
                    screenings_collection.update_one(
                        {"_id": existing_screening["_id"]},
                        {
                            "$set": {
                                "matching_percentage": new_percentage,
                                "strengths": analysis["strengths"],
                                "weaknesses": analysis["weaknesses"],
                                "match_justification": analysis["match_justification"],
                                "top_keywords": analysis.get("top_keywords", []),
                                "updated_at": datetime.now(timezone.utc)
                            }
                        }
                    )
                else:
                    logger.info(f"Screening already exists for candidate {candidate['resume_id']} and job {job_emp_id}, skipping (score within 2% threshold)")
                return True
            
            # Get candidate details for jobseeker_details array - store ALL available data
            candidate_data = self._get_candidate_details(candidate["resume_id"])
            jobseeker_details = []
            
            if candidate_data:
                # Pull every profile section out of the candidate document.
                basic_details = candidate_data.get("basic_details", {})
                employment_details = candidate_data.get("employment_details", [])
                education_details = candidate_data.get("education_details", [])
                certification_details = candidate_data.get("certification_details", [])
                course_details = candidate_data.get("course_details", [])
                project_details = candidate_data.get("project_details", [])
                language_details = candidate_data.get("language_details", [])
                hobby_details = candidate_data.get("hobby_details", [])
                reference_details = candidate_data.get("reference_details", [])
                portfolio_details = candidate_data.get("portfolio_details", [])
                social_media_details = candidate_data.get("social_media_details", [])
                public_profiles = candidate_data.get("public_profiles", [])  # Get public profiles
                
                # Flatten basic_details into one comprehensive record.
                basic_details_array = []
                if basic_details:
                    basic_detail = {
                        # Personal Information
                        "name": f"{basic_details.get('first_name', '')} {basic_details.get('last_name', '')}".strip(),
                        "first_name": basic_details.get('first_name', ''),
                        "last_name": basic_details.get('last_name', ''),
                        "email": candidate_data.get('email', '') or basic_details.get('email', ''),
                        "phone": candidate_data.get('mobile_number', '') or basic_details.get('phone', ''),
                        "mobile_number": candidate_data.get('mobile_number', ''),
                        "birth_date": basic_details.get('birth_date', ''),
                        "gender_id": basic_details.get('gender_id', ''),
                        "marital_status": basic_details.get('marital_status', ''),
                        "nationality_id": basic_details.get('nationality_id', ''),
                        "religion_id": basic_details.get('religion_id', ''),
                        
                        # Professional Information
                        # NOTE: 'current_desigation' is a (misspelled) legacy key in the
                        # source data; check it first, then the corrected spelling.
                        "current_designation": basic_details.get('current_desigation', '') or basic_details.get('current_designation', ''),
                        "current_company": basic_details.get('current_company', ''),
                        "total_experience_year": basic_details.get('total_experience_year', ''),
                        "total_experience_month": basic_details.get('total_experience_month', ''),
                        "total_experience": f"{basic_details.get('total_experience_year', '')} {basic_details.get('total_experience_month', '')}".strip(),
                        "industry_id": basic_details.get('industry_id', ''),
                        "functional_area_id": basic_details.get('functional_area_id', ''),
                        "profile_type_id": basic_details.get('profile_type_id', ''),
                        
                        # Skills and Profile (key_skills is a comma-separated string)
                        "key_skills": basic_details.get('key_skills', '').split(',') if basic_details.get('key_skills') else [],
                        "key_skills_id": basic_details.get('key_skills_id', []),
                        "profile_summary": basic_details.get('profile_summary', ''),
                        "cv_headline": basic_details.get('cv_headline', ''),
                        
                        # Salary Information
                        "current_salary": basic_details.get('current_salary', ''),
                        "current_salary_period": basic_details.get('current_salary_period', ''),
                        "current_salary_currency": basic_details.get('current_salary_currency', ''),
                        "expected_salary": basic_details.get('expected_salary', ''),
                        "expected_salary_period": basic_details.get('expected_salary_period', ''),
                        "expected_salary_currency": basic_details.get('expected_salary_currency', ''),
                        
                        # Location Information
                        "current_location": basic_details.get('current_location', ''),
                        "prefered_location": basic_details.get('prefered_location', ''),
                        "country_id": basic_details.get('country_id', ''),
                        "com_address": basic_details.get('com_address', ''),
                        
                        # Visa and Legal Status
                        "visa_status_id": basic_details.get('visa_status_id', ''),
                        "visa_residence_location": basic_details.get('visa_residence_location', ''),
                        "visa_validity_month": basic_details.get('visa_validity_month', ''),
                        "visa_validity_year": basic_details.get('visa_validity_year', ''),
                        "license_opt": basic_details.get('license_opt', ''),
                        
                        # System Information
                        "profile_completion": basic_details.get('profile_completion', ''),
                        "profile_updated_date": basic_details.get('profile_updated_date', ''),
                        "system_original_resume": basic_details.get('system_original_resume', ''),
                        "system_resume_file": basic_details.get('system_resume_file', ''),
                        "original_file_name": basic_details.get('original_file_name', ''),
                        "resume_file": basic_details.get('resume_file', ''),
                        "resume_added_date": basic_details.get('resume_added_date', ''),
                        "profile_pic": basic_details.get('profile_pic', ''),
                        
                        # Language Information
                        "known_language_id": basic_details.get('known_language_id', []),
                        "known_languages": basic_details.get('known_languages', ''),
                        
                        # Additional Profile Data
                        "cv_push": basic_details.get('cv_push', ''),
                        "cv_push_id": basic_details.get('cv_push_id', ''),
                        "cv_download_count": basic_details.get('cv_download_count', ''),
                        "applied_job_count": basic_details.get('applied_job_count', ''),
                        "connection_count": basic_details.get('connection_count', ''),
                        "profile_upload": basic_details.get('profile_upload', ''),
                        "resume_upload": basic_details.get('resume_upload', ''),
                        "es_push": basic_details.get('es_push', ''),
                        "status": basic_details.get('status', ''),
                        "is_deleted": basic_details.get('is_deleted', ''),
                        "created_at": basic_details.get('created_at', ''),
                        "updated_at": basic_details.get('updated_at', ''),
                        "id": basic_details.get('id', '')
                    }
                    basic_details_array.append(basic_detail)
                
                # Comprehensive jobseeker details with ALL available information.
                # `x or []` normalizes None/falsy sections to empty lists.
                jobseeker_detail = {
                    "basic_details": basic_details_array,
                    "experience": employment_details or [],        # ALL employment details
                    "education": education_details or [],          # ALL education details
                    "certifications": certification_details or [], # ALL certifications
                    "courses": course_details or [],               # ALL courses
                    "projects": project_details or [],             # ALL projects
                    "languages": language_details or [],           # ALL languages
                    "hobbies": hobby_details or [],                # ALL hobbies
                    "references": reference_details or [],         # ALL references
                    "portfolio": portfolio_details or [],          # ALL portfolio items
                    "social_media": social_media_details or [],    # ALL social media
                    "jobseeker_public_profiles": public_profiles or [],  # Public profiles (type 1,2: if data exists else [], type 3: always [])
                    
                    # Additional top-level candidate information
                    "email": candidate_data.get('email', ''),
                    "mobile_number": candidate_data.get('mobile_number', ''),
                    "country_name": candidate_data.get('country_name', ''),
                    "country_header_code": candidate_data.get('country_header_code', ''),
                    "status": candidate_data.get('status', ''),
                    "indexed": candidate_data.get('indexed', ''),
                    "country_id": candidate_data.get('country_id', ''),
                    "is_subscribed": candidate_data.get('is_subscribed', 0),  # is_subscribed from jobseekers table
                    
                    # Any remaining top-level fields not already extracted above.
                    # 'public_profiles' is excluded here too so the data is not
                    # stored twice (it already lives in jobseeker_public_profiles).
                    "additional_info": {
                        k: v for k, v in candidate_data.items() 
                        if k not in ['basic_details', 'employment_details', 'education_details', 
                                   'certification_details', 'course_details', 'project_details',
                                   'language_details', 'hobby_details', 'reference_details',
                                   'portfolio_details', 'social_media_details', 'public_profiles']
                    }
                }
                jobseeker_details.append(jobseeker_detail)
            
            # Create new screening data with jobseeker_details
            screening_data = {
                "job_id": job_emp_id,
                "jobseeker_id": int(candidate["resume_id"]),  # Store as integer
                "type": 2,  # As specified - integer type
                "status": 1,
                "strengths": analysis["strengths"],  # Stored as array
                "weaknesses": analysis["weaknesses"],  # Stored as array
                "matching_percentage": round(analysis["matching_percentage"], 2),  # Round to 2 decimal places
                "match_justification": analysis["match_justification"],
                "top_keywords": analysis.get("top_keywords", []),  # Top keywords array
                "jobseeker_details": jobseeker_details,  # jobseeker_details for type 2
                "job_details": job_details,  # Complete job details
                "company_id": job_details.get('company_id') if job_details else None,  # company_id
                "created_at": datetime.now(timezone.utc),
                "processed_at": datetime.now(timezone.utc)
            }
            
            # Insert new screening record
            screenings_collection.insert_one(screening_data)
            
            logger.info(f"Successfully stored new screening result for candidate {candidate['resume_id']} and job {job_emp_id}")
            return True
            
        except Exception as e:
            logger.error(f"Error storing screening result: {str(e)}")
            return False
    
    def _ensure_jobseeker_indexed(self, jobseeker_id: str) -> bool:
        """
        Make sure a jobseeker's resume is present in the OpenSearch index.

        Looks the jobseeker up in MongoDB first; missing or inactive profiles
        are never indexed, and already-indexed profiles are left untouched.

        Args:
            jobseeker_id: Jobseeker ID to check/index

        Returns:
            bool: True if the jobseeker is (now) indexed, False otherwise
        """
        try:
            # Look up the active jobseeker record by its integer id.
            seeker_doc = mongo_service.get_collection('jobseekers').find_one(
                {"id": int(jobseeker_id), "status": 1}
            )

            # Missing or inactive profile: nothing to index.
            if seeker_doc is None:
                logger.warning(f"Jobseeker {jobseeker_id} not found or inactive")
                return False

            # Fast path: the resume is already present in the index.
            if seeker_doc.get("indexed") == 1:
                logger.info(f"Jobseeker {jobseeker_id} already indexed")
                return True

            # Push the resume into OpenSearch.
            if self.opensearch_service.add_resume(jobseeker_id):
                logger.info(f"Successfully indexed jobseeker {jobseeker_id}")
                return True

            logger.error(f"Failed to index jobseeker {jobseeker_id}")
            return False

        except Exception as e:
            logger.error(f"Error ensuring jobseeker {jobseeker_id} is indexed: {str(e)}")
            return False
    
    def _store_specific_screening_result(self, job_emp_id: str, candidate: Dict[str, Any], analysis: Dict[str, Any], job_details: Dict[str, Any] = None):
        """
        Store a type-1 (specific candidate analysis) screening result in the
        MongoDB `screenings` collection.

        Duplicate handling: if a screening already exists for this
        job + candidate + type=1 combination, the record is updated only when
        the new matching percentage differs from the stored value by more than
        2 percentage points; otherwise the existing record is kept as-is and
        the call still counts as a success.

        Args:
            job_emp_id: Job employer ID
            candidate: Candidate information; must contain "resume_id"
            analysis: Parsed AI analysis with "strengths", "weaknesses",
                "matching_percentage", "match_justification" and optional
                "top_keywords"
            job_details: Complete job details dictionary (optional)

        Returns:
            bool: True when the result was inserted, updated, or skipped as a
            duplicate; False when an error occurred.
        """
        try:
            # datetime.utcnow() is deprecated since Python 3.12; produce
            # timezone-aware UTC timestamps instead (PyMongo stores them as UTC).
            from datetime import timezone

            # Get database connection (local import avoids circular imports)
            from services.database import get_database
            db = get_database()
            screenings_collection = db['screenings']
            
            # Check if screening already exists for this job + candidate + type=1
            existing_screening = screenings_collection.find_one({
                "job_id": job_emp_id,
                "jobseeker_id": int(candidate["resume_id"]),
                "type": 1
            })
            
            if existing_screening:
                # Only rewrite the stored analysis when the score moved by more
                # than 2 percentage points; smaller fluctuations are ignored.
                existing_percentage = existing_screening.get("matching_percentage", 0)
                new_percentage = round(analysis["matching_percentage"], 2)
                
                if abs(existing_percentage - new_percentage) > 2:
                    logger.info(f"Updating type 1 screening for candidate {candidate['resume_id']} - score changed from {existing_percentage}% to {new_percentage}%")
                    # Update the screening with new analysis
                    screenings_collection.update_one(
                        {"_id": existing_screening["_id"]},
                        {
                            "$set": {
                                "matching_percentage": new_percentage,
                                "strengths": analysis["strengths"],
                                "weaknesses": analysis["weaknesses"],
                                "match_justification": analysis["match_justification"],
                                "top_keywords": analysis.get("top_keywords", []),
                                "updated_at": datetime.now(timezone.utc)
                            }
                        }
                    )
                else:
                    logger.info(f"Type 1 screening already exists for candidate {candidate['resume_id']} and job {job_emp_id}, skipping (score within 2% threshold)")
                return True
            
            # Get candidate details for jobseeker_details array - store ALL available data for type 1
            candidate_data = self._get_candidate_details(candidate["resume_id"])
            jobseeker_details = []
            
            if candidate_data:
                # Pull every profile section out of the candidate document.
                basic_details = candidate_data.get("basic_details", {})
                employment_details = candidate_data.get("employment_details", [])
                education_details = candidate_data.get("education_details", [])
                certification_details = candidate_data.get("certification_details", [])
                course_details = candidate_data.get("course_details", [])
                project_details = candidate_data.get("project_details", [])
                language_details = candidate_data.get("language_details", [])
                hobby_details = candidate_data.get("hobby_details", [])
                reference_details = candidate_data.get("reference_details", [])
                portfolio_details = candidate_data.get("portfolio_details", [])
                social_media_details = candidate_data.get("social_media_details", [])
                public_profiles = candidate_data.get("public_profiles", [])  # Get public profiles for type 1
                
                # Flatten basic_details into one comprehensive record.
                basic_details_array = []
                if basic_details:
                    basic_detail = {
                        # Personal Information
                        "name": f"{basic_details.get('first_name', '')} {basic_details.get('last_name', '')}".strip(),
                        "first_name": basic_details.get('first_name', ''),
                        "last_name": basic_details.get('last_name', ''),
                        "email": candidate_data.get('email', '') or basic_details.get('email', ''),
                        "phone": candidate_data.get('mobile_number', '') or basic_details.get('phone', ''),
                        "mobile_number": candidate_data.get('mobile_number', ''),
                        "birth_date": basic_details.get('birth_date', ''),
                        "gender_id": basic_details.get('gender_id', ''),
                        "marital_status": basic_details.get('marital_status', ''),
                        "nationality_id": basic_details.get('nationality_id', ''),
                        "religion_id": basic_details.get('religion_id', ''),
                        
                        # Professional Information
                        # NOTE: 'current_desigation' is a (misspelled) legacy key in the
                        # source data; check it first, then the corrected spelling.
                        "current_designation": basic_details.get('current_desigation', '') or basic_details.get('current_designation', ''),
                        "current_company": basic_details.get('current_company', ''),
                        "total_experience_year": basic_details.get('total_experience_year', ''),
                        "total_experience_month": basic_details.get('total_experience_month', ''),
                        "total_experience": f"{basic_details.get('total_experience_year', '')} {basic_details.get('total_experience_month', '')}".strip(),
                        "industry_id": basic_details.get('industry_id', ''),
                        "functional_area_id": basic_details.get('functional_area_id', ''),
                        "profile_type_id": basic_details.get('profile_type_id', ''),
                        
                        # Skills and Profile (key_skills is a comma-separated string)
                        "key_skills": basic_details.get('key_skills', '').split(',') if basic_details.get('key_skills') else [],
                        "key_skills_id": basic_details.get('key_skills_id', []),
                        "profile_summary": basic_details.get('profile_summary', ''),
                        "cv_headline": basic_details.get('cv_headline', ''),
                        
                        # Salary Information
                        "current_salary": basic_details.get('current_salary', ''),
                        "current_salary_period": basic_details.get('current_salary_period', ''),
                        "current_salary_currency": basic_details.get('current_salary_currency', ''),
                        "expected_salary": basic_details.get('expected_salary', ''),
                        "expected_salary_period": basic_details.get('expected_salary_period', ''),
                        "expected_salary_currency": basic_details.get('expected_salary_currency', ''),
                        
                        # Location Information
                        "current_location": basic_details.get('current_location', ''),
                        "prefered_location": basic_details.get('prefered_location', ''),
                        "country_id": basic_details.get('country_id', ''),
                        "com_address": basic_details.get('com_address', ''),
                        
                        # Visa and Legal Status
                        "visa_status_id": basic_details.get('visa_status_id', ''),
                        "visa_residence_location": basic_details.get('visa_residence_location', ''),
                        "visa_validity_month": basic_details.get('visa_validity_month', ''),
                        "visa_validity_year": basic_details.get('visa_validity_year', ''),
                        "license_opt": basic_details.get('license_opt', ''),
                        
                        # System Information
                        "profile_completion": basic_details.get('profile_completion', ''),
                        "profile_updated_date": basic_details.get('profile_updated_date', ''),
                        "system_original_resume": basic_details.get('system_original_resume', ''),
                        "system_resume_file": basic_details.get('system_resume_file', ''),
                        "original_file_name": basic_details.get('original_file_name', ''),
                        "resume_file": basic_details.get('resume_file', ''),
                        "resume_added_date": basic_details.get('resume_added_date', ''),
                        "profile_pic": basic_details.get('profile_pic', ''),
                        
                        # Language Information
                        "known_language_id": basic_details.get('known_language_id', []),
                        "known_languages": basic_details.get('known_languages', ''),
                        
                        # Additional Profile Data
                        "cv_push": basic_details.get('cv_push', ''),
                        "cv_push_id": basic_details.get('cv_push_id', ''),
                        "cv_download_count": basic_details.get('cv_download_count', ''),
                        "applied_job_count": basic_details.get('applied_job_count', ''),
                        "connection_count": basic_details.get('connection_count', ''),
                        "profile_upload": basic_details.get('profile_upload', ''),
                        "resume_upload": basic_details.get('resume_upload', ''),
                        "es_push": basic_details.get('es_push', ''),
                        "status": basic_details.get('status', ''),
                        "is_deleted": basic_details.get('is_deleted', ''),
                        "created_at": basic_details.get('created_at', ''),
                        "updated_at": basic_details.get('updated_at', ''),
                        "id": basic_details.get('id', '')
                    }
                    basic_details_array.append(basic_detail)
                
                # Comprehensive jobseeker details with ALL available information.
                # `x or []` normalizes None/falsy sections to empty lists.
                jobseeker_detail = {
                    "basic_details": basic_details_array,
                    "experience": employment_details or [],        # ALL employment details
                    "education": education_details or [],          # ALL education details
                    "certifications": certification_details or [], # ALL certifications
                    "courses": course_details or [],               # ALL courses
                    "projects": project_details or [],             # ALL projects
                    "languages": language_details or [],           # ALL languages
                    "hobbies": hobby_details or [],                # ALL hobbies
                    "references": reference_details or [],         # ALL references
                    "portfolio": portfolio_details or [],          # ALL portfolio items
                    "social_media": social_media_details or [],    # ALL social media
                    "jobseeker_public_profiles": public_profiles or [],  # Public profiles for type 1
                    
                    # Additional top-level candidate information
                    "email": candidate_data.get('email', ''),
                    "mobile_number": candidate_data.get('mobile_number', ''),
                    "country_name": candidate_data.get('country_name', ''),
                    "country_header_code": candidate_data.get('country_header_code', ''),
                    "status": candidate_data.get('status', ''),
                    "indexed": candidate_data.get('indexed', ''),
                    "country_id": candidate_data.get('country_id', ''),
                    "is_subscribed": candidate_data.get('is_subscribed', 0),  # is_subscribed from jobseekers table
                    
                    # Any remaining top-level fields not already extracted above.
                    # 'public_profiles' is excluded here too so the data is not
                    # stored twice (it already lives in jobseeker_public_profiles).
                    "additional_info": {
                        k: v for k, v in candidate_data.items() 
                        if k not in ['basic_details', 'employment_details', 'education_details', 
                                   'certification_details', 'course_details', 'project_details',
                                   'language_details', 'hobby_details', 'reference_details',
                                   'portfolio_details', 'social_media_details', 'public_profiles']
                    }
                }
                jobseeker_details.append(jobseeker_detail)
            
            # Create new screening data with type=1
            screening_data = {
                "job_id": job_emp_id,
                "jobseeker_id": int(candidate["resume_id"]),  # Store as integer
                "type": 1,  # Type 1 for specific candidate analysis
                "status": 1,
                "strengths": analysis["strengths"],  # Stored as array
                "weaknesses": analysis["weaknesses"],  # Stored as array
                "matching_percentage": round(analysis["matching_percentage"], 2),  # Round to 2 decimal places
                "match_justification": analysis["match_justification"],
                "top_keywords": analysis.get("top_keywords", []),  # Top keywords array
                "jobseeker_details": jobseeker_details,  # jobseeker_details for type 1
                "job_details": job_details,  # Complete job details
                "company_id": job_details.get('company_id') if job_details else None,  # company_id
                "created_at": datetime.now(timezone.utc),
                "processed_at": datetime.now(timezone.utc)
            }
            
            # Insert new screening record
            screenings_collection.insert_one(screening_data)
            
            logger.info(f"Successfully stored specific screening result for candidate {candidate['resume_id']} and job {job_emp_id}")
            return True
            
        except Exception as e:
            logger.error(f"Error storing specific screening result: {str(e)}")
            return False
    
    def _log_search_results(self, job_emp_id: str, search_type: str, results: List[Dict[str, Any]]) -> None:
        """
        Log search results to the ai_search_logs collection.

        Logging is best-effort: any failure is logged and swallowed so it
        never interrupts the main matching pipeline.

        Args:
            job_emp_id: Job employer ID
            search_type: Type of search ("opensearch" or "vector_search")
            results: Array of search results
        """
        try:
            # Get database connection (imported lazily to avoid circular imports)
            from services.database import get_database
            db = get_database()
            ai_search_logs_collection = db['ai_search_logs']
            
            # Prepare log data
            log_data = {
                "job_emp_id": job_emp_id,
                "type": search_type,
                "results": results,  # Store entire results array
                "total_results": len(results),
                "created_at": datetime.utcnow()
            }
            
            # Insert log
            ai_search_logs_collection.insert_one(log_data)
            logger.info(f"Logged {search_type} results for job {job_emp_id}: {len(results)} results")
            
        except Exception as e:
            # Don't fail the main process if logging fails
            logger.error(f"Error logging search results: {str(e)}")
    
    def analyze_single_candidate_for_upload(self, job_emp_id: str, jobseeker_id: int, parsed_data: Dict[str, Any], analysis_type: int = 3) -> Dict[str, Any]:
        """
        Run AI analysis for a single uploaded candidate and persist the result.

        Orchestrates: fetch job details -> gather complete candidate data
        (falling back to the parsed resume) -> direct AI analysis -> storage
        with the given screening type.

        Args:
            job_emp_id: The employer job ID
            jobseeker_id: The jobseeker ID from jobseekers_bulk_upload_cv.id
            parsed_data: Parsed resume data
            analysis_type: Type of analysis (3 for upload analysis)

        Returns:
            Dict: Analysis result with success status
        """
        try:
            # Without job details there is nothing to match against
            job_details = self._get_job_details(job_emp_id)
            if not job_details:
                return {
                    "success": False,
                    "error": f"Failed to get job details for {job_emp_id}"
                }
            
            # Candidate stub in the same shape types 1 & 2 use; scores are
            # zeroed because upload candidates have no search scores yet.
            candidate = {
                "resume_id": str(jobseeker_id),
                "keyword_score": 0,
                "keyword_percentage": 0,
                "vector_score": 0,
                "vector_percentage": 0,
                "combined_score": 0  # Will be set by AI analysis
            }
            
            # Prefer the full DB-backed candidate record; fall back to the
            # freshly parsed resume data when it is unavailable.
            complete_candidate_data = self._get_complete_bulk_upload_candidate_data(jobseeker_id)
            if not complete_candidate_data:
                logger.warning(f"Could not fetch complete candidate data for jobseeker {jobseeker_id}, using parsed data only")
                complete_candidate_data = self._format_parsed_data_for_storage(parsed_data, jobseeker_id)
            
            # Type 3 (upload) always uses the direct analysis path since the
            # candidate is not indexed in opensearch/qdrant yet.
            logger.info(f"Using direct analysis method for upload candidate {jobseeker_id}")
            analysis_result = self._generate_candidate_analysis_direct(job_details, complete_candidate_data, candidate)
            
            if not analysis_result:
                return {
                    "success": False,
                    "error": "Failed to generate AI analysis"
                }
            
            stored = self._store_upload_screening_result(
                job_emp_id=job_emp_id,
                jobseeker_id=jobseeker_id,
                analysis=analysis_result,
                analysis_type=analysis_type,
                candidate_data=complete_candidate_data,  # Pass complete candidate data for jobseeker_details
                job_details=job_details  # Pass job details
            )
            if not stored:
                return {
                    "success": False,
                    "error": "Failed to store analysis result"
                }
            
            logger.info(f"Successfully analyzed and stored upload candidate {jobseeker_id} for job {job_emp_id} with type {analysis_type}")
            return {
                "success": True,
                "jobseeker_id": jobseeker_id,
                "analysis": analysis_result
            }
                
        except Exception as e:
            logger.error(f"Error analyzing single candidate for upload: {str(e)}")
            return {
                "success": False,
                "error": str(e)
            }
    
    def _store_upload_screening_result(self, job_emp_id: str, jobseeker_id: int, analysis: Dict[str, Any], analysis_type: int, candidate_data: Dict[str, Any] = None, job_details: Dict[str, Any] = None) -> bool:
        """
        Store screening result for upload analysis with jobseeker_details for type 3
        Check for duplicates before inserting
        
        Args:
            job_emp_id: Job employer ID
            jobseeker_id: Jobseeker ID from bulk upload collection
            analysis: AI analysis result
            analysis_type: Type of analysis (3 for upload)
            candidate_data: Candidate data to store in jobseeker_details array
            job_details: Complete job details to embed in the screening record
        
        Returns:
            bool: True on successful insert (or duplicate skip), False on error
        """
        try:
            # Get database connection
            from services.database import get_database
            db = get_database()
            screenings_collection = db['screenings']
            
            # Check if screening already exists for this job + candidate + type
            existing_screening = screenings_collection.find_one({
                "job_id": job_emp_id,
                "jobseeker_id": jobseeker_id,  # Already integer
                "type": analysis_type
            })
            
            if existing_screening:
                logger.info(f"Upload screening already exists for candidate {jobseeker_id} and job {job_emp_id} with type {analysis_type}, skipping insert")
                return True
            
            # Create comprehensive jobseeker_details array for type 3 with ALL available data
            jobseeker_details = []
            if candidate_data and analysis_type == 3:
                # Extract ALL details for comprehensive jobseeker_details storage (type 3)
                basic_details = candidate_data.get("basic_details", {})
                employment_details = candidate_data.get("employment_details", [])
                education_details = candidate_data.get("education_details", [])
                certification_details = candidate_data.get("certification_details", [])
                course_details = candidate_data.get("course_details", [])
                project_details = candidate_data.get("project_details", [])
                language_details = candidate_data.get("language_details", [])
                hobby_details = candidate_data.get("hobby_details", [])
                reference_details = candidate_data.get("reference_details", [])
                portfolio_details = candidate_data.get("portfolio_details", [])
                social_media_details = candidate_data.get("social_media_details", [])
                
                # For type 3, get email and mobile from jobseekers_bulk_upload_cv table
                email = ""
                mobile = ""
                try:
                    # Reuse the database handle obtained above; no need to
                    # re-import or reconnect here.
                    bulk_upload_data = db['jobseekers_bulk_upload_cv'].find_one({"id": jobseeker_id})
                    if bulk_upload_data:
                        email = bulk_upload_data.get('email', '')
                        mobile = bulk_upload_data.get('mobile_number', '')
                        logger.info(f"Found bulk upload data for jobseeker {jobseeker_id}: email={email}, mobile={mobile}")
                    else:
                        logger.warning(f"No bulk upload data found for jobseeker {jobseeker_id}")
                except Exception as e:
                    logger.error(f"Error getting bulk upload data for jobseeker {jobseeker_id}: {str(e)}")
                
                # Fallback to candidate_data if bulk upload data not found
                if not email:
                    email = candidate_data.get('email', '') or basic_details.get('email', '')
                if not mobile:
                    mobile = candidate_data.get('mobile_number', '') or basic_details.get('phone', '')
                
                # Format comprehensive basic_details with ALL available information for type 3
                basic_details_array = []
                if basic_details or candidate_data:
                    # Get name from basic_details first, then fallback to additional_info
                    first_name = basic_details.get('first_name', '') if basic_details else ''
                    last_name = basic_details.get('last_name', '') if basic_details else ''
                    
                    # If basic_details names are empty, try to get from additional_info or parse from bulk upload
                    if not first_name and not last_name and candidate_data:
                        additional_info = candidate_data.get('additional_info', {})
                        cv_filename = additional_info.get('cv_filename', '')
                        
                        # Try to extract name from CV filename if available
                        if cv_filename and not cv_filename.startswith('uploads/'):
                            # Remove file extension and common separators
                            name_from_file = cv_filename.replace('.pdf', '').replace('.doc', '').replace('.docx', '').replace('_', ' ').replace('-', ' ').strip()
                            name_parts = name_from_file.split()
                            if len(name_parts) >= 2:
                                first_name = name_parts[0]
                                last_name = ' '.join(name_parts[1:])
                        
                        # If still no name, try to get from parsed basic_details
                        parsed_basic = candidate_data.get('basic_details', {})
                        if not first_name and parsed_basic.get('first_name'):
                            first_name = parsed_basic.get('first_name', '')
                        if not last_name and parsed_basic.get('last_name'):
                            last_name = parsed_basic.get('last_name', '')
                    
                    # Use email and mobile from the bulk upload lookup we did earlier
                    # But also fallback to parsed data if those are empty
                    final_email = email
                    final_mobile = mobile
                    if not final_email and candidate_data:
                        parsed_basic = candidate_data.get('basic_details', {})
                        final_email = parsed_basic.get('email', '')
                    if not final_mobile and candidate_data:
                        parsed_basic = candidate_data.get('basic_details', {})
                        final_mobile = parsed_basic.get('phone', '')
                    
                    # Include ALL fields from basic_details - comprehensive storage
                    basic_detail = {
                        # Personal Information
                        "name": f"{first_name} {last_name}".strip(),
                        "first_name": first_name,
                        "last_name": last_name,
                        "email": final_email,
                        "phone": final_mobile,
                        "mobile_number": final_mobile,
                        "birth_date": basic_details.get('birth_date', ''),
                        "gender_id": basic_details.get('gender_id', ''),
                        "marital_status": basic_details.get('marital_status', ''),
                        "nationality_id": basic_details.get('nationality_id', ''),
                        "religion_id": basic_details.get('religion_id', ''),
                        
                        # Professional Information
                        # 'current_desigation' (sic) is checked first: the source
                        # table historically stores the misspelled column name.
                        "current_designation": basic_details.get('current_desigation', '') or basic_details.get('current_designation', ''),
                        "current_company": basic_details.get('current_company', ''),
                        "total_experience_year": basic_details.get('total_experience_year', ''),
                        "total_experience_month": basic_details.get('total_experience_month', ''),
                        "total_experience": f"{basic_details.get('total_experience_year', '')} {basic_details.get('total_experience_month', '')}".strip(),
                        "industry_id": basic_details.get('industry_id', ''),
                        "functional_area_id": basic_details.get('functional_area_id', ''),
                        "profile_type_id": basic_details.get('profile_type_id', ''),
                        
                        # Skills and Profile
                        "key_skills": basic_details.get('key_skills', '').split(',') if basic_details.get('key_skills') else [],
                        "key_skills_id": basic_details.get('key_skills_id', []),
                        "profile_summary": basic_details.get('profile_summary', ''),
                        "cv_headline": basic_details.get('cv_headline', ''),
                        
                        # Salary Information
                        "current_salary": basic_details.get('current_salary', ''),
                        "current_salary_period": basic_details.get('current_salary_period', ''),
                        "current_salary_currency": basic_details.get('current_salary_currency', ''),
                        "expected_salary": basic_details.get('expected_salary', ''),
                        "expected_salary_period": basic_details.get('expected_salary_period', ''),
                        "expected_salary_currency": basic_details.get('expected_salary_currency', ''),
                        
                        # Location Information
                        "current_location": basic_details.get('current_location', ''),
                        "prefered_location": basic_details.get('prefered_location', ''),
                        "country_id": basic_details.get('country_id', ''),
                        "com_address": basic_details.get('com_address', ''),
                        
                        # Visa and Legal Status
                        "visa_status_id": basic_details.get('visa_status_id', ''),
                        "visa_residence_location": basic_details.get('visa_residence_location', ''),
                        "visa_validity_month": basic_details.get('visa_validity_month', ''),
                        "visa_validity_year": basic_details.get('visa_validity_year', ''),
                        "license_opt": basic_details.get('license_opt', ''),
                        
                        # System Information
                        "profile_completion": basic_details.get('profile_completion', ''),
                        "profile_updated_date": basic_details.get('profile_updated_date', ''),
                        "system_original_resume": basic_details.get('system_original_resume', ''),
                        "system_resume_file": basic_details.get('system_resume_file', ''),
                        "original_file_name": basic_details.get('original_file_name', ''),
                        "resume_file": basic_details.get('resume_file', ''),
                        "resume_added_date": basic_details.get('resume_added_date', ''),
                        "profile_pic": basic_details.get('profile_pic', ''),
                        
                        # Language Information
                        "known_language_id": basic_details.get('known_language_id', []),
                        "known_languages": basic_details.get('known_languages', ''),
                        
                        # Additional Profile Data
                        "cv_push": basic_details.get('cv_push', ''),
                        "cv_push_id": basic_details.get('cv_push_id', ''),
                        "cv_download_count": basic_details.get('cv_download_count', ''),
                        "applied_job_count": basic_details.get('applied_job_count', ''),
                        "connection_count": basic_details.get('connection_count', ''),
                        "profile_upload": basic_details.get('profile_upload', ''),
                        "resume_upload": basic_details.get('resume_upload', ''),
                        "es_push": basic_details.get('es_push', ''),
                        "status": basic_details.get('status', ''),
                        "is_deleted": basic_details.get('is_deleted', ''),
                        "created_at": basic_details.get('created_at', ''),
                        "updated_at": basic_details.get('updated_at', ''),
                        "id": basic_details.get('id', '')
                    }
                    basic_details_array.append(basic_detail)
                
                # Store comprehensive jobseeker details with ALL available information for type 3
                jobseeker_detail = {
                    "basic_details": basic_details_array,
                    "experience": employment_details if employment_details else [],  # ALL employment details
                    "education": education_details if education_details else [],   # ALL education details
                    "certifications": certification_details if certification_details else [],  # ALL certifications
                    "courses": course_details if course_details else [],  # ALL courses
                    "projects": project_details if project_details else [],  # ALL projects
                    "languages": language_details if language_details else [],  # ALL languages
                    "hobbies": hobby_details if hobby_details else [],  # ALL hobbies
                    "references": reference_details if reference_details else [],  # ALL references
                    "portfolio": portfolio_details if portfolio_details else [],  # ALL portfolio items
                    "social_media": social_media_details if social_media_details else [],  # ALL social media
                    "jobseeker_public_profiles": [],  # Type 3: Always empty array
                    
                    # Additional top-level candidate information
                    "email": final_email,
                    "mobile_number": final_mobile,
                    "country_name": candidate_data.get('country_name', ''),
                    "country_header_code": candidate_data.get('country_header_code', ''),
                    "status": candidate_data.get('status', ''),
                    "is_subscribed": candidate_data.get('is_subscribed', 0),
                    "indexed": candidate_data.get('indexed', ''),
                    "country_id": candidate_data.get('country_id', ''),
                    
                    # Complete candidate profile data (if available)
                    "additional_info": {
                        k: v for k, v in candidate_data.items() 
                        if k not in ['basic_details', 'employment_details', 'education_details', 
                                   'certification_details', 'course_details', 'project_details',
                                   'language_details', 'hobby_details', 'reference_details',
                                   'portfolio_details', 'social_media_details']
                    }
                }
                jobseeker_details.append(jobseeker_detail)
            
            # Create new screening data
            # NOTE(review): datetime.utcnow() is deprecated since Python 3.12;
            # consider datetime.now(timezone.utc) — changes stored values to
            # tz-aware, so confirm downstream consumers first.
            screening_data = {
                "job_id": job_emp_id,
                "jobseeker_id": jobseeker_id,  # Store as integer (already int)
                "type": analysis_type,
                "status": 1,
                "strengths": analysis["strengths"],  # Now stored as array
                "weaknesses": analysis["weaknesses"],  # Now stored as array
                "matching_percentage": round(analysis["matching_percentage"], 2),  # Store all percentages including 0-100%
                "match_justification": analysis["match_justification"],
                "top_keywords": analysis.get("top_keywords", []),  # Add top keywords array for type 3
                "job_details": job_details,  # Add complete job details
                "company_id": job_details.get('company_id') if job_details else None,  # Add company_id
                "created_at": datetime.utcnow(),
                "processed_at": datetime.utcnow()
            }
            
            # Add jobseeker_details array for type 3
            if analysis_type == 3 and candidate_data:
                screening_data["jobseeker_details"] = jobseeker_details
            elif analysis_type == 3:
                # Ensure jobseeker_details is always present for type 3 even if empty
                screening_data["jobseeker_details"] = []
            
            # Insert new screening record
            result = screenings_collection.insert_one(screening_data)
            
            logger.info(f"Successfully stored upload screening result for candidate {jobseeker_id} and job {job_emp_id} with type {analysis_type}")
            return True
            
        except Exception as e:
            logger.error(f"Error storing upload screening result: {str(e)}")
            return False
    
    def _generate_candidate_analysis_direct(self, job_details: Dict[str, Any], candidate_data: Dict[str, Any], candidate: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """
        Generate AI analysis for a candidate using direct data (for upload scenarios)
        
        Builds a prompt from the supplied job and candidate data, sends it to
        the AI service, and parses the response. Unlike the indexed analysis
        path, this needs no opensearch/qdrant data.
        
        Args:
            job_details: Job details dictionary
            candidate_data: Direct candidate data
            candidate: Candidate information with scores
            
        Returns:
            Analysis dictionary or None if failed
        """
        try:
            # Debug logging to identify the issue
            logger.info(f"Starting AI analysis for candidate {candidate.get('resume_id', 'unknown') if candidate else 'None candidate'}")
            logger.info(f"Job details available: {bool(job_details)}")
            logger.info(f"Candidate data available: {bool(candidate_data)}")
            logger.info(f"Candidate object: {candidate}")
            
            # Create analysis prompt
            prompt = self._create_analysis_prompt(job_details, candidate_data, candidate)
            
            # Debug logging
            logger.info(f"Generated analysis prompt for candidate {candidate.get('resume_id', 'unknown')}")
            
            # Get AI analysis
            system_prompt = "You are an expert HR analyst. Analyze the candidate's fit for the job and provide detailed, professional assessment."
            
            analysis_text = self.ai_service.ai_response(
                question=prompt,
                system_prompt=system_prompt,
                max_tokens=1000,
                company_id=job_details.get('company_id') if job_details else None,
                feature="screening"
            )
            
            # Parse the AI response (allow all percentages 0-100% for upload analysis)
            # 50.0 is the fallback matching percentage if the response lacks one
            parsed_analysis = self._parse_ai_analysis(analysis_text, 50.0)
            
            return parsed_analysis
            
        except Exception as e:
            # Use safer key access for error logging
            candidate_id = candidate.get('resume_id', 'unknown') if candidate else 'unknown'
            logger.error(f"Error generating direct candidate analysis for {candidate_id}: {str(e)}")
            return None
    
    def _get_complete_bulk_upload_candidate_data(self, jobseeker_id: int) -> Optional[Dict[str, Any]]:
        """
        Get complete candidate data from all bulk upload related tables
        
        Args:
            jobseeker_id: The jobseeker ID from bulk upload collection
            
        Returns:
            Complete candidate data dictionary, or None on error
        """
        try:
            from services.database import get_database
            db = get_database()
            
            # Initialize complete candidate data with every expected key so
            # callers can rely on the full shape even when lookups are empty
            candidate_data = {
                "basic_details": {},
                "employment_details": [],
                "education_details": [],
                "certification_details": [],
                "course_details": [],
                "project_details": [],
                "language_details": [],
                "hobby_details": [],
                "reference_details": [],
                "portfolio_details": [],
                "social_media_details": []
            }
            
            # Get top-level contact/file info from jobseekers_bulk_upload_cv
            main_cv_data = db['jobseekers_bulk_upload_cv'].find_one({"id": jobseeker_id})
            if main_cv_data:
                candidate_data["email"] = main_cv_data.get('email', '')
                candidate_data["mobile_number"] = main_cv_data.get('mobile_number', '')
                candidate_data["cv_filename"] = main_cv_data.get('cv_filename', '')
                candidate_data["cvfile_store_path"] = main_cv_data.get('cvfile_store_path', '')
                candidate_data["last_login_date"] = main_cv_data.get('last_login_date', '')
                
            # Get basic details from jobseeker_basic_details_bulk_upload_cv
            basic_details = db['jobseeker_basic_details_bulk_upload_cv'].find_one({"jobseeker_id": jobseeker_id})
            if basic_details:
                candidate_data["basic_details"] = basic_details
            
            # Multi-row detail tables: candidate_data key -> collection name.
            # (language/hobby/reference/portfolio/social_media have no bulk
            # upload tables and keep their initialized empty lists.)
            detail_collections = {
                "employment_details": 'jobseeker_employment_details_bulk_upload_cv',
                "education_details": 'jobseeker_education_details_bulk_upload_cv',
                "course_details": 'jobseeker_course_details_bulk_upload_cv',
                "project_details": 'jobseeker_project_details_bulk_upload_cv',
                "certification_details": 'jobseeker_certification_details_bulk_upload_cv',
            }
            for key, collection_name in detail_collections.items():
                candidate_data[key] = list(db[collection_name].find({"jobseeker_id": jobseeker_id}))
            
            logger.info(f"Successfully fetched complete candidate data for jobseeker {jobseeker_id}")
            return candidate_data
            
        except Exception as e:
            logger.error(f"Error fetching complete candidate data for jobseeker {jobseeker_id}: {str(e)}")
            return None
    
    def _format_parsed_data_for_storage(self, parsed_data: Dict[str, Any], jobseeker_id: int) -> Dict[str, Any]:
        """
        Format parsed resume data to match the storage structure used for
        database-sourced candidate records.

        Args:
            parsed_data: The parsed resume data (expected keys include "name",
                "email", "phone", "skills", "experience", "education",
                "certifications", "projects" — shapes assumed from the parser;
                non-list/non-dict entries are skipped defensively)
            jobseeker_id: The jobseeker ID stamped onto every nested record

        Returns:
            Formatted candidate data dictionary. On any formatting error an
            empty-skeleton dictionary (with email/phone preserved when
            recoverable) is returned instead of raising.
        """
        try:
            # Derive first/last name once from the single parsed "name" field.
            # Normalized to "" (never None) so all name fields stay consistent.
            full_name = parsed_data.get("name") or ""
            name_parts = full_name.split()
            first_name = name_parts[0] if name_parts else ""
            last_name = " ".join(name_parts[1:]) if len(name_parts) > 1 else ""

            # Hoist values reused in several places below.
            email = parsed_data.get("email", "")
            phone = parsed_data.get("phone", "")
            created_at = parsed_data.get("created_at", "")
            skills = parsed_data.get("skills") or []

            # NOTE: the previous dict literal repeated the "email",
            # "mobile_number" and "project_details" keys; Python keeps only
            # the last occurrence, so collapsing them to one each preserves
            # the resulting dictionary exactly.
            formatted_candidate_data = {
                "basic_details": {
                    "jobseeker_id": jobseeker_id,
                    "first_name": first_name,
                    "last_name": last_name,
                    "full_name": full_name,
                    "email": email,
                    "phone": phone,
                    "key_skills": ",".join(skills),
                    "profile_summary": parsed_data.get("profile_summary") or "",
                    "date_of_birth": parsed_data.get("date_of_birth"),
                    "nationality": parsed_data.get("nationality"),
                    "languages": parsed_data.get("languages", []),
                    "status": 1,      # active
                    "is_deleted": 0,
                    "created_at": created_at,
                    "updated_at": parsed_data.get("updated_at", "")
                },
                "email": email,
                "mobile_number": phone,
                "employment_details": [],
                "education_details": [],
                "certification_details": [],
                "course_details": [],
                "project_details": [],
                "language_details": [],
                "hobby_details": [],
                "reference_details": [],
                "portfolio_details": [],
                "social_media_details": [],
                "additional_info": {
                    "email": email,
                    "mobile_number": phone,
                    "cv_filename": f"Parsed Resume - {parsed_data.get('name', 'Unknown')}.pdf",
                    "cvfile_store_path": f"uploads/parsed/{jobseeker_id}/resume.pdf",
                    "last_login_date": created_at
                }
            }

            # Convert experience data to match database format.
            experience = parsed_data.get("experience")
            if isinstance(experience, list):
                for exp in experience:
                    if isinstance(exp, dict):
                        formatted_candidate_data["employment_details"].append({
                            "jobseeker_id": jobseeker_id,
                            # Parser may use either "role" or "position".
                            "designation": exp.get("role", "") or exp.get("position", ""),
                            "company_name": exp.get("company", ""),
                            "start_date": exp.get("start_date"),
                            "end_date": exp.get("end_date"),
                            "job_description": exp.get("job_description", "") or exp.get("description", ""),
                            "responsibilities": exp.get("responsibilities", [])
                        })

            # Convert education data to match database format.
            education = parsed_data.get("education")
            if isinstance(education, list):
                for edu in education:
                    if isinstance(edu, dict):
                        formatted_candidate_data["education_details"].append({
                            "jobseeker_id": jobseeker_id,
                            "degree": edu.get("degree", ""),
                            "institute_name": edu.get("institution", ""),
                            "field_of_study": edu.get("field_of_study", ""),
                            "start_date": edu.get("start_date"),
                            "end_date": edu.get("end_date"),
                            "completion_year": edu.get("completion_year"),
                            "grade": edu.get("grade")
                        })

            # Convert certifications data if available.
            certifications = parsed_data.get("certifications")
            if isinstance(certifications, list):
                for cert in certifications:
                    if isinstance(cert, dict):
                        formatted_candidate_data["certification_details"].append({
                            "jobseeker_id": jobseeker_id,
                            "certificate_name": cert.get("name", "") or cert.get("title", ""),
                            "institute_name": cert.get("issuer", "") or cert.get("organization", ""),
                            "issue_date": cert.get("issue_date"),
                            "expiry_date": cert.get("expiry_date"),
                            "credential_id": cert.get("credential_id"),
                            "url": cert.get("url")
                        })

            # Convert projects data if available.
            projects = parsed_data.get("projects")
            if isinstance(projects, list):
                for proj in projects:
                    if isinstance(proj, dict):
                        formatted_candidate_data["project_details"].append({
                            "jobseeker_id": jobseeker_id,
                            "project_title": proj.get("title", "") or proj.get("name", ""),
                            "organization_name": proj.get("organization", "") or proj.get("company", ""),
                            "project_description": proj.get("description", ""),
                            "project_url": proj.get("url", ""),
                            "start_date": proj.get("start_date"),
                            "end_date": proj.get("end_date"),
                            "technologies": proj.get("technologies", [])
                        })

            logger.info(f"Successfully formatted parsed data for jobseeker {jobseeker_id}")
            return formatted_candidate_data

        except Exception as e:
            logger.error(f"Error formatting parsed data for jobseeker {jobseeker_id}: {str(e)}")
            # Best-effort fallback skeleton. Guard the contact-info lookups so
            # the fallback itself cannot raise when parsed_data is not a dict
            # (the previous version would crash here in exactly that case).
            is_dict = isinstance(parsed_data, dict)
            return {
                "basic_details": {},
                "employment_details": [],
                "education_details": [],
                "certification_details": [],
                "course_details": [],
                "project_details": [],
                "language_details": [],
                "hobby_details": [],
                "reference_details": [],
                "portfolio_details": [],
                "social_media_details": [],
                "email": parsed_data.get("email", "") if is_dict else "",
                "mobile_number": parsed_data.get("phone", "") if is_dict else ""
            }