import json
from typing import List, Dict, Any
from PyPDF2 import PdfReader
from docx import Document
from io import BytesIO
from services.ai_service import AIService
from services.cv_storage import CVStorageService
import logging

# Module-level logger named after this module, so its output can be
# filtered/configured per module by the application's logging setup.
logger = logging.getLogger(__name__)

class ResumeParserService:
    """Extract text from resume files and parse it into structured data.

    The service reads PDF / Word content into plain text, sends that text to
    the AI service wrapped in a fixed extraction prompt, decodes the JSON
    object found in the reply and, on request, hands the result to the CV
    storage service.
    """

    def __init__(self):
        """Initialize resume parser service with AI service"""
        self.ai_service = AIService()
        self.cv_storage = None  # Initialize only when needed
        # The single "{}" placeholder receives the resume text; the doubled
        # braces are literal braces escaped for str.format().
        self.prompt_template = """
You are a precise resume parser assistant. Extract ONLY the information that is EXPLICITLY stated in the given resume. Your primary goal is accuracy, not completeness.

Extract the following fields:
- Name
- Email
- Phone number
- Profile summary
- Languages
- Skills
- Experience
- Education
- Date of birth
- Nationality

CRITICAL INSTRUCTIONS:
1. If a piece of information is not explicitly stated in the resume, set that field to null in the JSON output.
2. Do NOT invent, assume, or generate ANY information.
3. Do NOT use placeholder or dummy data under any circumstances.
4. Accuracy is paramount. It's mandatory to use null for any missing information.
5. For lists (like skills or languages), use an empty list [] if no items are found.
6. For nested objects (like experience or education), include them only if information is available.

Resume: {}

Provide the extracted information in the following JSON format:

{{
    "name": null,
    "email": null,
    "phone": null,
    "profile_summary": null,
    "languages": [],
    "skills": [],
    "date_of_birth": null,
    "nationality": null,
    "experience": [
        {{
            "company": null,
            "role": null,
            "start_date": null,
            "end_date": null,
            "responsibilities": null
        }}
    ],
    "education": [
        {{
            "institution": null,
            "degree": null,
            "start_date": null,
            "end_date": null
        }}
    ]
}}

FINAL CHECKLIST:
- Have you used null for ALL missing information?
- Are ALL fields either filled with accurate data from the resume or set to null?
- Have you refrained from adding ANY assumed or generated information?
- Are lists empty [] if no items are found?
- Are nested objects only included if information is available?

Remember: Your task is solely to extract and report information that is explicitly present in the resume. Do not attempt to fill gaps or make the output look complete.
"""

    def extract_text_from_pdf(self, file_content: bytes) -> str:
        """Extract text from PDF file

        Args:
            file_content: Raw bytes of the PDF document.

        Returns:
            The text of all pages joined by newlines, with surrounding
            whitespace stripped.

        Raises:
            Exception: If the PDF cannot be read; the original error is
                chained as the cause.
        """
        try:
            reader = PdfReader(BytesIO(file_content))
            # Guard against a page yielding no text (None) so that one
            # image-only page does not abort the whole document.
            page_texts = [page.extract_text() or "" for page in reader.pages]
            return "\n".join(page_texts).strip()
        except Exception as e:
            raise Exception(f"Error reading PDF: {str(e)}") from e

    def extract_text_from_docx(self, file_content: bytes) -> str:
        """Extract text from Word document

        Only the document's paragraphs are read.

        Args:
            file_content: Raw bytes of the Word document.

        Returns:
            The paragraph texts joined by newlines, with surrounding
            whitespace stripped.

        Raises:
            Exception: If the document cannot be read; the original error is
                chained as the cause.
        """
        try:
            doc = Document(BytesIO(file_content))
            return "\n".join(paragraph.text for paragraph in doc.paragraphs).strip()
        except Exception as e:
            raise Exception(f"Error reading Word document: {str(e)}") from e

    def extract_text_from_file(self, file_content: bytes, filename: str) -> str:
        """Extract text based on file type

        The type is decided from the (case-insensitive) filename extension.

        Args:
            file_content: Raw bytes of the uploaded file.
            filename: Name of the file, used only to pick the extractor.

        Returns:
            The extracted plain text.

        Raises:
            Exception: If the extension is not .pdf, .docx or .doc, or if the
                selected extractor fails.
        """
        lowered = filename.lower()
        if lowered.endswith('.pdf'):
            return self.extract_text_from_pdf(file_content)
        # NOTE(review): '.doc' is routed to the same reader as '.docx';
        # confirm that reader actually accepts the legacy binary format.
        if lowered.endswith(('.docx', '.doc')):
            return self.extract_text_from_docx(file_content)
        raise Exception("Unsupported file type. Only PDF and Word documents are supported.")

    def _resolve_company_id(self, job_emp_id: str):
        """Best-effort lookup of the company id behind ``job_emp_id``.

        Any failure is logged as a warning and results in ``None`` so that a
        tracking lookup can never prevent a resume from being parsed.
        """
        try:
            # Imported lazily, as the original code did, so the job API is
            # only loaded when a lookup is actually requested.
            from services.job_api_service import JobAPIService

            job_result = JobAPIService().get_employer_job(job_emp_id)
            if not job_result.get('success'):
                return None
            job_data = job_result.get('data', {})
            if not isinstance(job_data, dict):
                return None
            # The job payload may be nested under a 'job' key.
            if 'job' in job_data:
                job_data = job_data['job']
            return (job_data.get('company_id') or
                    job_data.get('employer_id') or
                    job_data.get('employer_user_id'))
        except Exception as e:
            logger.warning("Could not fetch company_id for job %s: %s", job_emp_id, e)
            return None

    @staticmethod
    def _extract_json_object(ai_response: str) -> Dict[str, Any]:
        """Decode the JSON object embedded in an AI reply.

        Raises:
            ValueError: If the reply contains no '{' or no '}'.
            json.JSONDecodeError: If the embedded text is not valid JSON.
        """
        # Extract JSON from response (in case there's extra text)
        start_idx = ai_response.find('{')
        end_idx = ai_response.rfind('}') + 1
        if start_idx == -1 or end_idx == 0:
            raise ValueError("No valid JSON found in response")
        try:
            return json.loads(ai_response[start_idx:end_idx])
        except json.JSONDecodeError as slice_error:
            # Trailing commentary that itself contains a '}' makes the slice
            # too long; fall back to decoding just the first object.
            try:
                parsed, _ = json.JSONDecoder().raw_decode(ai_response, start_idx)
            except json.JSONDecodeError:
                # Report the original failure, not the fallback's.
                raise slice_error from None
            return parsed

    def parse_resume(self, resume_text: str, job_emp_id: str = None) -> Dict[str, Any]:
        """
        Parse resume text using AI and return structured data

        Args:
            resume_text: The extracted text from resume
            job_emp_id: Job employer ID to fetch company_id for tracking

        Returns:
            Parsed resume data dictionary. On failure the dictionary holds an
            "error" message instead (plus "raw_response" when the AI reply
            could not be decoded as JSON); this method does not raise.
        """
        try:
            # Nothing to parse (e.g. an image-only PDF): report it instead of
            # asking the AI to parse an empty resume.
            if not resume_text or not resume_text.strip():
                return {
                    "error": "Error parsing resume: no text could be extracted from the resume"
                }

            # Get company_id from job_emp_id if provided
            company_id = self._resolve_company_id(job_emp_id) if job_emp_id else None

            # Create prompt with resume text
            prompt = self.prompt_template.format(resume_text)

            # Get AI response with company_id for tracking
            ai_response = self.ai_service.ai_response(
                question=prompt,
                company_id=company_id,
                feature="resume_parsing"
            )

            # Try to parse JSON from AI response
            try:
                return self._extract_json_object(ai_response)
            except (json.JSONDecodeError, ValueError) as e:
                return {
                    "error": f"Failed to parse AI response as JSON: {str(e)}",
                    "raw_response": ai_response
                }

        except Exception as e:
            return {
                "error": f"Error parsing resume: {str(e)}"
            }

    def parse_and_store_multiple_resumes(self, files_data: List[tuple], store_in_db: bool = True, job_emp_id: str = None) -> List[Dict[str, Any]]:
        """Parse multiple resume files and optionally store in database

        Args:
            files_data: (file_content, filename) pairs, one per resume.
            store_in_db: When true, successfully parsed resumes are passed to
                the CV storage service.
            job_emp_id: Job employer ID, forwarded to both parsing (for
                company tracking) and storage.

        Returns:
            One result dictionary per input file, in input order. A file that
            fails text extraction yields {"filename", "error"}; otherwise the
            entry holds "filename", "extracted_text_length", "parsed_data"
            and, if stored, "storage_result".
        """
        results = []

        for file_content, filename in files_data:
            try:
                # Extract text from file
                resume_text = self.extract_text_from_file(file_content, filename)

                # Parse resume; job_emp_id is forwarded so the AI call is
                # attributed to the right company.
                parsed_data = self.parse_resume(resume_text, job_emp_id)

                result = {
                    "filename": filename,
                    "extracted_text_length": len(resume_text),
                    "parsed_data": parsed_data
                }

                # Store in database if requested and parsing was successful
                if store_in_db and not parsed_data.get("error"):
                    if self.cv_storage is None:
                        self.cv_storage = CVStorageService()

                    # Pass job_emp_id to storage method
                    result["storage_result"] = self.cv_storage.store_complete_cv_data(
                        filename, resume_text, parsed_data, job_emp_id
                    )

                results.append(result)

            except Exception as e:
                # One bad file must not abort the rest of the batch.
                results.append({
                    "filename": filename,
                    "error": str(e)
                })

        return results

    def parse_multiple_resumes(self, files_data: List[tuple]) -> List[Dict[str, Any]]:
        """Parse multiple resume files (legacy method for backward compatibility)"""
        return self.parse_and_store_multiple_resumes(files_data, store_in_db=False, job_emp_id=None)