import json
import os
from sentence_transformers import SentenceTransformer, util

# Directory holding the bundled data files, resolved relative to this module
# rather than the current working directory.
DATA_DIR = os.path.join(os.path.dirname(__file__), "../data")

# Role definitions keyed by role name, loaded once at import time.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of falling back to the locale's default encoding.
with open(os.path.join(DATA_DIR, "role_skills.json"), encoding="utf-8") as f:
    ROLE_SKILLS = json.load(f)

# Process-wide cache slot for the embedding model; filled in by get_model().
_model = None


def get_model():
    """Return the shared ``all-MiniLM-L6-v2`` SentenceTransformer instance.

    The model is constructed on the first call and stored in the module-level
    ``_model`` variable; later calls return that same object.
    """
    global _model
    if _model is not None:
        return _model
    _model = SentenceTransformer("all-MiniLM-L6-v2")
    return _model

def is_skill_matched(required: str, resume_skills: list, threshold: float = 0.75) -> bool:
    """Return whether *required* is covered by any entry in *resume_skills*.

    A case-insensitive exact comparison is tried first. Only if that fails
    are sentence embeddings compared: the skill counts as matched when its
    cosine similarity with at least one resume skill reaches *threshold*.

    Args:
        required: The skill name to look for.
        resume_skills: Skill names extracted from the resume.
        threshold: Minimum cosine similarity (inclusive) for a semantic match.

    Returns:
        True if an exact or semantic match is found, otherwise False.
    """
    if not resume_skills:
        # Nothing to compare against, so avoid loading the embedding model.
        return False

    required_lower = required.lower()
    if any(required_lower == skill.lower() for skill in resume_skills):
        return True

    model = get_model()
    req_emb = model.encode(required)
    # Encode all resume skills in one batched call instead of one call each.
    skill_embs = model.encode(list(resume_skills))
    # cos_sim yields a 1 x len(resume_skills) matrix; the best score decides.
    best_score = util.cos_sim(req_emb, skill_embs).max().item()
    return best_score >= threshold

def analyze_skill_gap(parsed: dict, target_role: str) -> dict:
    """Compare the resume's skills with the skills listed for *target_role*.

    Each skill in the role's ``required``, ``preferred`` and ``bonus`` tiers
    is checked with ``is_skill_matched`` against ``parsed["skills"]``.
    Unmatched skills are reported as missing and given a learning priority of
    ``HIGH``, ``MEDIUM`` or ``LOW`` according to the tier they came from.

    Args:
        parsed: Parsed resume data; its ``skills`` entry is read (defaults to
            an empty list when absent).
        target_role: Key into ``ROLE_SKILLS``.

    Returns:
        A dict with ``targetRole``, ``existingSkills``, ``missingSkills`` and
        ``learningPriorities``.

    Raises:
        ValueError: If *target_role* has no (non-empty) entry in
            ``ROLE_SKILLS``.
    """
    role_data = ROLE_SKILLS.get(target_role)
    if not role_data:
        available = list(ROLE_SKILLS)
        raise ValueError(f"Unknown role. Available: {available}")

    # Tiers are visited in insertion order, most important first.
    tier_priority = {"required": "HIGH", "preferred": "MEDIUM", "bonus": "LOW"}
    candidate_skills = parsed.get("skills", [])

    have = []
    lack = []
    study_plan = []
    for tier, priority in tier_priority.items():
        for wanted in role_data.get(tier, []):
            if not is_skill_matched(wanted, candidate_skills):
                lack.append(wanted)
                study_plan.append({"skill": wanted, "priority": priority})
                continue
            have.append(wanted)

    report = {
        "targetRole": target_role,
        "existingSkills": have,
        "missingSkills": lack,
        "learningPriorities": study_plan,
    }
    return report

# Process-wide cache slot for the spaCy pipeline; filled in by _get_nlp().
_nlp = None


def _get_nlp():
    """Return the shared ``en_core_web_sm`` spaCy pipeline, loading it once."""
    global _nlp
    if _nlp is None:
        # Kept as a function-scope import so spaCy is only imported when a
        # job match is actually requested.
        import spacy

        _nlp = spacy.load("en_core_web_sm")
    return _nlp


def analyze_job_match(parsed: dict, job_description: str) -> dict:
    """Score how well a parsed resume matches a job description.

    Keywords are the lowercased lemmas of the job description's tokens,
    skipping stop words, punctuation, whitespace and tokens of two characters
    or fewer. A keyword counts as matching when it occurs as a substring of
    the lowercased resume text. The final score blends the keyword hit rate
    (60%) with the cosine similarity of the embeddings of the first 1000
    characters of each text (40%).

    Args:
        parsed: Parsed resume data; its ``rawText`` entry is read (a missing
            or ``None`` value is treated as empty text).
        job_description: Full text of the job posting.

    Returns:
        A dict with ``matchPercentage`` (int, clamped to 0-100),
        ``matchingSkills`` (up to 15 keywords), ``missingKeywords`` (up to 5
        keywords) and ``recommendations`` (list of advice strings).
    """
    doc = _get_nlp()(job_description)
    # dict.fromkeys removes duplicates while keeping first-seen order, so the
    # truncated lists below are the same on every run.
    jd_keywords = list(dict.fromkeys(
        token.lemma_.lower()
        for token in doc
        if not (token.is_stop or token.is_punct or token.is_space)
        and len(token.text) > 2
    ))

    raw_text = (parsed.get("rawText") or "").lower()

    matching = []
    missing = []
    for kw in jd_keywords:
        # Substring test: a lemma such as "manage" also hits "managed".
        if kw in raw_text:
            matching.append(kw)
        else:
            missing.append(kw)
    match_pct = int((len(matching) / len(jd_keywords)) * 100) if jd_keywords else 0

    # Semantic similarity bonus
    model = get_model()
    resume_emb = model.encode(raw_text[:1000])
    jd_emb = model.encode(job_description[:1000])
    semantic_score = util.cos_sim(resume_emb, jd_emb)[0][0].item()
    blended = int((match_pct * 0.6) + (semantic_score * 100 * 0.4))
    # Cosine similarity can be negative; keep the reported value a percentage.
    final_pct = max(0, min(100, blended))

    recs = []
    top_missing = missing[:5]
    if top_missing:
        recs.append(f"Add these keywords from the job description: {', '.join(top_missing)}")
    if final_pct < 60:
        recs.append("Tailor your resume summary/objective to mirror the job description language.")

    return {
        "matchPercentage": final_pct,
        "matchingSkills": matching[:15],
        "missingKeywords": top_missing,
        "recommendations": recs,
    }
