import json
import re
from transformers import pipeline

# Load the QA pipeline using the distilbert-base-uncased-distilled-squad model.
# This statement runs once, when the module is imported; get_answer() reuses
# the resulting pipeline object on every call instead of building a new one.
# NOTE(review): presumably the model weights are downloaded or read from a
# local cache on first load - confirm before importing this module offline.
qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased-distilled-squad")

def get_next_part(phase):
    """
    Extracts the instructions and questions for the given phase.

    The structured text file is read in full. The instructions are the text
    between the phase's "INSTRUCTIONS for phase N" marker and the next
    "QUESTION and DECISION" marker. The answer options are parsed from the
    text that follows that marker, up to the next occurrence of "PART" (or the
    end of the file when there is none).

    :param phase: The current phase (1 to 14)
    :return: Tuple of (sanitized instructions, list of question dicts). Each
             question dict has the keys "text" (the sanitized option line) and
             "choice" (the option letter). The default questions are returned
             when the questions section is empty.
    :raises IndexError: If phase is outside the supported range.
    :raises ValueError: If the phase markers cannot be found in the text
                        (raised by extract_chapter).
    """
    # Load the structured text file
    with open('/var/www/html/test1/structured_text.txt', 'r') as file:
        text = file.read()

    # Every phase uses the same end marker; only the phase number in the
    # start marker differs, so the pairs are generated instead of listed.
    phase_markers = [
        (f"INSTRUCTIONS for phase {number}", "QUESTION and DECISION")
        for number in range(1, 15)
    ]

    # Ensure phase is within range
    if phase < 1 or phase > len(phase_markers):
        raise IndexError("Phase out of range")

    # Get the start and end markers for the current phase
    start_marker, end_marker = phase_markers[phase - 1]

    # Extract and sanitize the instructions (question markers are untouched)
    instructions = sanitize_text(extract_chapter(text, start_marker, end_marker))

    # Debug: Check if instructions were properly extracted
    print(f"Phase {phase} - Extracted Instructions: {instructions[:100]}...")

    # Locate the end marker that follows this phase's start marker. The "not
    # found" test must be made on the raw index: adding len(end_marker) first
    # would turn -1 into a positive offset and hide the failure.
    start_index = text.find(start_marker)
    end_index = -1
    if start_index != -1:
        end_index = text.find(end_marker, start_index + len(start_marker))
    if end_index == -1:
        print(f"No 'QUESTION and DECISION' found for phase {phase}")
        return instructions, []
    questions_start = end_index + len(end_marker)

    # Find where the questions end (next PART or end of text)
    questions_end = text.find('PART', questions_start)
    if questions_end == -1:
        questions_end = len(text)

    # Extract the text containing the questions
    questions_text = text[questions_start:questions_end].strip()

    # Debug: Check if questions were properly extracted
    print(f"Phase {phase} - Extracted Questions Text: {questions_text}")

    # Handle missing or malformed questions section
    if not questions_text:
        print(f"Warning: No questions found for phase {phase}. Returning default questions.")
        return instructions, get_default_questions()

    # Dynamic question parsing: handle more than just (A) and (B). The letter
    # is captured directly so no parenthesis stripping is needed afterwards.
    question_pattern = re.compile(r"\(([A-Z])\)\s*")
    questions = []
    for line in questions_text.splitlines():
        # Strip before matching so indented option lines are recognised too;
        # the stored text was already the stripped line.
        stripped_line = line.strip()
        match = question_pattern.match(stripped_line)
        if match:
            questions.append({"text": sanitize_text(stripped_line), "choice": match.group(1)})

    return instructions, questions


def remove_headers(text):
    """
    Strips dash-delimited header runs out of the text.

    A header is a dash, followed by one or more uppercase ASCII letters and/or
    whitespace characters, followed by a closing dash.
    :param text: The text to process.
    :return: The text without those runs, with surrounding whitespace trimmed.
    """
    header_pattern = re.compile(r'-[A-Z\s]+-')
    without_headers = header_pattern.sub('', text)
    return without_headers.strip()


def get_answer(question, context):
    """
    Ask the module-level QA pipeline a question about a context passage.
    :param question: The question asked
    :param context: The context for the question
    :return: The value stored under the "answer" key of the pipeline result
    """
    prediction = qa_pipeline(question=question, context=context)
    answer = prediction["answer"]
    return answer

def get_recommendation(feedback):
    """
    Pick a recommendation message from keywords found in the feedback.

    The match is a case-insensitive substring test. Positive keywords
    ("good", "okay") are checked before negative ones ("bad", "not ready").
    :param feedback: User feedback text
    :return: Recommendation based on feedback
    """
    lowered = feedback.lower()

    if any(keyword in lowered for keyword in ("good", "okay")):
        return "Thank you. It's about you how you will proceed, but I recommend going ahead."

    if any(keyword in lowered for keyword in ("bad", "not ready")):
        return "Take your time. If you're not ready, feel free to repeat the phase."

    # No known keyword: leave the decision to the user
    return "I understand. You can decide whether to continue or repeat the phase."

def extract_chapter(text, start_marker, end_marker):
    """
    Extract text between two markers, excluding the start and end markers.

    The end marker is searched for only after the end of the start marker, so
    an end marker that also occurs inside the start marker is not mistaken for
    the end of the section.
    :param text: The full text.
    :param start_marker: Start of the section.
    :param end_marker: End of the section.
    :return: Extracted text, with surrounding whitespace trimmed.
    :raises ValueError: If the start marker is missing, or no end marker
                        follows it.
    """
    start_index = text.find(start_marker)
    if start_index == -1:
        # Without a start marker there is no section to search for an end in
        content_start = -1
        end_index = -1
    else:
        content_start = start_index + len(start_marker)
        end_index = text.find(end_marker, content_start)

    # Debug: Check if the markers are found correctly
    print(f"Start marker found at: {start_index}, End marker found at: {end_index}")

    if start_index == -1 or end_index == -1:
        raise ValueError(f"Could not find markers '{start_marker}' and '{end_marker}' in text.")

    # Extract the text between markers but skip the actual start_marker
    extracted_text = text[content_start:end_index].strip()

    # Debug: Log first 100 chars (no header removal is applied here)
    print(f"Extracted text (after header removal): {extracted_text[:100]}...")

    return extracted_text

def sanitize_text(text):
    """
    Drops every carriage return from the text and trims the result.
    :param text: The text to sanitize.
    :return: Text without '\\r' characters and without leading or trailing
             whitespace.
    """
    # Splitting on '\r' and re-joining removes each carriage return
    without_carriage_returns = "".join(text.split('\r'))
    return without_carriage_returns.strip()


def get_default_questions():
    """
    Builds the fallback question list used when a phase has no questions.
    :return: A new list with two question dicts: proceed (A) and repeat (B).
    """
    proceed_option = dict(text="(A) Proceed to the next phase.", choice="A")
    repeat_option = dict(text="(B) Repeat the current phase.", choice="B")
    return [proceed_option, repeat_option]

def get_phase_context(phase):
    """
    Looks up the context string for a phase, used as input for the AI model.
    :param phase: The current phase
    :return: "Context for phase N" for phases 1 to 14, otherwise
             "Unknown phase context"
    """
    # Phases 1..14 all follow the same wording, so the table is generated
    context_mapping = {number: f"Context for phase {number}" for number in range(1, 15)}
    return context_mapping.get(phase, "Unknown phase context")
