# Copyright (c) 2025 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import datetime
import os
import signal
import socket
import sys
import threading
import time

from oslo_config import cfg
from oslo_log import log as logging
import tooz
from tooz import coordination

from vmms import config
from vmms.db.sqlalchemy.database import get_session
from vmms.db.sqlalchemy.models import VMMigration, MigrationState
from vmms.mistral_client import MistralClient

# Initialize logging
LOG = logging.getLogger(__name__)

def sd_notify(state: str):
    """Deliver one status datagram to the systemd notification socket.

    The destination address is read from the ``NOTIFY_SOCKET`` environment
    variable.  Returns ``True`` once the datagram has been sent, and
    ``False`` when the variable is unset or empty, or when connecting or
    sending fails (the failure is only logged at debug level).
    """
    target = os.getenv("NOTIFY_SOCKET")
    if not target:
        return False

    # A leading '@' denotes an abstract namespace socket: swap it for NUL
    if target.startswith('@'):
        target = '\0' + target[1:]

    # The context manager closes the socket on every exit path
    with socket.socket(socket.AF_UNIX, socket.SOCK_DGRAM) as notify_sock:
        try:
            notify_sock.connect(target)
            notify_sock.sendall(state.encode())
        except Exception as e:
            LOG.debug(f"🐛 Failed to notify systemd: {e}")
            return False
    return True


class WorkerService(object):
    """Main worker service for VM migration scheduling."""

    def __init__(self, CONF):
        """Build the worker from the loaded configuration object.

        Stores ``CONF``, derives the coordination member id from
        ``CONF.worker.my_hostname``, creates the Mistral client and installs
        the SIGTERM/SIGINT handlers.
        """
        self.CONF = CONF

        # Coordination identity (both values are byte strings)
        self.node_id = CONF.worker.my_hostname.encode('utf-8')
        self.group_name = b'vm_migration_workers'

        # Runtime state; the coordinator and the lock are set later by
        # initialize_coordination()
        self.running = False
        self.coordinator = None
        self.scheduler_lock = None
        self._stop_event = threading.Event()

        # Client used to start and inspect workflow executions
        self.mistral_client = MistralClient(CONF)

        # Route both termination signals to the same handler
        for signum in (signal.SIGTERM, signal.SIGINT):
            signal.signal(signum, self._signal_handler)

    def _signal_handler(self, signum, frame):
        """Handle a shutdown signal by requesting a graceful stop.

        Installed for SIGTERM and SIGINT by ``__init__``.  Logs which signal
        was received and delegates to ``stop()``, which emits the
        "shutting down" message itself (so it is no longer logged twice).

        ``signum`` is the number of the received signal; ``frame`` is the
        interrupted stack frame and is not used.
        """
        try:
            signal_name = signal.Signals(signum).name
        except ValueError:
            # Number without a named entry in signal.Signals
            signal_name = str(signum)
        LOG.info(f"⚙ VMMS worker received signal {signal_name}")
        self.stop()

    def initialize_coordination(self):
        """Connect to the tooz backend, join the worker group and create the lock.

        Steps: build a coordinator for ``CONF.worker.coordination_url`` with
        this worker's ``node_id``, start it, join ``self.group_name``
        (creating the group first when it does not exist yet) and create the
        ``vm_scheduler_lock`` lock object.

        Returns ``True`` on success.  On any failure the error is logged, a
        coordinator that had already been started is stopped again, the
        ``coordinator`` and ``scheduler_lock`` attributes are reset to
        ``None`` and ``False`` is returned.
        """
        if not self.CONF.worker.coordination_url:
            LOG.error(f"⧱ Coordination URL not configured")
            return False

        started = False
        try:
            self.coordinator = coordination.get_coordinator(
                self.CONF.worker.coordination_url,
                self.node_id
            )
            # start_heart=True asks tooz to run the heartbeat in a background
            # thread; nothing else in this service calls heartbeat(), and
            # backends that expire members/locks rely on it.
            self.coordinator.start(start_heart=True)
            started = True

            # Join the worker group
            try:
                self.coordinator.join_group(self.group_name).get()
            except coordination.GroupNotCreated:
                # Try to create the group (first worker)
                try:
                    self.coordinator.create_group(self.group_name).get()
                    self.coordinator.join_group(self.group_name).get()
                except coordination.GroupAlreadyExist:
                    # Another worker created it in the meantime, join normally
                    self.coordinator.join_group(self.group_name).get()
            except coordination.ToozError as e:
                LOG.error(f"⧱ Failed to join coordination group: {e}")
            else:
                # Create scheduler lock
                self.scheduler_lock = self.coordinator.get_lock(b'vm_scheduler_lock')

                LOG.info(f"⚙ Successfully initialized coordination with node_id: {self.CONF.worker.my_hostname}")
                return True

        except coordination.ToozError as e:
            LOG.error(f"⧱ Tooz coordination error: {e}")
        except Exception as e:
            LOG.error(f"⧱ Failed to initialize coordination: {e}")

        # Failure path: do not leave a started coordinator (and its
        # connection) behind, the caller only sees the False return value.
        if started:
            try:
                self.coordinator.stop()
            except Exception as e:
                LOG.warning(f"⚠ Error stopping coordinator after failed initialization: {e}")
        self.coordinator = None
        self.scheduler_lock = None
        return False

    def start(self):
        """Start the worker service and run its main loop until stopped.

        Initializes coordination, verifies that the Mistral client is
        usable, reports ``READY=1`` through ``sd_notify`` and then calls
        ``run_periodic_check()`` every ``CONF.worker.check_interval`` seconds
        until ``stop()`` is requested.  An exception raised by a check is
        logged and followed by a 10 second pause before the next attempt.

        Returns ``False`` when start-up fails and ``True`` after the loop has
        ended.  ``cleanup()`` is called whenever coordination had been
        initialized, including the Mistral failure path and abnormal exits
        from the loop.
        """
        LOG.info(f"⚙ VMMS worker is starting up...")
        LOG.info(f"⚙ VMMS worker Service (node_id: {self.CONF.worker.my_hostname})")

        if not self.initialize_coordination():
            LOG.error(f"⧱ Failed to initialize coordination, exiting")
            return False

        # Check if Mistral client initialized successfully
        if not self.mistral_client.client:
            LOG.error(f"⧱ Mistral client failed to initialize, exiting")
            # Coordination is already up at this point: leave the group and
            # stop the coordinator instead of abandoning them.
            self.cleanup()
            return False

        self.running = True

        # Notify systemd that we're ready
        sd_notify("READY=1")
        LOG.info(f"⚙ VMMS worker is ready.")

        try:
            # Main worker loop
            while self.running and not self._stop_event.is_set():
                try:
                    self.run_periodic_check()
                    # Sleeps for the interval but returns early once stop() sets the event
                    self._stop_event.wait(self.CONF.worker.check_interval)
                except Exception as e:
                    LOG.error(f"⧱ Error in periodic check: {e}", exc_info=True)
                    self._stop_event.wait(10)  # Wait before retry
        finally:
            # Also runs when the loop is left through a non-Exception error
            self.cleanup()
        return True

    def stop(self):
        """Request a graceful shutdown of the worker.

        Clears the ``running`` flag and sets the stop event, which are the
        two conditions checked by the loop in ``start()``, then reports
        ``STOPPING=1`` through ``sd_notify``.  Called by the signal handler.
        """
        LOG.info(f"⚙ VMMS worker is shutting down...")
        self.running = False
        # Setting the event also ends any wait the main loop is blocked in
        self._stop_event.set()
        sd_notify("STOPPING=1")

    def cleanup(self):
        """Release coordination resources (best effort).

        Leaves the worker group and stops the coordinator.  The two steps
        are attempted independently, so a failure to leave the group (for
        example because it was never joined) no longer prevents the
        coordinator from being stopped.  Errors are logged as warnings and
        never raised.  Afterwards the ``coordinator`` and ``scheduler_lock``
        attributes are reset to ``None``, which makes repeated calls no-ops.
        """
        coordinator = self.coordinator
        if not coordinator:
            return

        try:
            coordinator.leave_group(self.group_name).get()
        except Exception as e:
            LOG.warning(f"⚠ Error during coordinator cleanup: {e}")

        try:
            coordinator.stop()
        except Exception as e:
            LOG.warning(f"⚠ Error during coordinator cleanup: {e}")

        # The lock object belongs to the coordinator that was just stopped
        self.coordinator = None
        self.scheduler_lock = None

    def parse_migration_result(self, execution_output):
        """Decide from a workflow execution output whether the migration succeeded.

        The execution output must be a dict.  Its 'output' entry (or the
        dict itself when that key is absent) is taken as the workflow
        output; a string value is decoded as JSON first.  The script's
        stdout is then read from 'stdout', or from 'result' (a dict carrying
        'stdout', a plain string, or any other value converted with str()).

        The stdout lines are scanned from the last one upwards: the first
        line containing 'Migration successful.' gives True, the first one
        containing 'Migration failed.' gives False.  Everything else
        (missing or empty output, unexpected types, invalid JSON, no marker
        line, unexpected exception) is logged and gives False.
        """
        LOG.debug(f"🐛 Execution output type: {type(execution_output)}, content: {execution_output}")

        if not execution_output:
            LOG.error(f"⧱ Cannot parse migration result: No execution output provided")
            return False

        try:
            # Step 1: locate the workflow output inside the execution output
            if not isinstance(execution_output, dict):
                LOG.error(f"⧱ Execution output is not a dict: {type(execution_output)}")
                return False

            if 'output' in execution_output:
                workflow_output = execution_output['output']
                LOG.debug(f"🐛 Workflow output type: {type(workflow_output)}, content: {workflow_output}")
            else:
                # No wrapper key: treat the dict itself as the workflow output
                workflow_output = execution_output
                LOG.debug(f"🐛 Using execution_output as workflow_output directly")

            # Step 2: decode a JSON string, reject anything that is neither str nor dict
            if isinstance(workflow_output, str):
                import json
                try:
                    workflow_output = json.loads(workflow_output)
                    LOG.debug(f"🐛 Parsed JSON workflow output type: {type(workflow_output)}, content: {workflow_output}")
                except json.JSONDecodeError as e:
                    LOG.error(f"⧱ Failed to parse workflow output JSON: {e}")
                    return False
            elif not isinstance(workflow_output, dict):
                LOG.error(f"⧱ Workflow output is not a dict or JSON string: {type(workflow_output)}")
                return False

            # The decoded JSON document may still be something other than a dict
            if not isinstance(workflow_output, dict):
                LOG.error(f"⧱ Unexpected workflow output type after JSON parsing: {type(workflow_output)}")
                return False

            # Step 3: extract the script's stdout
            if 'stdout' in workflow_output:
                stdout = workflow_output['stdout']
                LOG.debug(f"🐛 Stdout from workflow_output: '{stdout}'")
            elif 'result' in workflow_output:
                ssh_result = workflow_output['result']
                LOG.debug(f"🐛 SSH result type: {type(ssh_result)}, content: {ssh_result}")

                if isinstance(ssh_result, dict):
                    stdout = ssh_result.get('stdout', '')
                    LOG.debug(f"🐛 Stdout from dict: '{stdout}'")
                elif isinstance(ssh_result, str):
                    stdout = ssh_result
                    LOG.debug(f"🐛 Stdout as string: '{stdout}'")
                else:
                    stdout = str(ssh_result)
                    LOG.debug(f"🐛 Stdout converted to string: '{stdout}'")
            else:
                LOG.error(f"⧱ No stdout or result in workflow output")
                return False

            if not stdout:
                LOG.error(f"⧱ No stdout found in SSH result")
                return False

            # Step 4: look for the verdict, most recent line first
            lines = stdout.strip().split('\n')
            LOG.debug(f"🐛 Parsed lines: {lines}")

            for raw_line in lines[::-1]:
                line_stripped = raw_line.strip()
                if 'Migration successful.' in line_stripped:
                    LOG.debug(f"🐛 Migration script reported success in line: '{line_stripped}'")
                    return True
                if 'Migration failed.' in line_stripped:
                    LOG.debug(f"🐛 Migration script reported failure in line: '{line_stripped}'")
                    return False

            # No clear success/failure message found
            last_line = lines[-1].strip() if lines else 'NONE'
            LOG.error(f"⧱ Migration script output found but no clear success/failure message. Last line: '{last_line}'")
            return False

        except Exception as e:
            LOG.error(f"⧱ Error parsing migration result: {e}", exc_info=True)
            return False

    def run_periodic_check(self):
        """Execute one monitoring/scheduling cycle.

        Running migrations are monitored on every cycle.  New migrations are
        scheduled only when the scheduler lock could be acquired, and the
        lock is released again afterwards.  Errors raised by either phase
        are logged and not propagated.
        """
        LOG.debug(f"🐛 Running periodic check cycle")

        # Phase 1: monitoring, done by every worker on every cycle
        try:
            self.monitor_running_migrations()
        except Exception as e:
            LOG.error(f"⧱ Error monitoring running migrations: {e}", exc_info=True)

        # Phase 2: scheduling, only for the worker that obtains the lock
        if not self.attempt_scheduler_role():
            return

        try:
            self.schedule_new_migrations()
        except Exception as e:
            LOG.error(f"⧱ Error scheduling new migrations: {e}", exc_info=True)
        finally:
            self.release_scheduler_role()

    def attempt_scheduler_role(self):
        """Try to take the scheduler lock without blocking.

        Returns the result of the non-blocking ``acquire`` call, or
        ``False`` when no lock has been created or when acquiring raises
        (the error is logged as a warning).
        """
        lock = self.scheduler_lock
        if not lock:
            return False

        try:
            got_lock = lock.acquire(blocking=False)
        except Exception as e:
            LOG.warning(f"⚠ Failed to acquire scheduler lock: {e}")
            return False
        return got_lock

    def release_scheduler_role(self):
        """Give the scheduler lock back if this worker currently holds it.

        Does nothing when there is no lock or when the lock does not report
        itself as acquired.  A failing release is logged as a warning.
        """
        lock = self.scheduler_lock
        if not (lock and lock.acquired):
            return

        try:
            lock.release()
        except Exception as e:
            LOG.warning(f"⚠ Failed to release scheduler lock: {e}")

    def monitor_running_migrations(self):
        """Refresh the status of every migration that is in MIGRATING state.

        Opens a database session, loads the MIGRATING records that have a
        workflow execution recorded and checks each of them in turn.  An
        error on one record is logged and does not stop the others.  The
        session is always closed.
        """
        LOG.debug(f"🐛 Monitoring running migrations")

        db_session = get_session()
        try:
            # All MIGRATING records with a workflow execution; the query
            # applies no per-worker filter.
            in_progress = (
                db_session.query(VMMigration)
                .filter(
                    VMMigration.state == MigrationState.MIGRATING,
                    VMMigration.workflow_exec.isnot(None),
                )
                .all()
            )

            for migration in in_progress:
                try:
                    self.check_and_update_migration_status(db_session, migration)
                except Exception as e:
                    LOG.error(f"⧱ Error checking migration {migration.id}: {e}")

        finally:
            db_session.close()

    def check_and_update_migration_status(self, session, migration):
        """Check a migration's workflow execution and persist the resulting state.

        ``session`` is the database session the ``migration`` record was
        loaded from; ``migration`` is the record to check.

        Behaviour by execution state (compared case-insensitively):
        - SUCCESS / DONE: the execution output is parsed; the record becomes
          MIGRATED when the script reported success, otherwise ERROR.
        - ERROR / FAILED: the record becomes ERROR.
        - anything else (including a missing state): the record is left
          untouched and the execution is treated as still running.

        Nothing is raised.  On any error the problem is logged and the
        session is rolled back, so that a failed commit does not leave the
        shared session unusable for the records checked after this one.
        """
        # Captured up front so the error path never has to touch the record
        # again after the session has failed.
        migration_id = migration.id
        execution_id = migration.workflow_exec

        if not execution_id:
            LOG.debug(f"🐛 Migration {migration_id} has no workflow execution, skipping")
            return

        try:
            LOG.debug(f"🐛 Checking workflow execution {execution_id} for migration {migration_id}")

            execution = self.mistral_client.get_execution(execution_id)
            if not execution:
                LOG.warning(f"⚠ Could not retrieve execution {execution_id} for migration {migration_id}")
                return

            # A missing or None state is handled like an unknown state
            execution_state = str(execution.get('state') or '').upper()
            LOG.debug(f"🐛 Migration {migration_id} workflow execution {execution_id} is in state: {execution_state}")

            if execution_state in ('SUCCESS', 'DONE'):
                # The workflow finishing is not enough: check what the
                # migration script itself reported.
                execution_output = self.mistral_client.get_execution_output(execution_id)

                if self.parse_migration_result(execution_output):
                    LOG.info(f"⚙ Migration {migration_id} completed successfully (execution: {execution_id})")
                    migration.state = MigrationState.MIGRATED
                else:
                    LOG.info(f"⚙ Migration {migration_id} command failed (execution: {execution_id})")
                    migration.state = MigrationState.ERROR

                session.commit()
                LOG.info(f"⚙ Migration {migration_id} state updated to {migration.state}")

            elif execution_state in ('ERROR', 'FAILED'):
                LOG.info(f"⚙ Migration {migration_id} failed (execution: {execution_id}, state: {execution_state})")
                migration.state = MigrationState.ERROR
                session.commit()
                LOG.info(f"⚙ Migration {migration_id} state updated to ERROR")
            else:
                LOG.debug(f"🐛 Migration {migration_id} execution {execution_id} still running (state: {execution_state})")

        except Exception as e:
            LOG.error(f"⧱ Error checking execution {execution_id} for migration {migration_id}: {e}", exc_info=True)
            # Discard the pending change and reset the session for the caller
            try:
                session.rollback()
            except Exception as rollback_error:
                LOG.warning(f"⚠ Failed to roll back session for migration {migration_id}: {rollback_error}")

    def schedule_new_migrations(self):
        """Start new migrations as far as free execution slots allow.

        Time-scheduled migrations are handled first.  Slots left over after
        that are offered to opportunistic migrations, but only while the
        current time is inside the configured migration window.
        """
        LOG.debug(f"🐛 Scheduling new migrations")

        free_slots = self.get_available_slots()
        if free_slots <= 0:
            LOG.debug(f"🐛 No available slots for scheduling")
            return

        # Migrations with a scheduled time take precedence
        leftover_slots = self.process_scheduled_migrations(free_slots)
        if leftover_slots <= 0:
            return

        # Opportunistic migrations only run inside the migration window
        if self.is_within_migration_window():
            self.process_opportunistic_migrations(leftover_slots)

    def get_available_slots(self):
        """Return how many additional workflow executions may be started.

        The value is ``CONF.worker.parallel_jobs`` minus the number of
        executions Mistral reports in RUNNING state, never less than 0.
        When the running executions cannot be determined (the client
        returns ``None`` or raises) 0 is returned, so that nothing is
        scheduled in this cycle.
        """
        try:
            running_executions = self.mistral_client.list_executions(
                state='RUNNING'
            )

            if running_executions is None:
                # No fallback value is used here: without a reliable count
                # the safe choice is to schedule nothing.
                LOG.warning(f"⚠ Failed to get running executions from Mistral, not scheduling new migrations this cycle")
                return 0

            current_running = len(running_executions)

            # Clamp at 0 in case Mistral reports more running executions
            # than CONF.worker.parallel_jobs.
            available_slots = max(0, self.CONF.worker.parallel_jobs - current_running)

            LOG.debug(f"🐛 Currently running: {current_running}, Available slots: {available_slots}")

            return available_slots

        except Exception as e:
            LOG.error(f"⧱ Error calculating available slots: {e}")
            # Safe fallback - return 0 to prevent overloading
            return 0

    def process_scheduled_migrations(self, slots_available):
        """Trigger SCHEDULED migrations whose scheduled time is in the current hour.

        A migration matches when its ``scheduled_time`` lies in the
        half-open interval from the start of the current local hour to the
        start of the next one, i.e. it has the current date and hour
        (minutes and seconds are ignored).  The comparison is done on the
        column itself rather than on per-row ``date()``/``extract()``
        results, so the database can use an index on ``scheduled_time``.

        At most ``slots_available`` migrations are loaded (database LIMIT),
        earliest scheduled time first.

        Returns the number of slots left, i.e. ``slots_available`` minus the
        number of workflows that were actually started.
        """
        hour_start = datetime.datetime.now().replace(minute=0, second=0, microsecond=0)
        hour_end = hour_start + datetime.timedelta(hours=1)
        current_date = hour_start.date()
        current_hour = hour_start.hour
        LOG.debug(f"🐛 Processing scheduled migrations for {current_date} hour {current_hour}, slots available: {slots_available}")

        session = get_session()
        try:
            scheduled_migrations = session.query(VMMigration).filter(
                VMMigration.state == MigrationState.SCHEDULED,
                VMMigration.scheduled_time.isnot(None),
                VMMigration.scheduled_time >= hour_start,
                VMMigration.scheduled_time < hour_end
            ).order_by(
                # Without an ordering, LIMIT would pick arbitrary rows
                VMMigration.scheduled_time
            ).limit(slots_available).all()

            processed_count = 0
            for migration in scheduled_migrations:
                if self.trigger_migration_workflow(session, migration):
                    processed_count += 1

            LOG.info(f"⚙ Processed {processed_count} scheduled migrations")
            return slots_available - processed_count

        finally:
            session.close()

    def process_opportunistic_migrations(self, slots_available):
        """Trigger SCHEDULED migrations that have no scheduled time.

        At most ``slots_available`` such migrations are loaded (database
        LIMIT) and a workflow is triggered for each of them.  The number of
        workflows actually started is logged; nothing is returned.
        """
        LOG.debug(f"🐛 Processing opportunistic migrations, slots available: {slots_available}")

        db_session = get_session()
        try:
            # SCHEDULED records without a scheduled time, capped at the free slots
            candidates = (
                db_session.query(VMMigration)
                .filter(
                    VMMigration.state == MigrationState.SCHEDULED,
                    VMMigration.scheduled_time.is_(None),
                )
                .limit(slots_available)
                .all()
            )

            # Count only the migrations whose workflow was really started
            processed_count = sum(
                1
                for migration in candidates
                if self.trigger_migration_workflow(db_session, migration)
            )

            LOG.info(f"⚙ Processed {processed_count} opportunistic migrations")

        finally:
            db_session.close()

    def is_within_migration_window(self, now=None):
        """Tell whether opportunistic migrations are allowed at the given time.

        ``now`` is the ``datetime.datetime`` to evaluate; it defaults to the
        current local time, which keeps existing callers unchanged while
        allowing the rules to be checked for any instant.

        The window is ``CONF.worker.migration_start_time`` to
        ``CONF.worker.migration_end_time`` (both 'HH:MM', bounds inclusive).
        A start later than the end denotes an overnight window.

        Weekend rules (Monday is weekday 0, Sunday is 6):
        - same-day window: refused on Saturday and Sunday;
        - overnight window, evening part (at or after the start): refused on
          Friday, Saturday and Sunday evenings;
        - overnight window, morning part (up to the end): refused on
          Saturday and Sunday.

        Returns ``True`` only inside the window on an allowed day.  Returns
        ``False`` as well when the configured times cannot be parsed.
        """
        try:
            if now is None:
                now = datetime.datetime.now()

            # Parse time window
            start_time = datetime.datetime.strptime(
                self.CONF.worker.migration_start_time, '%H:%M').time()
            end_time = datetime.datetime.strptime(
                self.CONF.worker.migration_end_time, '%H:%M').time()

            current_time = now.time()
            weekday = now.weekday()

            if start_time > end_time:
                # Overnight window (e.g., 22:00 to 05:00)
                in_evening_part = current_time >= start_time
                within_time = in_evening_part or current_time <= end_time

                if within_time:
                    if in_evening_part:
                        # weekday >= 4 covers Friday/Saturday (the night runs
                        # into a weekend day) and Sunday (a weekend day itself)
                        if weekday >= 4:
                            LOG.debug(f"🐛 Skipping migration: overnight window crosses into weekend")
                            return False
                    elif weekday >= 5:
                        # Early hours of Saturday or Sunday
                        LOG.debug(f"🐛 Skipping migration: morning portion of overnight window on weekend")
                        return False
            else:
                # Same-day window
                within_time = start_time <= current_time <= end_time

                if within_time and weekday >= 5:
                    LOG.debug(f"🐛 Skipping migration: same-day window on weekend")
                    return False

            if not within_time:
                LOG.debug(f"🐛 Outside migration time window ({self.CONF.worker.migration_start_time}-{self.CONF.worker.migration_end_time})")

            return within_time

        except Exception as e:
            LOG.warning(f"⚠ Error parsing migration window times: {e}")
            return False  # Safe default

    def trigger_migration_workflow(self, session, migration):
        """Start the Mistral workflow for a migration and record it.

        ``session`` is the database session the ``migration`` record was
        loaded from; ``migration`` is the record to start.

        The workflow receives only the VM id as input.  When an execution
        with an id is returned, the record is switched to MIGRATING, the
        execution id is stored on it and the change is committed.

        Returns ``True`` when the workflow was started and recorded,
        ``False`` otherwise.  Nothing is raised from the workflow start or
        the commit: on an error the session is rolled back so it stays
        usable for the caller's remaining records, and an execution that was
        started but could not be recorded is logged with its id.
        """
        # Captured up front so the error path never has to touch the record
        # again after the session has failed.
        migration_id = migration.id
        execution_id = None

        try:
            # Prepare workflow input (only vm_id, not vm_name)
            workflow_input = {
                'vm_id': migration.vm_id
            }

            # Start workflow execution
            execution = self.mistral_client.start_workflow(workflow_input)

            if not execution:
                LOG.error(f"⧱ Failed to start workflow for migration {migration_id}")
                return False

            execution_id = execution.get('id')
            if not execution_id:
                LOG.error(f"⧱ Workflow execution missing ID for migration {migration_id}")
                return False

            # Update migration record
            migration.state = MigrationState.MIGRATING
            migration.workflow_exec = execution_id
            session.commit()

            LOG.info(f"⚙ Started migration {migration_id} with workflow execution {execution_id}")
            return True

        except Exception as e:
            LOG.error(f"⧱ Error triggering workflow for migration {migration_id}: {e}", exc_info=True)
            if execution_id:
                # The workflow is running but the database does not know about it
                LOG.error(f"⧱ Workflow execution {execution_id} was started for migration {migration_id} but could not be recorded")
            # Discard the pending change and reset the session for the caller
            try:
                session.rollback()
            except Exception as rollback_error:
                LOG.warning(f"⚠ Failed to roll back session for migration {migration_id}: {rollback_error}")
            return False


def main():
    """Entry point: load the configuration, set up logging and run the worker.

    Exits the process with status 0 when ``WorkerService.start()`` returns
    a true value, and with status 1 when it returns a false value or when
    an exception is raised along the way.
    """
    try:
        # Configuration first, then logging based on it
        CONF = config.init_config()
        logging.setup(CONF, 'vmms-worker')
        LOG.info(f"⚙ VMMS worker is starting up...")

        # start() only returns once the worker has stopped or failed to start
        service = WorkerService(CONF)
        if not service.start():
            LOG.error(f"⧱ VMMS worker failed to start")
            sys.exit(1)

        LOG.info(f"⚙ VMMS worker stopped normally")
        sys.exit(0)

    except Exception as e:
        LOG.error(f"⧱ Fatal error in worker service: {e}", exc_info=True)
        sys.exit(1)
