"""
Mock ServiceNow Application Data Generator
Generates 1000 enterprise-grade applications for a financial services company
"""

import json
import random
import uuid
from datetime import datetime, timedelta
from typing import Any
from faker import Faker

# Shared Faker instance used below for owner names, e-mail addresses and
# some name variations.
fake = Faker()
# Fixed seeds for Faker and for the stdlib `random` module. Values that come
# from uuid.uuid4() and datetime.now() elsewhere in this file are not
# controlled by these seeds.
Faker.seed(42)
random.seed(42)

# Enterprise application categories for financial services.
# Each category name maps to three lists read by generate_app_name():
#   "prefix" / "suffix" - word pools concatenated into synthetic names
#   "base_names"        - curated names returned as-is when the requested
#                         index is smaller than the length of the list
APP_CATEGORIES = {
    "Trading & Markets": {
        "prefix": ["Alpha", "Omega", "Delta", "Sigma", "Quant", "Trade", "Market", "FX", "Equity", "Fixed"],
        "suffix": ["Engine", "Platform", "Hub", "Gateway", "Suite", "System", "Pro", "Edge", "Core", "Flow"],
        "base_names": [
            "Algorithmic Trading Engine", "Real-Time Market Data Feed", "Order Management System",
            "Trade Execution Platform", "Market Risk Monitor", "Portfolio Analytics Suite",
            "Derivatives Pricing Engine", "FX Trading Terminal", "Equity Research Platform",
            "Fixed Income Trading System", "Options Valuation Engine", "High-Frequency Trading Platform"
        ]
    },
    "Risk & Compliance": {
        "prefix": ["Risk", "Comply", "Audit", "Guard", "Shield", "Sentinel", "Vigilant", "Secure", "Trust", "Safe"],
        "suffix": ["Manager", "Monitor", "Tracker", "Analyzer", "Hub", "Control", "Watch", "Guard", "Shield", "Pro"],
        "base_names": [
            "Enterprise Risk Management", "Regulatory Compliance Suite", "Anti-Money Laundering System",
            "Know Your Customer Platform", "Fraud Detection Engine", "Credit Risk Analyzer",
            "Operational Risk Manager", "Basel III Compliance Suite", "SOX Compliance Tracker",
            "GDPR Data Protection Manager", "Market Abuse Surveillance", "Transaction Monitoring System"
        ]
    },
    "Core Banking": {
        "prefix": ["Core", "Bank", "Fin", "Capital", "Ledger", "Account", "Prime", "Central", "Global", "Universal"],
        "suffix": ["Banking", "Ledger", "Core", "System", "Platform", "Hub", "Suite", "Engine", "Central", "Pro"],
        "base_names": [
            "Core Banking Platform", "General Ledger System", "Account Management Suite",
            "Payment Processing Engine", "Wire Transfer System", "ACH Processing Platform",
            "SWIFT Messaging Gateway", "Correspondent Banking Hub", "Treasury Management System",
            "Cash Management Platform", "Liquidity Management Suite", "Reconciliation Engine"
        ]
    },
    "Customer Relationship": {
        "prefix": ["Client", "Wealth", "Advisor", "Relation", "Connect", "Engage", "Prospect", "Lead", "Sales", "Service"],
        "suffix": ["360", "Hub", "Connect", "Manager", "Portal", "Suite", "CRM", "Platform", "Central", "Pro"],
        "base_names": [
            "Wealth Management Platform", "Client Onboarding System", "Relationship Manager Portal",
            "Financial Advisor Dashboard", "Client Portfolio Viewer", "Investment Advisory Platform",
            "Private Banking Suite", "HNW Client Management", "Retirement Planning System",
            "Estate Planning Platform", "Insurance Distribution Hub", "Loan Origination System"
        ]
    },
    "Data & Analytics": {
        "prefix": ["Data", "Insight", "Analytics", "Intel", "Metrics", "Report", "Dash", "Visual", "BI", "Smart"],
        "suffix": ["Lake", "Warehouse", "Analytics", "Insights", "Hub", "Vision", "Studio", "Intelligence", "Metrics", "Pro"],
        "base_names": [
            "Enterprise Data Warehouse", "Business Intelligence Platform", "Financial Reporting Suite",
            "Regulatory Reporting System", "Management Dashboard", "Performance Analytics Engine",
            "Data Quality Manager", "Master Data Management", "Data Governance Platform",
            "Real-Time Analytics Hub", "Predictive Analytics Engine", "Customer Analytics Suite"
        ]
    },
    "Infrastructure & Security": {
        "prefix": ["Infra", "Cloud", "Secure", "Cyber", "Net", "System", "Platform", "Enterprise", "Global", "Core"],
        "suffix": ["Guard", "Shield", "Secure", "Manager", "Monitor", "Hub", "Control", "Ops", "Central", "Pro"],
        "base_names": [
            "Identity Access Management", "Single Sign-On Gateway", "Privileged Access Manager",
            "Security Operations Center", "Vulnerability Scanner", "Endpoint Protection Suite",
            "Network Security Monitor", "Cloud Security Posture", "Data Loss Prevention",
            "Encryption Key Manager", "Certificate Lifecycle Manager", "Disaster Recovery Platform"
        ]
    },
    "Human Capital": {
        "prefix": ["People", "HR", "Talent", "Work", "Team", "Employee", "Staff", "Workforce", "Human", "Career"],
        "suffix": ["Hub", "Central", "Manager", "Suite", "Portal", "Connect", "Flow", "Platform", "Pro", "360"],
        "base_names": [
            "Human Capital Management", "Talent Acquisition Platform", "Performance Management Suite",
            "Learning Management System", "Compensation Planning Tool", "Benefits Administration",
            "Time & Attendance Tracker", "Workforce Analytics", "Employee Self-Service Portal",
            "Succession Planning System", "Onboarding Automation", "Payroll Processing Engine"
        ]
    },
    "Operations & Workflow": {
        "prefix": ["Work", "Flow", "Process", "Auto", "Task", "Queue", "Ticket", "Service", "Ops", "Action"],
        "suffix": ["Flow", "Automation", "Manager", "Hub", "Engine", "Central", "Queue", "Desk", "Pro", "Suite"],
        "base_names": [
            "Business Process Automation", "Workflow Orchestration Engine", "Document Management System",
            "Electronic Signature Platform", "Contract Lifecycle Manager", "Vendor Management System",
            "Procurement Automation", "Invoice Processing Engine", "Expense Management Platform",
            "Travel Booking System", "Facilities Management", "Asset Tracking System"
        ]
    },
    "Integration & API": {
        "prefix": ["API", "Connect", "Link", "Bridge", "Sync", "Hub", "Gate", "Flow", "Exchange", "Mesh"],
        "suffix": ["Gateway", "Hub", "Bridge", "Connector", "Mesh", "Bus", "Layer", "Platform", "Central", "Pro"],
        "base_names": [
            "Enterprise Service Bus", "API Gateway Platform", "Integration Hub",
            "Message Queue System", "Event Streaming Platform", "Data Integration Engine",
            "B2B Gateway", "Partner Integration Hub", "Legacy System Connector",
            "Microservices Mesh", "Real-Time Event Broker", "File Transfer Gateway"
        ]
    },
    "Finance & Accounting": {
        "prefix": ["Finance", "Ledger", "Account", "Budget", "Cost", "Revenue", "Profit", "Asset", "Tax", "Audit"],
        "suffix": ["Manager", "Tracker", "Suite", "Pro", "Central", "Hub", "Engine", "Platform", "Control", "360"],
        "base_names": [
            "Financial Close Automation", "Accounts Payable System", "Accounts Receivable Platform",
            "Fixed Asset Management", "Budget Planning Suite", "Cost Allocation Engine",
            "Revenue Recognition System", "Intercompany Settlement", "Tax Calculation Engine",
            "Audit Trail Manager", "Financial Consolidation", "Profitability Analytics"
        ]
    }
}

# Technology stacks used in financial services.
# generate_application() picks one entry at random per application, and
# calculate_automation_potential() reads the "primary" key to scale its score
# (down for COBOL/C++, up for Python/Node.js).
TECH_STACKS = [
    {"primary": "Java", "framework": "Spring Boot", "database": "Oracle", "infrastructure": "On-Premise"},
    {"primary": "Java", "framework": "Spring", "database": "DB2", "infrastructure": "Mainframe"},
    {"primary": ".NET", "framework": "ASP.NET Core", "database": "SQL Server", "infrastructure": "Azure"},
    {"primary": "Python", "framework": "Django", "database": "PostgreSQL", "infrastructure": "AWS"},
    {"primary": "Python", "framework": "FastAPI", "database": "MongoDB", "infrastructure": "GCP"},
    {"primary": "Node.js", "framework": "Express", "database": "PostgreSQL", "infrastructure": "AWS"},
    {"primary": "COBOL", "framework": "CICS", "database": "DB2", "infrastructure": "Mainframe"},
    {"primary": "C++", "framework": "Custom", "database": "Oracle", "infrastructure": "On-Premise"},
    {"primary": "Scala", "framework": "Akka", "database": "Cassandra", "infrastructure": "AWS"},
    {"primary": "Go", "framework": "Gin", "database": "PostgreSQL", "infrastructure": "Kubernetes"},
]

# Vendors commonly used in financial services.
# generate_application() draws one entry at random for each record's "vendor"
# field; the last three entries stand for non-vendor sourcing.
VENDORS = [
    "FIS Global", "Temenos", "Finastra", "SS&C Technologies", "Broadridge",
    "Bloomberg", "Refinitiv", "FactSet", "Moody's Analytics", "S&P Global",
    "Oracle Financial Services", "SAP", "Salesforce", "ServiceNow", "Workday",
    "Microsoft", "IBM", "Infosys", "TCS", "Wipro", "Accenture", "Deloitte",
    "In-House Development", "Custom Built", "Open Source"
]

# Business criticality levels and their sampling weights.
# The two lists are parallel: CRITICALITY_WEIGHTS[i] is the weight passed to
# random.choices() for CRITICALITY_LEVELS[i] in generate_application().
CRITICALITY_LEVELS = ["Critical", "High", "Medium", "Low"]
CRITICALITY_WEIGHTS = [0.15, 0.30, 0.40, 0.15]

# Automation potential factors.
# Keyed by factor name; "weight" is the factor's share in the weighted score
# computed by calculate_automation_potential(), which renormalises over the
# factors it randomly selects. "description" is copied verbatim into the
# generated record.
AUTOMATION_FACTORS = {
    "data_entry": {"weight": 0.20, "description": "Manual data entry and processing"},
    "document_processing": {"weight": 0.18, "description": "Document reading and extraction"},
    "reporting": {"weight": 0.15, "description": "Report generation and distribution"},
    "customer_interaction": {"weight": 0.12, "description": "Customer queries and support"},
    "compliance_checks": {"weight": 0.10, "description": "Regulatory compliance verification"},
    "reconciliation": {"weight": 0.08, "description": "Data matching and reconciliation"},
    "approval_workflows": {"weight": 0.07, "description": "Multi-level approval processes"},
    "notifications": {"weight": 0.05, "description": "Alert and notification management"},
    "scheduling": {"weight": 0.03, "description": "Task scheduling and coordination"},
    "monitoring": {"weight": 0.02, "description": "System and process monitoring"},
}


def generate_app_name(category: str, index: int) -> str:
    """Return an application name for ``category``.

    An ``index`` that falls inside the category's ``base_names`` list selects
    that curated name. Any larger index produces a randomly assembled
    ``<prefix><suffix><edition>`` name, so the result is not guaranteed to be
    unique across calls.

    Raises:
        KeyError: if ``category`` is not a key of ``APP_CATEGORIES``.
    """
    naming = APP_CATEGORIES[category]
    curated = naming["base_names"]

    if index >= len(curated):
        # The draw order (prefix, suffix, edition) fixes the seeded output.
        head = random.choice(naming["prefix"])
        tail = random.choice(naming["suffix"])
        edition = random.choice(["", " 2.0", " 3.0", " Enterprise", " Professional", " Ultimate"])
        return head + tail + edition

    return curated[index]


def generate_description(app_name: str, category: str) -> str:
    """Return a randomly chosen description of ``app_name`` for ``category``.

    Every known category has three candidate descriptions. A category that is
    not listed gets a single generic sentence mentioning the category name.
    Exactly one ``random.choice`` draw is made per call in both cases.
    """
    # Templates are plain strings; the chosen one is filled in at the end.
    templates = {
        "Trading & Markets": (
            "{app_name} is a mission-critical trading platform that handles real-time order execution, market data processing, and trade lifecycle management. It supports multi-asset class trading including equities, fixed income, derivatives, and foreign exchange. The system processes over 1 million transactions daily with sub-millisecond latency requirements.",
            "{app_name} provides sophisticated algorithmic trading capabilities with smart order routing, execution analytics, and transaction cost analysis. It integrates with major exchanges globally and supports FIX protocol messaging.",
            "{app_name} enables portfolio managers and traders to analyze market conditions, manage positions, and execute complex trading strategies across global markets with real-time risk monitoring.",
        ),
        "Risk & Compliance": (
            "{app_name} is an enterprise risk management solution that provides comprehensive risk identification, assessment, and monitoring capabilities. It supports credit risk, market risk, operational risk, and liquidity risk management in accordance with Basel III/IV requirements.",
            "{app_name} automates regulatory compliance processes including KYC verification, AML screening, and sanctions checking. It maintains full audit trails and generates regulatory reports for multiple jurisdictions.",
            "{app_name} monitors transactions in real-time to detect suspicious activities, potential fraud patterns, and market abuse scenarios using advanced analytics and machine learning models.",
        ),
        "Core Banking": (
            "{app_name} is the backbone of our banking operations, managing customer accounts, deposits, loans, and core financial transactions. It processes millions of transactions daily while ensuring data integrity and regulatory compliance.",
            "{app_name} handles payment processing including wire transfers, ACH transactions, and real-time payments. It integrates with SWIFT network and supports ISO 20022 messaging standards.",
            "{app_name} provides comprehensive treasury and cash management capabilities including cash positioning, forecasting, and liquidity optimization across multiple currencies and entities.",
        ),
        "Customer Relationship": (
            "{app_name} is a comprehensive wealth management platform serving high-net-worth individuals and institutional clients. It provides portfolio management, financial planning, and investment advisory capabilities.",
            "{app_name} manages the entire client lifecycle from onboarding through ongoing relationship management. It includes KYC/AML integration, document management, and client communication tools.",
            "{app_name} enables financial advisors to manage client relationships, track investment performance, and provide personalized recommendations based on client goals and risk tolerance.",
        ),
        "Data & Analytics": (
            "{app_name} is our enterprise data warehouse solution that consolidates data from multiple source systems for reporting and analytics. It supports both structured and unstructured data with near real-time ingestion capabilities.",
            "{app_name} provides business intelligence and visualization capabilities for executive dashboards, operational metrics, and ad-hoc analysis. It includes pre-built financial services analytics.",
            "{app_name} generates regulatory reports for multiple jurisdictions including SEC, FINRA, Fed, OCC, and international regulators. It ensures data accuracy and timely submission.",
        ),
        "Infrastructure & Security": (
            "{app_name} manages identity and access controls across the enterprise, ensuring only authorized users can access sensitive systems and data. It supports multi-factor authentication, role-based access control, and privileged access management.",
            "{app_name} monitors security events and threats across the enterprise infrastructure, providing real-time alerting and incident response capabilities for our security operations center.",
            "{app_name} protects sensitive financial data through encryption, tokenization, and data loss prevention controls. It ensures compliance with PCI-DSS, SOX, and data privacy regulations.",
        ),
        "Human Capital": (
            "{app_name} is our core HR platform managing employee data, organizational hierarchy, and HR transactions. It integrates with payroll, benefits, and time management systems.",
            "{app_name} supports the full talent lifecycle from recruitment through retirement, including performance management, learning, compensation planning, and succession planning.",
            "{app_name} provides employee self-service capabilities for time off requests, benefits enrollment, expense reporting, and other HR transactions.",
        ),
        "Operations & Workflow": (
            "{app_name} automates business processes across the organization, reducing manual effort and improving operational efficiency. It includes workflow design, task management, and process analytics.",
            "{app_name} manages the document lifecycle including creation, review, approval, storage, and retention. It supports electronic signatures and integrates with content repositories.",
            "{app_name} handles vendor and contract management including procurement, invoicing, and payment processing. It enforces spending controls and ensures compliance with procurement policies.",
        ),
        "Integration & API": (
            "{app_name} is our enterprise integration platform connecting internal systems with external partners, vendors, and market data providers. It supports multiple protocols and message formats.",
            "{app_name} provides API management capabilities including design, publishing, security, and analytics. It enables secure exposure of internal services to partners and clients.",
            "{app_name} handles high-volume event streaming and message processing for real-time data distribution across the enterprise architecture.",
        ),
        "Finance & Accounting": (
            "{app_name} manages the financial close process including journal entries, account reconciliations, and financial statement preparation. It ensures accuracy and timeliness of financial reporting.",
            "{app_name} automates accounts payable processing including invoice receipt, matching, approval routing, and payment execution. It includes fraud detection and duplicate payment prevention.",
            "{app_name} calculates and manages tax obligations across multiple jurisdictions, ensuring compliance with tax regulations and optimizing tax positions.",
        ),
    }

    # Generic sentence for categories without dedicated templates.
    generic = ("{app_name} is an enterprise application supporting {category} functions.",)

    picked = random.choice(templates.get(category, generic))
    return picked.format(app_name=app_name, category=category)


def generate_dependencies(all_apps: list[dict], current_index: int) -> list[str]:
    """Pick ``app_id`` values of earlier applications as dependencies.

    Only the first ``current_index`` entries of ``all_apps`` are eligible.
    Between zero and five of them are selected without repetition, with the
    count weighted towards one to three. The first application
    (``current_index == 0``) and an empty ``all_apps`` both yield ``[]``
    without consuming any random draws.
    """
    if current_index == 0:
        return []
    if not all_apps:
        return []

    wanted = random.choices([0, 1, 2, 3, 4, 5], weights=[0.1, 0.25, 0.30, 0.20, 0.10, 0.05])[0]
    candidates = all_apps[:current_index]

    # Never ask for more dependencies than there are eligible applications.
    chosen = random.sample(candidates, min(wanted, len(candidates)))
    return [app["app_id"] for app in chosen]


def calculate_automation_potential(app_data: dict) -> dict:
    """Estimate how automatable an application is.

    A random base score is scaled by the primary technology (0.7 for COBOL or
    C++, 1.1 for Python or Node.js) and by 0.85 for "Critical" applications.
    Each entry of ``AUTOMATION_FACTORS`` is then included when a random draw
    exceeds 0.3, and receives its own random score.

    Returns:
        A dict with ``overall_score`` (rounded to 2 decimals, capped at 1.0)
        and ``opportunities`` (the selected factors, each with ``score``,
        ``description`` and ``weight``).
    """
    score = random.uniform(0.2, 0.9)

    primary_language = app_data.get("technology_stack", {}).get("primary")
    if primary_language in ("COBOL", "C++"):
        score *= 0.7  # legacy stacks are treated as harder to automate
    elif primary_language in ("Python", "Node.js"):
        score *= 1.1  # modern stacks are treated as easier to integrate

    if app_data.get("business_criticality") == "Critical":
        score *= 0.85  # extra caution for critical systems

    selected = {}
    for name, spec in AUTOMATION_FACTORS.items():
        if random.random() <= 0.3:
            continue
        selected[name] = {
            "score": round(random.uniform(0.3, 1.0), 2),
            "description": spec["description"],
            "weight": spec["weight"],
        }

    # Weighted mean of the selected factor scores; the 0.01 floor avoids a
    # division by zero when no factor was selected.
    entries = selected.values()
    weight_sum = sum(entry["weight"] for entry in entries)
    weighted_mean = sum(entry["score"] * entry["weight"] for entry in entries) / max(weight_sum, 0.01)

    overall = round(min(score * weighted_mean * 1.2, 1.0), 2)
    return {"overall_score": overall, "opportunities": selected}


def calculate_roi_potential(app_data: dict, automation_data: dict) -> dict:
    """Derive mock ROI figures for automating an application.

    Args:
        app_data: application record; ``business_criticality`` is required.
        automation_data: result of the automation scoring; ``overall_score``
            is required.

    Returns:
        A dict of rounded figures: manual hours, hours saved, hourly cost,
        annual savings, implementation cost, ROI percentage, payback months,
        a 3-year net value and a risk-adjusted ROI.
    """
    manual_hours = random.randint(500, 5000)  # manual effort per year
    rate = random.uniform(75, 200)  # blended hourly cost in USD

    hours_saved = manual_hours * automation_data["overall_score"] * random.uniform(0.3, 0.7)

    # Implementation gets costlier with criticality; any level that is not
    # Low/Medium/High is costed at the top multiplier.
    multipliers = {"Low": 1.0, "Medium": 1.5, "High": 2.0}
    complexity = multipliers.get(app_data["business_criticality"], 3.0)

    build_cost = hours_saved * rate * complexity * random.uniform(0.8, 1.5)
    yearly_savings = hours_saved * rate

    # The implementation cost is spread over three years for the ROI figure;
    # the max() floors guard the divisions when nothing is saved.
    roi_pct = ((yearly_savings - (build_cost / 3)) / max(build_cost, 1)) * 100
    payback = build_cost / max(yearly_savings / 12, 1)

    result = {}
    result["current_manual_hours_per_year"] = round(manual_hours)
    result["potential_hours_saved"] = round(hours_saved)
    result["hourly_cost_usd"] = round(rate, 2)
    result["annual_savings_usd"] = round(yearly_savings)
    result["implementation_cost_usd"] = round(build_cost)
    result["roi_percentage"] = round(roi_pct, 1)
    result["payback_months"] = round(payback, 1)
    result["net_present_value_3yr"] = round(yearly_savings * 3 * 0.85 - build_cost)
    result["risk_adjusted_roi"] = round(roi_pct * (1 - complexity * 0.1), 1)
    return result


def generate_application(index: int, all_apps: list[dict]) -> dict:
    """Generate a single application record with all metadata.

    Args:
        index: zero-based position of the record; drives ``app_id`` and the
            naming scheme.
        all_apps: records generated so far; dependencies are drawn from its
            first ``index`` entries.

    Returns:
        A dict describing the application, including the nested
        ``automation_potential`` and ``roi_analysis`` sections.

    The order of random draws is significant: with the module-level seeds it
    determines the generated values, so statements here should not be
    reordered casually.
    """
    category = random.choice(list(APP_CATEGORIES))
    app_name = generate_app_name(category, index % 15)

    # From the 16th record onward a suffix is appended to reduce (not
    # eliminate) name collisions. All candidates are built eagerly so the
    # number of random draws per record stays constant.
    if index > 14:
        variation = random.choice([
            f" v{random.randint(2, 5)}.{random.randint(0, 9)}",
            f" ({fake.company_suffix()})",
            f" - {fake.city()}",
            f" {random.choice(['Enterprise', 'Professional', 'Ultimate', 'Standard', 'Premium'])}",
            f" {random.randint(2020, 2024)}",
        ])
        app_name = app_name + variation

    tech_stack = random.choice(TECH_STACKS)
    criticality = random.choices(CRITICALITY_LEVELS, weights=CRITICALITY_WEIGHTS)[0]

    # One reference instant for every date field, so the offsets below are
    # all measured from the same moment.
    now = datetime.now()

    app_data = {
        "app_id": f"APP{str(index + 1).zfill(5)}",
        "sys_id": str(uuid.uuid4()),
        "name": app_name,
        "category": category,
        "subcategory": random.choice(["Core", "Support", "Ancillary", "Strategic", "Operational"]),
        "description": generate_description(app_name, category),
        "business_criticality": criticality,
        "lifecycle_stage": random.choice(["Production", "Development", "Sunset", "Pilot", "Retired"]),
        # Copy the stack so editing one record cannot alter TECH_STACKS or
        # other records that drew the same entry.
        "technology_stack": dict(tech_stack),
        "vendor": random.choice(VENDORS),
        "owner": fake.name(),
        "owner_email": fake.company_email(),
        "department": random.choice([
            "Investment Banking", "Retail Banking", "Wealth Management",
            "Risk Management", "Compliance", "Operations", "Technology",
            "Finance", "Human Resources", "Legal", "Marketing"
        ]),
        "cost_center": f"CC{random.randint(1000, 9999)}",
        "annual_cost_usd": random.randint(50000, 5000000),
        "user_count": random.randint(10, 5000),
        "transaction_volume_daily": random.randint(100, 10000000),
        "data_classification": random.choice(["Public", "Internal", "Confidential", "Highly Confidential"]),
        "regulatory_scope": random.sample([
            "SOX", "PCI-DSS", "GDPR", "CCPA", "Basel III", "Dodd-Frank",
            "MiFID II", "FINRA", "SEC", "OCC", "Fed"
        ], k=random.randint(0, 5)),
        "last_assessment_date": (now - timedelta(days=random.randint(30, 365))).isoformat(),
        "next_review_date": (now + timedelta(days=random.randint(30, 365))).isoformat(),
        "sla_availability": f"{random.uniform(99.0, 99.99):.2f}%",
        "recovery_time_objective": random.choice(["< 1 hour", "< 4 hours", "< 8 hours", "< 24 hours", "< 72 hours"]),
        "recovery_point_objective": random.choice(["0 minutes", "< 15 minutes", "< 1 hour", "< 4 hours", "< 24 hours"]),
        "dependencies": generate_dependencies(all_apps, index),
        "integration_count": random.randint(0, 25),
        "api_endpoints": random.randint(0, 100),
        "created_date": (now - timedelta(days=random.randint(365, 3650))).isoformat(),
        "last_updated": (now - timedelta(days=random.randint(1, 90))).isoformat(),
    }

    # Automation and ROI figures are derived from the finished record.
    automation_data = calculate_automation_potential(app_data)
    roi_data = calculate_roi_potential(app_data, automation_data)

    app_data["automation_potential"] = automation_data
    app_data["roi_analysis"] = roi_data

    return app_data


def generate_all_applications(count: int = 1000) -> list[dict]:
    """Build ``count`` application records and return them as a list.

    Records are generated in order, and each one is given the list of records
    produced before it. Progress is printed after every 100 records.
    """
    print(f"[*] Generating {count} enterprise applications...")
    catalogue = []

    for position in range(count):
        # The new record sees only its predecessors; it is appended afterwards.
        catalogue.append(generate_application(position, catalogue))

        done = position + 1
        if done % 100 == 0:
            print(f"    Generated {done}/{count} applications...")

    print(f"[OK] Successfully generated {count} applications")
    return catalogue


def save_applications(applications: list[dict], filepath: str = "data/servicenow_apps.json") -> None:
    """Write ``applications`` to ``filepath`` as an indented JSON document.

    The document has a ``metadata`` section (generation timestamp, record
    count, organization name, version) and an ``applications`` list.

    Args:
        applications: records to serialise.
        filepath: destination file. Missing parent directories are created,
            so the default ``data/`` location works on a fresh checkout.

    Raises:
        OSError: if the directory or file cannot be created or written.
        TypeError: if a record contains a value ``json`` cannot serialise.
    """
    from pathlib import Path  # local import keeps this block self-contained

    target = Path(filepath)
    # Without this, open() fails with FileNotFoundError when the parent
    # directory does not exist yet.
    target.parent.mkdir(parents=True, exist_ok=True)

    document = {
        "metadata": {
            "generated_at": datetime.now().isoformat(),
            "total_count": len(applications),
            "organization": "Global Financial Services Corp",
            "version": "1.0"
        },
        "applications": applications
    }

    with open(target, 'w', encoding='utf-8') as f:
        json.dump(document, f, indent=2)
    print(f"[OK] Saved applications to {filepath}")


if __name__ == "__main__":
    # Build the full catalogue and persist it to the default JSON location.
    apps = generate_all_applications(1000)
    save_applications(apps)

    # Echo the first record so the output shape can be inspected quickly.
    print("\n[Sample Application]")
    first_app = apps[0]
    print(json.dumps(first_app, indent=2))

