<?php
/**
 * Enhanced Data Mapper Class with Conditional Quality Filtering
 * /public/import/classes/DataMapper.php
 */

// Abort immediately unless the including script has defined IMPORT_ACCESS,
// so this file cannot be executed by requesting it directly.
if (defined('IMPORT_ACCESS') === false) {
    exit('Direct access not allowed');
}

/**
 * Maps raw scraped business records onto the store table layout.
 *
 * Relies on three constants defined outside this file: STORE_DEFAULTS
 * (baseline column values), IMPORT_SETTINGS (max_name_length,
 * max_address_length, max_description_length) and STATE_ABBREVIATIONS
 * (state name => abbreviation).
 */
class DataMapper {
    /** @var object Logger; this class calls logSkipped(), logValidation() and logError() on it. */
    private $logger;

    /** @var object Database gateway; this class calls getTimezoneId() on it. */
    private $db;

    /** @var array Import options: caller-supplied values merged over the defaults. */
    private $importOptions;

    /**
     * @param object $database      Database gateway exposing getTimezoneId().
     * @param object $logger        Logger exposing logSkipped(), logValidation(), logError().
     * @param array  $importOptions Optional overrides for 'skip_unparseable_hours'
     *                              and 'strict_quality_mode' (both default to false).
     */
    public function __construct($database, $logger, $importOptions = []) {
        $this->db = $database;
        $this->logger = $logger;
        $this->importOptions = array_merge([
            'skip_unparseable_hours' => false,
            'strict_quality_mode' => false
        ], $importOptions);
    }

    /**
     * Map a raw record to the store column layout.
     *
     * Required fields are always checked; the stricter quality checks run only
     * when the 'strict_quality_mode' option is enabled.
     *
     * @param array $record Raw record keyed by source column name.
     * @return array|false Mapped row (STORE_DEFAULTS overlaid with the cleaned
     *                     values), or false when the record is rejected.
     */
    public function mapRecord($record) {
        if (!$this->validateRequiredFields($record)) {
            return false;
        }

        if ($this->importOptions['strict_quality_mode'] && !$this->validateQualityStandards($record)) {
            return false;
        }

        // An absent OR blank country code falls back to 'US'. CSV rows carry
        // empty strings rather than nulls, so `??` alone never applied the default.
        $country = $this->cleanText($record['country_code'] ?? '', 50);

        $fields = [
            'store_id' => $this->generateStoreId(),
            'name' => $this->cleanText($record['name'], IMPORT_SETTINGS['max_name_length']),
            'email' => $this->cleanEmail($record['email_1'] ?? ''),
            'website' => $this->cleanUrl($record['site'] ?? ''),
            'phone' => $this->cleanPhone($record['phone'] ?? ''),
            'description' => $this->extractDescription($record),
            'address' => $this->cleanText($record['street'] ?? '', IMPORT_SETTINGS['max_address_length']),
            'city' => $this->cleanText($record['city'] ?? '', 50),
            'state' => $this->convertStateToAbbreviation($record['state'] ?? ''),
            'country' => $country !== '' ? $country : 'US',
            'postal_code' => $this->cleanPostalCode($record['postal_code'] ?? ''),
            'latitude' => $this->cleanCoordinate($record['latitude'] ?? ''),
            'longitude' => $this->cleanCoordinate($record['longitude'] ?? ''),
            'categories' => $this->cleanText($record['category'] ?? '', 500),
            'image' => $this->cleanUrl($record['photo'] ?? ''),
            'image_type' => 0, // placeholder keeps the key position; real value set below
            'x_twitter' => $this->cleanSocialUrl($record['twitter'] ?? ''),
            'youtube' => $this->cleanSocialUrl($record['youtube'] ?? ''),
            'linkedin' => $this->cleanSocialUrl($record['linkedin'] ?? ''),
            'facebook' => $this->cleanSocialUrl($record['facebook'] ?? ''),
            'instagram' => $this->cleanSocialUrl($record['instagram'] ?? ''),
            'tiktok' => $this->cleanSocialUrl($record['tiktok'] ?? ''),
            'snapchat' => $this->cleanSocialUrl($record['snapchat'] ?? ''),
            'store_timezone' => $this->db->getTimezoneId($record['time_zone'] ?? '')
        ];

        // Derive the type from the URL that is actually stored, so 'image' and
        // 'image_type' can never disagree (e.g. a scheme-less photo value).
        $fields['image_type'] = $this->determineImageType($fields['image']);

        $mappedData = array_merge(STORE_DEFAULTS, $fields);

        return $this->validateMappedData($mappedData) ? $mappedData : false;
    }

    /**
     * Strict-mode quality gate: website, phone, address, coordinates and rating.
     *
     * Each failed check is reported through the logger's logSkipped() and
     * stops further checking.
     *
     * @param array $record Raw record.
     * @return bool True when every strict-mode check passes.
     */
    private function validateQualityStandards($record): bool {
        $businessName = $record['name'] ?? 'Unknown Business';

        // Require a proper website (own domain, not a social/directory page)
        $website = trim($record['site'] ?? '');
        if (empty($website)) {
            $this->logger->logSkipped("No website provided (strict mode)", $businessName);
            return false;
        }

        if (!$this->isValidBusinessWebsite($website)) {
            $this->logger->logSkipped("Invalid website: not a proper domain (strict mode)", $businessName);
            return false;
        }

        // Require phone number
        if (empty(trim($record['phone'] ?? ''))) {
            $this->logger->logSkipped("No phone number provided (strict mode)", $businessName);
            return false;
        }

        // Require street, city and state
        foreach (['street', 'city', 'state'] as $addressPart) {
            if (empty(trim($record[$addressPart] ?? ''))) {
                $this->logger->logSkipped("Incomplete address (missing street, city, or state) (strict mode)", $businessName);
                return false;
            }
        }

        // Require numeric coordinates. is_numeric() already rejects the empty
        // string; empty() is deliberately avoided because it would also
        // reject a legitimate "0".
        $lat = trim($record['latitude'] ?? '');
        $lng = trim($record['longitude'] ?? '');
        if (!is_numeric($lat) || !is_numeric($lng)) {
            $this->logger->logSkipped("Missing or invalid coordinates (strict mode)", $businessName);
            return false;
        }

        // A rating of 0 means "no rating" and is allowed; only rated
        // businesses below 3.0 are rejected.
        $rating = floatval($record['rating'] ?? 0);
        if ($rating > 0 && $rating < 3.0) {
            $this->logger->logSkipped("Rating too low: {$rating} (strict mode)", $businessName);
            return false;
        }

        return true;
    }

    /**
     * Decide whether a URL points at a business's own domain (strict mode only).
     *
     * The host is extracted (scheme, path, query, fragment, port and a leading
     * "www." removed) and then checked against the block lists. A listed domain
     * matches the host itself and any of its subdomains, but not unrelated
     * hosts that merely contain the same characters.
     *
     * @param string $url Website URL, with or without scheme.
     * @return bool False for social/directory sites, placeholder domains and
     *              hosts that are not a dotted name ending in an alphabetic TLD.
     */
    private function isValidBusinessWebsite($url): bool {
        $url = strtolower(trim($url));

        // Isolate the host part
        $withoutScheme = (string) preg_replace('/^https?:\/\//', '', $url);
        $host = preg_split('/[\/?#]/', $withoutScheme, 2)[0];
        $host = (string) preg_replace('/:\d+$/', '', $host);
        $host = (string) preg_replace('/^www\./', '', $host);

        // Social media and directory sites
        $invalidDomains = [
            'facebook.com', 'fb.com', 'instagram.com', 'twitter.com', 'x.com',
            'linkedin.com', 'youtube.com', 'tiktok.com', 'snapchat.com',
            'yelp.com', 'google.com', 'maps.google.com', 'plus.google.com',
            'yellowpages.com', 'superpages.com', 'whitepages.com',
            'foursquare.com', 'nextdoor.com', 'thumbtack.com', 'angie.com',
            'homeadvisor.com', 'bbb.org'
        ];

        if ($this->hostMatchesAnyDomain($host, $invalidDomains)) {
            return false;
        }

        // One or more DNS labels followed by an alphabetic TLD
        if (!preg_match('/^(?:[a-z0-9](?:[a-z0-9\-]{0,61}[a-z0-9])?\.)+[a-z]{2,}$/D', $host)) {
            return false;
        }

        // Obvious placeholder domains
        $invalidPatterns = [
            'example.com', 'test.com', 'placeholder.com', 'domain.com',
            'yoursite.com', 'website.com', 'coming-soon.com'
        ];

        return !$this->hostMatchesAnyDomain($host, $invalidPatterns);
    }

    /**
     * Tell whether a host equals, or is a subdomain of, any listed domain.
     *
     * @param string $host    Lower-cased host name.
     * @param array  $domains Lower-cased registrable domains.
     * @return bool
     */
    private function hostMatchesAnyDomain($host, array $domains): bool {
        foreach ($domains as $domain) {
            $suffix = '.' . $domain;
            if ($host === $domain || substr($host, -strlen($suffix)) === $suffix) {
                return true;
            }
        }

        return false;
    }

    /**
     * Check the fields every record must carry: name, street and city.
     *
     * @param array $record Raw record.
     * @return bool False (after logging the missing field labels) when any is blank.
     */
    private function validateRequiredFields($record): bool {
        $requiredFields = [
            'name' => 'name',
            'street' => 'street address',
            'city' => 'city'
        ];

        $missing = [];
        foreach ($requiredFields as $field => $label) {
            if (empty(trim($record[$field] ?? ''))) {
                $missing[] = $label;
            }
        }

        if (empty($missing)) {
            return true;
        }

        $businessName = $record['name'] ?? 'Unknown Business';
        $this->logger->logSkipped("Missing required fields: " . implode(', ', $missing), $businessName);
        return false;
    }

    /**
     * Collect the opening-hours fields, preferring the csv_compatible format.
     *
     * Order of preference: working_hours_csv_compatible as given, then
     * working_hours_old_format converted, then working_hours (JSON) converted.
     *
     * @param array $record Raw record.
     * @return array Keys 'working_hours_csv_compatible', 'working_hours' and
     *               'working_hours_old_format'.
     */
    public function extractHoursData($record) {
        $rawHours = $record['working_hours'] ?? '';
        $csvHours = $record['working_hours_csv_compatible'] ?? '';
        $oldFormat = $record['working_hours_old_format'] ?? '';

        if (!empty($csvHours)) {
            return [
                'working_hours_csv_compatible' => $csvHours,
                'working_hours' => $rawHours,
                'working_hours_old_format' => $oldFormat
            ];
        }

        if (!empty($oldFormat)) {
            return [
                'working_hours_csv_compatible' => $this->convertOldFormatToCsv($oldFormat),
                'working_hours' => $rawHours,
                'working_hours_old_format' => $oldFormat
            ];
        }

        // Last resort: derive the CSV form from the JSON field
        return [
            'working_hours_csv_compatible' => $this->convertJsonToCsv($rawHours),
            'working_hours' => $rawHours,
            'working_hours_old_format' => ''
        ];
    }

    /**
     * Convert "Monday:9:00 AM - 5:00 PM|Tuesday:..." to "Monday,9:00 AM - 5:00 PM|Tuesday,...".
     *
     * Every run of letters followed by a colon has that colon turned into a
     * comma; colons after digits (as in "9:00") are left alone.
     *
     * @param string $oldFormat Hours in the old colon-separated format.
     * @return string
     */
    private function convertOldFormatToCsv($oldFormat): string {
        if (empty($oldFormat)) {
            return '';
        }

        return (string) preg_replace('/([A-Za-z]+):/', '$1,', $oldFormat);
    }

    /**
     * Convert a day => hours map (JSON text or an already-decoded array) to
     * "Day,hours|Day,hours".
     *
     * @param string|array $jsonHours JSON object text, or the decoded array.
     * @return string Empty string when the input is blank or not a JSON array/object.
     */
    private function convertJsonToCsv($jsonHours): string {
        if (empty($jsonHours)) {
            return '';
        }

        $decoded = is_array($jsonHours) ? $jsonHours : json_decode($jsonHours, true);
        if (!is_array($decoded)) {
            return '';
        }

        $csvParts = [];
        foreach ($decoded as $day => $hours) {
            if (is_array($hours)) {
                // A day may list several ranges; concatenating the array
                // directly would emit the literal text "Array".
                // NOTE(review): ranges are joined with "," — confirm the
                // downstream hours parser accepts that separator.
                $hours = implode(',', array_filter($hours, 'is_scalar'));
            } elseif (!is_scalar($hours)) {
                $hours = '';
            }
            $csvParts[] = $day . ',' . $hours;
        }

        return implode('|', $csvParts);
    }

    /**
     * Generate a random store ID in UUID v4 text form (8-4-4-4-12 hex digits).
     *
     * @return string
     */
    private function generateStoreId(): string {
        $bytes = random_bytes(16);

        // Stamp the version (4) and variant (10xx) bits per RFC 4122
        $bytes[6] = chr((ord($bytes[6]) & 0x0f) | 0x40);
        $bytes[8] = chr((ord($bytes[8]) & 0x3f) | 0x80);

        // 32 hex digits split into eight groups of four
        return vsprintf('%s%s-%s-%s-%s-%s%s%s', str_split(bin2hex($bytes), 4));
    }

    /**
     * Pick the first usable description from 'description', then 'about'.
     *
     * @param array $record Raw record.
     * @return string Cleaned text longer than 25 bytes, or '' when neither field qualifies.
     */
    private function extractDescription($record): string {
        foreach (['description', 'about'] as $sourceField) {
            $cleaned = $this->cleanText($record[$sourceField] ?? '', IMPORT_SETTINGS['max_description_length']);
            if (strlen($cleaned) > 25) { // Minimum meaningful description length
                return $cleaned;
            }
        }

        return '';
    }

    /**
     * Convert a state name to its abbreviation via STATE_ABBREVIATIONS.
     *
     * Two-character input is upper-cased and returned as-is. Names are looked
     * up exactly first, then case-insensitively. Unknown names are logged and
     * returned (limited to 50 bytes).
     *
     * @param string $stateName State name or abbreviation.
     * @return string
     */
    private function convertStateToAbbreviation($stateName): string {
        $stateName = trim($stateName);

        if (strlen($stateName) === 2) {
            return strtoupper($stateName);
        }

        if (isset(STATE_ABBREVIATIONS[$stateName])) {
            return STATE_ABBREVIATIONS[$stateName];
        }

        if (empty($stateName)) {
            return '';
        }

        // Fall back to a case-insensitive match ("california", "NEW YORK")
        foreach (STATE_ABBREVIATIONS as $fullName => $abbreviation) {
            if (strcasecmp((string) $fullName, $stateName) === 0) {
                return $abbreviation;
            }
        }

        $this->logger->logValidation('state', $stateName, 'Unknown state name, keeping as-is');
        return $this->truncateUtf8($stateName, 50);
    }

    /**
     * Strip tags and surrounding whitespace, then limit the text to $maxLength bytes.
     *
     * Over-long text is cut and suffixed with "..." so the result still fits
     * within $maxLength. The cut never splits a multi-byte UTF-8 character.
     *
     * @param string $text      Raw text.
     * @param int    $maxLength Maximum length in bytes, including the "..." suffix.
     * @return string
     */
    private function cleanText($text, $maxLength): string {
        $cleaned = trim(strip_tags((string) $text));

        if (strlen($cleaned) <= $maxLength) {
            return $cleaned;
        }

        return $this->truncateUtf8($cleaned, $maxLength - 3) . '...';
    }

    /**
     * Cut a string to at most $maxBytes bytes without splitting a UTF-8 character.
     *
     * For pure-ASCII input this is identical to substr($value, 0, $maxBytes).
     *
     * @param string $value    Text to shorten.
     * @param int    $maxBytes Byte budget; values <= 0 yield ''.
     * @return string
     */
    private function truncateUtf8($value, $maxBytes): string {
        $value = (string) $value;

        if ($maxBytes <= 0) {
            return '';
        }

        if (strlen($value) <= $maxBytes) {
            return $value;
        }

        if (function_exists('mb_strcut')) {
            return mb_strcut($value, 0, $maxBytes, 'UTF-8');
        }

        $cut = (string) substr($value, 0, $maxBytes);

        // Without mbstring: if the input was valid UTF-8, drop the trailing
        // bytes of a character that the cut split (at most three).
        if (preg_match('//u', $value) === 1) {
            for ($dropped = 0; $dropped < 3 && $cut !== '' && preg_match('//u', $cut) !== 1; $dropped++) {
                $cut = (string) substr($cut, 0, -1);
            }
        }

        return $cut;
    }

    /**
     * Validate an email address.
     *
     * @param string $email Raw email.
     * @return string The trimmed address, or '' when blank, malformed or longer
     *                than 50 bytes. Rejections other than blank are logged.
     */
    private function cleanEmail($email): string {
        $email = trim($email);

        if (empty($email)) {
            return '';
        }

        if (!filter_var($email, FILTER_VALIDATE_EMAIL)) {
            $this->logger->logValidation('email', $email, 'Invalid email format');
            return '';
        }

        // Cutting an address to fit would store a different, wrong address,
        // so an over-long one is dropped instead.
        if (strlen($email) > 50) {
            $this->logger->logValidation('email', $email, 'Email longer than 50 characters');
            return '';
        }

        return $email;
    }

    /**
     * Normalise and validate a URL, adding "http://" when no scheme is present.
     *
     * @param string $url Raw URL.
     * @return string The URL limited to 100 bytes, or '' when blank or invalid
     *                (invalid values are logged).
     */
    private function cleanUrl($url): string {
        $url = trim($url);

        if (empty($url)) {
            return '';
        }

        // Case-insensitive so "HTTPS://..." is not double-prefixed and then rejected
        if (!preg_match('/^https?:\/\//i', $url)) {
            $url = 'http://' . $url;
        }

        if (!filter_var($url, FILTER_VALIDATE_URL)) {
            $this->logger->logValidation('url', $url, 'Invalid URL format');
            return '';
        }

        // NOTE(review): URLs longer than 100 bytes are cut, which can yield a
        // non-working link — confirm the column width before relaxing this.
        return substr($url, 0, 100);
    }

    /**
     * Normalise a social media link.
     *
     * A scheme-less value that mentions a known platform host gets "https://"
     * prepended; any other value (for example a bare handle) is kept unchanged.
     *
     * @param string $url Raw social link.
     * @return string The value limited to 255 bytes.
     */
    private function cleanSocialUrl($url): string {
        $url = trim($url);

        if (empty($url)) {
            return '';
        }

        if (!preg_match('/^https?:\/\//i', $url)) {
            $socialPlatforms = ['facebook.com', 'twitter.com', 'x.com', 'instagram.com', 'linkedin.com', 'tiktok.com', 'snapchat.com', 'youtube.com'];

            foreach ($socialPlatforms as $platform) {
                if (stripos($url, $platform) !== false) {
                    $url = 'https://' . $url;
                    break;
                }
            }
        }

        return $this->truncateUtf8($url, 255);
    }

    /**
     * Clean a phone number; ten-digit numbers are reformatted as "(xxx) xxx-xxxx".
     *
     * @param string $phone Raw phone number.
     * @return string Cleaned number limited to 15 bytes, or '' when blank.
     */
    private function cleanPhone($phone): string {
        $phone = trim($phone);

        if (empty($phone)) {
            return '';
        }

        // Drop a leading "+1" and/or "1" country prefix
        $phone = preg_replace('/^\+1\s*/', '', $phone);
        $phone = preg_replace('/^1\s*/', '', $phone);

        // Keep only digits, whitespace and common separators
        $phone = preg_replace('/[^0-9\s\-\(\)\.]/', '', $phone);

        $digitsOnly = preg_replace('/\D/', '', $phone);
        if (strlen($digitsOnly) === 10) {
            $phone = sprintf('(%s) %s-%s',
                substr($digitsOnly, 0, 3),
                substr($digitsOnly, 3, 3),
                substr($digitsOnly, 6, 4)
            );
        }

        return substr($phone, 0, 15);
    }

    /**
     * Clean a postal code.
     *
     * 5-digit and ZIP+4 values pass through; nine bare digits are reformatted
     * as ZIP+4; anything else is returned limited to 10 bytes.
     *
     * @param string $postal Raw postal code.
     * @return string
     */
    private function cleanPostalCode($postal): string {
        $postal = trim($postal);

        if (empty($postal)) {
            return '';
        }

        // 5-digit ZIP or ZIP+4. Digits are matched explicitly because
        // is_numeric() would also accept values like "1e345" or "12.45".
        if (preg_match('/^\d{5}(-\d{4})?$/D', $postal)) {
            return $postal;
        }

        // 9-digit ZIP without the dash
        if (preg_match('/^(\d{5})(\d{4})$/D', $postal, $zipParts)) {
            return $zipParts[1] . '-' . $zipParts[2];
        }

        return $this->truncateUtf8($postal, 10);
    }

    /**
     * Format a coordinate with eight decimal places.
     *
     * @param string $coord Raw coordinate.
     * @return string Formatted value, or '' when the input is blank or not numeric.
     */
    private function cleanCoordinate($coord): string {
        $coord = trim($coord);

        // is_numeric() is false for ''; empty() is avoided because it would
        // also discard a legitimate "0".
        if (!is_numeric($coord)) {
            return '';
        }

        return number_format((float) $coord, 8, '.', '');
    }

    /**
     * Classify an image value: 1 when it is a valid URL, otherwise 0.
     *
     * @param string $imageUrl Image value (mapRecord() passes the cleaned URL).
     * @return int
     */
    private function determineImageType($imageUrl): int {
        if (empty($imageUrl)) {
            return 0;
        }

        return filter_var($imageUrl, FILTER_VALIDATE_URL) ? 1 : 0;
    }

    /**
     * Final check of the mapped row: store_id, name and address must be non-empty.
     *
     * @param array $data Mapped row.
     * @return bool False (after logging via logError()) when any critical field is empty.
     */
    private function validateMappedData($data): bool {
        $criticalFields = [
            'store_id' => 'Missing store_id',
            'name' => 'Missing name',
            'address' => 'Missing address'
        ];

        $criticalErrors = [];
        foreach ($criticalFields as $field => $message) {
            if (empty($data[$field])) {
                $criticalErrors[] = $message;
            }
        }

        if (empty($criticalErrors)) {
            return true;
        }

        $this->logger->logError("Validation failed: " . implode(', ', $criticalErrors), [
            'store_id' => $data['store_id'] ?? 'none',
            'name' => $data['name'] ?? 'none'
        ]);
        return false;
    }
}
?>