#!/usr/bin/env python3
"""
Convert net_dev.log file to CSV format.
Each timestamp becomes one row in the CSV output, with columns for each device metric.
"""

import csv
import re
import sys
from pathlib import Path
from collections import OrderedDict


def _looks_numeric(token):
    """Return True if token is a plain decimal number (decimal point or comma)."""
    return re.fullmatch(r'-?\d+(?:[.,]\d+)?', token) is not None


def _split_timestamp(parts):
    """
    Separate the leading timestamp of a tokenized data row from its other fields.

    Supports both the 12-hour layout ("11:08:06 AM bond1 194.00 ...") and the
    24-hour layout ("11:08:06 bond1 194.00 ...").

    Args:
        parts: Whitespace-split tokens of one data row (non-empty)

    Returns:
        Tuple (timestamp, fields). timestamp is None when the row does not
        start with HH:MM:SS; fields then is parts unchanged. Otherwise fields
        starts at the device name.
    """
    if not re.match(r'\d{2}:\d{2}:\d{2}', parts[0]):
        return None, parts

    has_meridiem = len(parts) > 1 and parts[1].upper() in ('AM', 'PM')

    # 24-hour layout: no AM/PM marker, and the token after the would-be device
    # is already a number (i.e. the first metric).
    if not has_meridiem and len(parts) > 2 and _looks_numeric(parts[2]):
        return parts[0], parts[1:]

    # 12-hour layout, or an unrecognised marker: keep the two-token timestamp.
    return ' '.join(parts[:2]), parts[2:]


def parse_net_dev_log(input_file, output_file):
    """
    Parse net_dev.log file and convert to CSV format.
    Each timestamp becomes one row with columns for each device metric.

    The first three lines of the input (system info header, empty line, first
    column header) are skipped unconditionally. Later column-header lines
    (containing "IFACE") only update the fallback timestamp used for data rows
    that carry no timestamp of their own. "Average:" summary lines are ignored
    because they are not per-timestamp samples. Timestamps may be in 12-hour
    ("11:08:06 AM") or 24-hour ("11:08:06") form.

    Only the first seven metrics of each row are exported; a device that is
    missing for a timestamp, or a row with too few metrics, is filled with
    '0.00'. Progress and warning messages are printed to stdout.

    Args:
        input_file: Path to the input net_dev.log file
        output_file: Path to the output CSV file

    Raises:
        SystemExit: With exit code 1 if input_file does not exist
    """
    input_path = Path(input_file)

    if not input_path.exists():
        print(f"parse_net_dev_log(): Error - Input file not found: {input_file}")
        sys.exit(1)

    # Metric names (in order)
    metric_names = ['rxpck_s', 'txpck_s', 'rxkB_s', 'txkB_s', 'rxcmp_s', 'txcmp_s', 'rxmcst_s']
    metric_count = len(metric_names)

    # First pass: collect all data grouped by timestamp
    timestamp_data = OrderedDict()  # timestamp -> {device -> [metrics]}
    all_devices = set()

    with open(input_path, 'r', encoding='utf-8') as infile:
        # Skip the first 3 lines (system info header, empty line, first column header)
        for _ in range(3):
            infile.readline()

        current_timestamp = None

        for raw_line in infile:
            line = raw_line.strip()

            # Skip empty lines
            if not line:
                continue

            parts = line.split()

            # Summary lines ("Average: IFACE ..." / "Average: bond1 ...") are
            # not samples; treating them as data would invent an "Average:"
            # device and shift the interface name into the metric columns.
            if parts[0] == 'Average:':
                continue

            # Column header line: remember its timestamp as the fallback
            if 'IFACE' in line:
                if len(parts) >= 2:
                    if parts[1] == 'IFACE':
                        # 24-hour layout: "11:08:05  IFACE  rxpck/s ..."
                        current_timestamp = parts[0]
                    else:
                        # 12-hour layout: "11:08:05 AM  IFACE  rxpck/s ..."
                        current_timestamp = f"{parts[0]} {parts[1]}"
                    timestamp_data.setdefault(current_timestamp, {})
                continue

            # Anything else should be a data row
            if len(parts) < 9:
                print(f"parse_net_dev_log(): Warning - Skipping malformed line: {line}")
                continue

            timestamp, fields = _split_timestamp(parts)
            if timestamp is None:
                # Row has no timestamp of its own; fall back to the last header's
                if current_timestamp is None:
                    print(f"parse_net_dev_log(): Warning - No timestamp available for line: {line}")
                    continue
                timestamp = current_timestamp

            device = fields[0]
            metrics = fields[1:1 + metric_count]

            # Pad short rows so every device contributes the same number of columns
            metrics.extend(['0.00'] * (metric_count - len(metrics)))

            timestamp_data.setdefault(timestamp, {})[device] = metrics
            all_devices.add(device)

    # Sort devices for consistent column ordering
    sorted_devices = sorted(all_devices)

    # Build CSV headers: timestamp + (for each device: device_metric1, device_metric2, ...)
    csv_headers = ['timestamp']
    csv_headers.extend(
        f"{device}_{metric}" for device in sorted_devices for metric in metric_names
    )

    # Device not present for a timestamp -> fill with zeros
    missing_metrics = ['0.00'] * metric_count

    # Second pass: write CSV
    with open(output_file, 'w', newline='', encoding='utf-8') as outfile:
        csv_writer = csv.writer(outfile)
        csv_writer.writerow(csv_headers)

        row_count = 0
        for timestamp, devices_dict in timestamp_data.items():
            row = [timestamp]
            for device in sorted_devices:
                row.extend(devices_dict.get(device, missing_metrics))
            csv_writer.writerow(row)
            row_count += 1

        print(f"parse_net_dev_log(): Successfully converted {row_count} rows to CSV")
        print(f"parse_net_dev_log(): Found {len(sorted_devices)} unique devices")


if __name__ == '__main__':
    # Usage: script.py [input_log] [output_csv]
    # Any positional argument that is omitted falls back to its built-in default.
    fallback_paths = ('sar_logs_20251112_110805/net_dev.log', 'net_dev.csv')
    supplied_paths = sys.argv[1:3]
    input_file, output_file = (*supplied_paths, *fallback_paths[len(supplied_paths):])

    parse_net_dev_log(input_file, output_file)
    print(f"parse_net_dev_log(): Output written to: {output_file}")

