#!/usr/bin/env python3
"""
Convert tty_activity.log file to CSV format.
Each timestamp becomes one row in the CSV output, with columns for each TTY device metric.
"""

import csv
import re
import sys
from pathlib import Path
from collections import OrderedDict


# Matches the HH:MM:SS token that starts timestamped header and data lines.
_TIME_TOKEN_RE = re.compile(r'\d{2}:\d{2}:\d{2}')

# Matches one metric value; both "." and "," are accepted as the decimal mark
# so that logs written under a comma-decimal locale still parse.
_METRIC_VALUE_RE = re.compile(r'-?\d+(?:[.,]\d+)?')


def _tty_sort_key(tty):
    """Order numeric TTY ids by value, then non-numeric ids alphabetically."""
    if tty.isdecimal():
        return (0, int(tty), '')
    return (1, 0, tty)


def _header_timestamp(fields):
    """Return the timestamp prefixing a column-header line, or None if absent."""
    if not _TIME_TOKEN_RE.match(fields[0]):
        return None
    # 12-hour logs put an AM/PM marker between the time and the "TTY" label;
    # 24-hour logs go straight from the time to "TTY".
    if len(fields) > 1 and fields[1] != 'TTY':
        return f"{fields[0]} {fields[1]}"
    return fields[0]


def _parse_data_row(fields, fallback_timestamp):
    """
    Split a tokenised data line into (timestamp, tty, metrics).

    Returns None when the tokens do not have the shape of a data row. For rows
    that do not start with a time, the returned timestamp is
    fallback_timestamp, which may itself be None.
    """
    if _TIME_TOKEN_RE.match(fields[0]):
        if len(fields) >= 9:
            # 12-hour clock: time, AM/PM marker, TTY, six metrics.
            return f"{fields[0]} {fields[1]}", fields[2], fields[3:9]
        if len(fields) == 8 and fields[1].isdecimal():
            # 24-hour clock: time, TTY, six metrics. Requiring a numeric TTY
            # keeps a truncated 12-hour line ("... AM 0 ...") from being
            # misread with "AM" as the device.
            return fields[0], fields[1], fields[2:8]
        return None

    # No leading time: expect TTY followed by six metrics. With nothing else
    # to anchor the row, insist the metrics look numeric so stray text is not
    # mistaken for a device.
    metrics = fields[1:7]
    if len(metrics) == 6 and all(_METRIC_VALUE_RE.fullmatch(value) for value in metrics):
        return fallback_timestamp, fields[0], metrics
    return None


def parse_tty_activity_log(input_file, output_file):
    """
    Parse tty_activity.log file and convert to CSV format.
    Each timestamp becomes one row with columns for each TTY device metric.

    The first three lines of the input (system banner, blank line, first
    column header) are always skipped. Both 12-hour ("11:08:06 AM") and
    24-hour ("11:08:06") timestamps are accepted. Column-header lines only
    refresh the fallback timestamp used for rows that carry no time of their
    own; they never produce CSV rows. Lines whose first token is a label
    ending in ":" (for example the trailing "Average:" summary) are ignored.
    A TTY that has no sample for a given timestamp is filled with "0.00".

    Args:
        input_file: Path to the input tty_activity.log file
        output_file: Path to the output CSV file

    Raises:
        SystemExit: with exit status 1 if input_file does not exist.
    """
    input_path = Path(input_file)

    if not input_path.exists():
        print(f"parse_tty_activity_log(): Error - Input file not found: {input_file}")
        sys.exit(1)

    # Metric names (in order)
    metric_names = ['rcvin_s', 'xmtin_s', 'framerr_s', 'prtyerr_s', 'brk_s', 'ovrun_s']

    # First pass: collect all data grouped by timestamp
    timestamp_data = OrderedDict()  # timestamp -> {tty -> [metrics]}
    all_ttys = set()

    with open(input_path, 'r', encoding='utf-8') as infile:
        # Skip the first 3 lines (system info header, empty line, first column header)
        for _ in range(3):
            infile.readline()

        current_timestamp = None

        for raw_line in infile:
            line = raw_line.strip()
            if not line:
                continue

            fields = line.split()

            # Summary rows are labelled (e.g. "Average:") rather than
            # timestamped; they are not samples, so leave them out.
            if fields[0].endswith(':'):
                continue

            # Column-header line: remember its timestamp as a fallback, but do
            # not create a row for it (that would emit an all-zero sample).
            if 'TTY' in line:
                header_timestamp = _header_timestamp(fields)
                if header_timestamp is not None:
                    current_timestamp = header_timestamp
                continue

            parsed = _parse_data_row(fields, current_timestamp)
            if parsed is None:
                print(f"parse_tty_activity_log(): Warning - Skipping malformed line: {line}")
                continue

            timestamp, tty, metrics = parsed
            if timestamp is None:
                print(f"parse_tty_activity_log(): Warning - No timestamp available for line: {line}")
                continue

            timestamp_data.setdefault(timestamp, {})[tty] = metrics
            all_ttys.add(tty)

    # Sort TTYs for consistent column ordering (numeric ids first, by value)
    sorted_ttys = sorted(all_ttys, key=_tty_sort_key)

    # Build CSV headers: timestamp + (for each TTY: tty_metric1, tty_metric2, ...)
    csv_headers = ['timestamp'] + [
        f"tty{tty}_{metric}" for tty in sorted_ttys for metric in metric_names
    ]

    # Values written for a TTY that has no sample at a given timestamp
    filler = ['0.00'] * len(metric_names)

    # Second pass: write CSV
    with open(output_file, 'w', newline='', encoding='utf-8') as outfile:
        csv_writer = csv.writer(outfile)
        csv_writer.writerow(csv_headers)

        for timestamp, ttys_dict in timestamp_data.items():
            row = [timestamp]
            for tty in sorted_ttys:
                row.extend(ttys_dict.get(tty, filler))
            csv_writer.writerow(row)

        row_count = len(timestamp_data)

        print(f"parse_tty_activity_log(): Successfully converted {row_count} rows to CSV")
        print(f"parse_tty_activity_log(): Found {len(sorted_ttys)} unique TTY devices")


if __name__ == '__main__':
    # Usage: script [input_log [output_csv]]
    # Any positional argument that is omitted falls back to the built-in
    # path in the same position; arguments beyond the second are ignored.
    fallback_paths = ['sar_logs_20251112_110805/tty_activity.log', 'tty_activity.csv']
    supplied_paths = sys.argv[1:3]
    input_file, output_file = supplied_paths + fallback_paths[len(supplied_paths):]

    parse_tty_activity_log(input_file, output_file)
    print(f"parse_tty_activity_log(): Output written to: {output_file}")

