#!/usr/bin/env python3
"""
Convert tcp_udp.log file to CSV format.
Each timestamp becomes one row in the CSV output, combining TCP and UDP metrics.
"""

import csv
import re
import sys
from pathlib import Path
from collections import OrderedDict


# Clock field (HH:MM:SS) that starts every sar data row.
_TIMESTAMP_RE = re.compile(r'\d{2}:\d{2}:\d{2}')
# A single metric value; a comma is accepted as decimal separator because
# sar formats numbers according to the active locale.
_METRIC_RE = re.compile(r'[-+]?\d+(?:[.,]\d+)?')
# Both the TCP and the UDP section report exactly four metrics per row.
_METRICS_PER_SECTION = 4


def _parse_data_row(line):
    """
    Split one stripped log line into its timestamp and metric values.

    Handles both clock styles: 12-hour rows ("11:08:05 AM 1.00 ...") keep
    the AM/PM marker as part of the timestamp, 24-hour rows
    ("11:08:05 1.00 ...") use the clock field alone.

    Args:
        line: A non-empty, already stripped line from the log

    Returns:
        A (timestamp, metrics) tuple where metrics is a list of four
        strings, or None when the line is not a data row (summary lines
        such as "Average:", restart markers, truncated rows).
    """
    parts = line.split()
    if not parts or not _TIMESTAMP_RE.fullmatch(parts[0]):
        return None

    if len(parts) > 1 and parts[1].upper() in ('AM', 'PM'):
        timestamp = f"{parts[0]} {parts[1]}"
        values = parts[2:]
    else:
        timestamp = parts[0]
        values = parts[1:]

    metrics = values[:_METRICS_PER_SECTION]
    if len(metrics) < _METRICS_PER_SECTION:
        return None

    # Reject timestamped rows that carry text instead of numbers, e.g.
    # "11:08:08 AM  LINUX RESTART (4 CPU)".
    if not all(_METRIC_RE.fullmatch(value) for value in metrics):
        return None

    return timestamp, metrics


def parse_tcp_udp_log(input_file, output_file):
    """
    Parse tcp_udp.log file and convert to CSV format.
    Each timestamp becomes one row with both TCP and UDP metrics.

    The first two lines of the log (system info header and the blank line
    after it) are skipped. A header line containing 'active/s' and
    'passive/s' switches to the TCP section, one containing 'idgm/s' and
    'odgm/s' switches to the UDP section. Data rows are merged by
    timestamp; a section that is missing for a timestamp is written as
    four '0.00' values. Rows that appear before any section header are
    reported with a warning and not written to the CSV.

    If the input file does not exist, an error message is printed and the
    process exits with status 1.

    Args:
        input_file: Path to the input tcp_udp.log file
        output_file: Path to the output CSV file
    """
    input_path = Path(input_file)

    if not input_path.exists():
        print(f"parse_tcp_udp_log(): Error - Input file not found: {input_file}")
        sys.exit(1)

    # First pass: collect all data grouped by timestamp, in file order.
    timestamp_data = OrderedDict()  # timestamp -> {'tcp': [metrics], 'udp': [metrics]}

    with open(input_path, 'r', encoding='utf-8') as infile:
        # Skip the first 2 lines (system info header, empty line)
        for _ in range(2):
            infile.readline()

        current_section = None  # 'tcp' or 'udp'

        for raw_line in infile:
            line = raw_line.strip()
            if not line:
                continue

            # Header lines select the section the following rows belong to.
            if 'active/s' in line and 'passive/s' in line:
                current_section = 'tcp'
                continue
            if 'idgm/s' in line and 'odgm/s' in line:
                current_section = 'udp'
                continue

            parsed = _parse_data_row(line)
            if parsed is None:
                continue

            if current_section is None:
                # Without a header the metrics cannot be attributed; do not
                # register the timestamp, otherwise an all-zero row would
                # be emitted for it.
                print(f"parse_tcp_udp_log(): Warning - No section context for data row: {line}")
                continue

            timestamp, metrics = parsed
            timestamp_data.setdefault(timestamp, {})[current_section] = metrics

    # CSV column headers
    csv_headers = ['timestamp', 'tcp_active_s', 'tcp_passive_s', 'tcp_iseg_s', 'tcp_oseg_s',
                   'udp_idgm_s', 'udp_odgm_s', 'udp_noport_s', 'udp_idgmerr_s']
    missing_section = ['0.00'] * _METRICS_PER_SECTION

    # Second pass: write CSV
    with open(output_file, 'w', newline='', encoding='utf-8') as outfile:
        csv_writer = csv.writer(outfile)
        csv_writer.writerow(csv_headers)

        for timestamp, sections in timestamp_data.items():
            csv_writer.writerow([
                timestamp,
                *sections.get('tcp', missing_section),
                *sections.get('udp', missing_section),
            ])

        row_count = len(timestamp_data)
        print(f"parse_tcp_udp_log(): Successfully converted {row_count} rows to CSV")


if __name__ == '__main__':
    # Script entry point. Optional positional arguments:
    #   argv[1] -> input log path, argv[2] -> output CSV path
    # Anything not supplied falls back to the built-in defaults below.
    default_input, default_output = 'sar_logs_20251112_110805/tcp_udp.log', 'tcp_udp.csv'
    cli_args = sys.argv[1:]

    input_file = cli_args[0] if cli_args else default_input
    output_file = cli_args[1] if len(cli_args) >= 2 else default_output

    parse_tcp_udp_log(input_file, output_file)
    print(f"parse_tcp_udp_log(): Output written to: {output_file}")
