import struct
import pandas as pd
from datetime import datetime, timedelta, timezone
import os
import re
import json
import sys

# Startup diagnostic: show which interpreter is executing this script.
print("Python version: " + sys.version)

def load_metadata(json_file):
    """Read tick-conversion metadata from *json_file*.

    Returns the parsed dict, or None when the file is missing or does not
    contain valid JSON.  The file is opened with 'utf-8-sig' so a UTF-8
    BOM (common in Windows-produced files) is tolerated.
    """
    if not os.path.exists(json_file):
        print(f"Error: Metadata file not found: {json_file}")
        return None
    try:
        with open(json_file, 'r', encoding='utf-8-sig') as fh:
            parsed = json.load(fh)
    except json.JSONDecodeError as exc:
        print(f"Error: Invalid JSON in {json_file}: {exc}")
        return None
    print(f"Loaded metadata: {parsed}")
    return parsed

def read_bin_to_csv(bin_file, csv_file, json_file=None, to_parquet=False, csv_columns='minimal'):
    """
    Read a .bin tick file (40 bytes per tick) and convert it to CSV/Parquet.

    Each record is little-endian '<QQddQ': time (epoch seconds), time_msc
    (epoch milliseconds), bid (double), ask (double), volume (uint64).

    Parameters:
    - bin_file: Path to .bin file
    - csv_file: Output CSV file path
    - json_file: Path to metadata JSON file (optional)
    - to_parquet: Also save to Parquet if True
    - csv_columns: 'minimal' (Time,Bid,Ask) or 'full' (Time,Bid,Ask,Volume)

    Returns the number of ticks converted; 0 when the input file is
    missing (consistent with read_bi5_to_csv, and still falsy like the
    previous implicit None).
    """
    if not os.path.exists(bin_file):
        print(f"Error: File not found: {bin_file}")
        return 0  # fix: was an implicit None; sibling reader returns 0

    metadata = load_metadata(json_file) if json_file else None
    # Timestamps are treated as GMT unless metadata explicitly says otherwise.
    is_gmt = metadata.get('convert_to_gmt', True) if metadata else True
    gmt_offset = 7  # Fallback to GMT+7 (server time) if not specified
    if not is_gmt and metadata:
        tz_str = metadata.get('timezone', 'GMT+0')
        match = re.match(r'GMT([+-]\d+)', tz_str)
        if match:
            gmt_offset = int(match.group(1))
        print(f"GMT offset: {gmt_offset} hours")

    tick_size = 40  # time (8), time_msc (8), bid (8), ask (8), volume (8)
    record = struct.Struct('<QQddQ')  # pre-compiled once, used in the hot loop
    ticks = []

    with open(bin_file, 'rb') as f:
        chunk_size = tick_size * 1000  # read 1000 ticks at a time
        while chunk := f.read(chunk_size):
            for i in range(0, len(chunk), tick_size):
                if i + tick_size > len(chunk):
                    # chunk_size is a multiple of tick_size, so this can only
                    # happen on a truncated final chunk.
                    print(f"Warning: Incomplete tick data at offset {f.tell() - len(chunk) + i}")
                    break
                t_sec, time_msc, bid, ask, volume = record.unpack_from(chunk, i)
                # Debug: print the first tick so a bad struct layout is obvious
                if not ticks:
                    print(f"Sample .bin tick: time={t_sec}, time_msc={time_msc}, bid={bid}, ask={ask}, volume={volume}")
                time_dt = datetime.fromtimestamp(t_sec, tz=timezone.utc)
                if not is_gmt:
                    # Shift server-local timestamps by the configured offset.
                    time_dt += timedelta(hours=gmt_offset)
                # Millisecond suffix comes from time_msc (epoch ms), not t_sec.
                time_str = time_dt.strftime('%Y.%m.%d %H:%M:%S') + f".{time_msc % 1000:03d}"
                ticks.append([time_str, bid, ask, volume])

    # Build with all four columns, then project to the requested subset.
    columns = ['Time', 'Bid', 'Ask', 'Volume'] if csv_columns == 'full' else ['Time', 'Bid', 'Ask']
    df = pd.DataFrame(ticks, columns=['Time', 'Bid', 'Ask', 'Volume'])[columns]

    df.to_csv(csv_file, index=False)
    print(f"Converted {len(ticks)} ticks to {csv_file}")

    if to_parquet:
        parquet_file = csv_file.replace('.csv', '.parquet')
        df.to_parquet(parquet_file, index=False)  # requires pyarrow/fastparquet
        print(f"Converted {len(ticks)} ticks to {parquet_file}")

    return len(ticks)

def read_bi5_to_csv(bi5_file, csv_file, json_file=None, to_parquet=False, csv_columns='minimal'):
    if not os.path.exists(bi5_file):
        print(f"❌ File not found: {bi5_file}")
        return 0

    meta = load_metadata(json_file) if json_file else {}
    tick_size = meta.get('tick_size', 1e-5)
    scale = 1.0 / tick_size

    # Detect format: hourly (via path) or daily (via metadata)
    m = re.search(r'([\\/](\d{4})[\\/](\d{2})[\\/](\d{2})[\\/](\d{2})h_ticks\.bi5)$', bi5_file)
    if m:
        year, mon, day, hour = map(int, m.groups()[1:])
        base_time = datetime(year, mon, day, hour, tzinfo=timezone.utc)
    elif 'start_date' in meta:
        try:
            base_time = datetime.strptime(meta['start_date'], '%Y%m%d').replace(tzinfo=timezone.utc)
        except:
            print("❌ Invalid start_date format in metadata.")
            return 0
    else:
        print("❌ Cannot determine base time: path or metadata missing.")
        return 0

    ticks = []
    with open(bi5_file, 'rb') as f:
        while chunk := f.read(20 * 1000):
            for i in range(0, len(chunk), 20):
                if i + 20 > len(chunk): break
                msec, ask_i, bid_i, ask_vol, bid_vol = struct.unpack('>iiiii', chunk[i:i+20])
                msec = (msec + 3600000) % 3600000
                t = base_time + timedelta(milliseconds=msec)
                ticks.append([
                    t.strftime('%Y.%m.%d %H:%M:%S.%f')[:-3],
                    ask_i / scale,
                    bid_i / scale,
                    ask_vol / 1_000_000.0
                ])

    cols = ['Time', 'Bid', 'Ask', 'Volume'] if csv_columns == 'full' else ['Time', 'Bid', 'Ask']
    df = pd.DataFrame(ticks, columns=['Time', 'Ask', 'Bid', 'Volume'])[cols]
    df.to_csv(csv_file, index=False)
    if to_parquet:
        df.to_parquet(csv_file.replace('.csv', '.parquet'), index=False)

    print(f"✅ Parsed {len(df)} ticks → {csv_file}")
    return len(df)
# Example usage: machine-specific sample paths from a local MetaTrader
# "Common/Files" tick-data tree.
bin_file = r'C:\Users\carwy\AppData\Roaming\MetaQuotes\Terminal\Common\Files\TickData\BTCUSDz\2025\07\01\TickData_Exness_Technologies_Ltd_BTCUSDz_20250701_20250701.bin'
bi5_file = r'C:\Users\carwy\AppData\Roaming\MetaQuotes\Terminal\Common\Files\TickData\BTCUSDz\2025\07\01\17h_ticks.bi5'
json_file = r'C:\Users\carwy\AppData\Roaming\MetaQuotes\Terminal\Common\Files\TickData\BTCUSDz\2025\07\01\TickData_Exness_Technologies_Ltd_BTCUSDz_20250701_20250701.json'
csv_file_bin = 'output_ticks_bin.csv'
csv_file_bi5 = 'output_ticks_bi5.csv'

if __name__ == '__main__':
    # Guarded so importing this module for its functions does not trigger
    # the conversions (which depend on the absolute paths above).

    # Read .bin file (minimal CSV: Time,Bid,Ask)
    bin_ticks = read_bin_to_csv(bin_file, csv_file_bin, json_file=json_file, to_parquet=False, csv_columns='minimal')

    # Read .bi5 file (full CSV: Time,Bid,Ask,Volume)
    bi5_ticks = read_bi5_to_csv(bi5_file, csv_file_bi5, json_file=json_file, to_parquet=False, csv_columns='full')

    # Cross-check: both readers should see the same number of ticks.
    if bin_ticks and bi5_ticks and bin_ticks != bi5_ticks:
        print(f"Warning: Tick count mismatch: .bin ({bin_ticks}) vs .bi5 ({bi5_ticks})")