# ai_stock/futu.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
富途牛牛股票价格数据抓取工具
功能：解析HTML页面提取股票价格、涨跌额、涨跌幅数据并保存到CSV文件

作者: AI Stock Trading Assistant
日期: 2024年
"""

import requests
import re
import csv
import time
import sys
import argparse
import json
import urllib.parse
from datetime import datetime
from bs4 import BeautifulSoup
from logging_setup import init_logging

class EastMoneyAPI:
    """东方财富API接口类，用于获取美股市值排行数据"""
    
    def __init__(self):
        """Store the list endpoint URL plus the fixed headers and cookies.

        The three attributes set here are passed unchanged to every
        ``requests.get`` call made by ``get_us_stocks``.
        """
        # Endpoint queried by get_us_stocks.
        self.base_url = "https://push2.eastmoney.com/api/qt/clist/get"
        # Static request headers (Chrome-style User-Agent / client hints).
        self.headers = {
            'Accept': '*/*',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Connection': 'keep-alive',
            'Referer': 'https://quote.eastmoney.com/center/gridlist.html',
            'Sec-Fetch-Dest': 'script',
            'Sec-Fetch-Mode': 'no-cors',
            'Sec-Fetch-Site': 'same-site',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
            'sec-ch-ua': '"Google Chrome";v="141", "Not?A_Brand";v="8", "Chromium";v="141"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"'
        }
        # Hard-coded cookie values.
        # NOTE(review): these look like values captured from a browser session
        # and may expire or be rejected over time - confirm they are still needed.
        self.cookies = {
            'qgqp_b_id': '6762b4d1088a5df99fef2aaf99350ad6',
            'st_nvi': '5kjYZa9DBnsX5oWsYvA_Fe959',
            'nid': '0e17cb22ecf6960f4858bfd8cbdced17',
            'nid_create_time': '1756556375507',
            'gvi': 'l15-44w-TU820v6GOA4-re3ed',
            'gvi_create_time': '1756556375507',
            'websitepoptg_api_time': '1762206479838',
            'st_si': '15354362676602',
            'st_asi': 'delete',
            'fullscreengg': '1',
            'fullscreengg2': '1',
            'st_pvi': '72179808487060',
            'st_sp': '2025-08-30%2020%3A19%3A35',
            'st_inirUrl': 'https%3A%2F%2Femcreative.eastmoney.com%2Fapp_fortune%2Farticle%2Findex.html',
            'st_sn': '3',
            'st_psi': '20251104055541297-113200301321-2855469466'
        }
    
    def get_us_stocks_top50(self, page_size=50):
        """Fetch the first page of the US stock list.

        Convenience wrapper that forwards ``page_size`` to ``get_us_stocks``
        and leaves the page index at its default.

        Args:
            page_size: Number of rows to request.

        Returns:
            Whatever ``get_us_stocks`` returns for that call.
        """
        first_page = self.get_us_stocks(page_size=page_size)
        return first_page

    def get_us_stocks(self, page_size=50, page_index=1):
        """Fetch one page of US stock rows from the EastMoney list endpoint.

        The endpoint answers with JSONP (``callback({...})``); the text between
        the first ``(`` and the last ``)`` is decoded as JSON.

        Args:
            page_size: Number of rows requested per page (sent as ``pz``).
            page_index: Page number to request (sent as ``pn``).

        Returns:
            tuple: ``(stocks, total)`` taken from the ``diff`` and ``total``
            fields of the response payload. Every failure (network error,
            non-200 status, malformed body, error code, missing payload) is
            printed and reported as ``([], 0)``; this method never raises.
        """
        try:
            # Millisecond timestamp doubles as cache-buster and callback suffix.
            timestamp = int(time.time() * 1000)
            callback = f"jQuery37106960268121993591_{timestamp}"

            params = {
                'np': '1',
                'fltt': '1',
                'invt': '2',
                'cb': callback,
                'fs': 'm:105,m:106,m:107',  # US market codes
                'fields': 'f12,f13,f14,f1,f2,f4,f3,f152,f17,f28,f15,f16,f18,f20,f115',
                'fid': 'f20',  # sort by market cap
                'pn': str(page_index),
                'pz': str(page_size),
                'po': '1',
                'dect': '1',
                'ut': 'fa5fd1943c7b386f172d6893dbfba10b',
                'wbp2u': '|0|0|0|web',
                '_': str(timestamp)
            }

            print(f"🌐 正在获取美股数据 (第{page_index}页, 每页{page_size}条)...")
            # NOTE(review): verify=False disables TLS certificate validation, so
            # the response could be tampered with in transit. Kept as-is because
            # it looks deliberate; confirm whether it is still required.
            response = requests.get(
                self.base_url,
                params=params,
                headers=self.headers,
                cookies=self.cookies,
                timeout=30,
                verify=False
            )

            if response.status_code != 200:
                print(f"❌ 请求失败，状态码: {response.status_code}")
                print("[调试] 返回内容:", response.text[:500])
                return [], 0

            content = response.text
            # Only the first 500 characters are echoed to keep the log readable.
            print("[调试] 东方财富API返回内容:", content[:500])
            if not content or '(' not in content or ')' not in content:
                print("❌ 返回内容异常，未包含有效JSONP")
                return [], 0

            # Strip the JSONP wrapper: callback( ... )
            json_str = content[content.find('(') + 1:content.rfind(')')]
            try:
                data = json.loads(json_str)
            except Exception as e:
                print(f"❌ JSON解析失败: {e}")
                return [], 0

            # A JSON null payload or a non-object response is treated as an
            # API error rather than falling through to an AttributeError.
            payload = data.get('data') if isinstance(data, dict) else None
            if not isinstance(data, dict) or data.get('rc') != 0 or not isinstance(payload, dict):
                print(f"❌ 接口返回错误: {data}")
                return [], 0

            # Normalise null fields so callers can always do len()/arithmetic.
            stocks = payload.get('diff') or []
            total = payload.get('total') or 0
            print(f"✅ 成功获取 {len(stocks)} 只股票数据 (总数: {total})")
            return stocks, total

        except Exception as e:
            # Top-level boundary: callers rely on ([], 0) instead of exceptions.
            print(f"❌ 获取数据失败: {e}")
            return [], 0
    
    def parse_stock_data(self, stock_item):
        """Convert one raw EastMoney row into a dict of numeric fields.

        Conventions:
        - price-like fields and market cap are returned as ``float``; the
          placeholders ``None``, ``'-'``, ``''`` and unparseable values
          become ``0.0``
        - ``change_ratio`` is returned as a fraction (``0.0402`` means 4.02%)

        Args:
            stock_item: Mapping keyed by EastMoney field codes (``f12``,
                ``f14``, ``f2``, ``f4``, ``f3``, ``f20``, ``f15``-``f18``).

        Returns:
            dict | None: The normalised record, or ``None`` (after printing
            the error) if anything raises while reading ``stock_item``.
        """
        def _as_number(raw, fallback=0.0):
            # Placeholder markers and unparseable values collapse to the fallback.
            if raw in (None, '-', ''):
                return fallback
            try:
                return float(raw)
            except Exception:
                return fallback

        def _ratio_as_fraction(raw):
            # Values above 100 in magnitude are taken to be percent * 100.
            # NOTE(review): this heuristic cannot tell a scaled 50 (0.50%)
            # from a plain 50%; confirm the scale the API actually returns.
            pct = _as_number(raw, 0.0)
            if abs(pct) > 100:
                pct = pct / 100.0
            return pct / 100.0

        try:
            read = stock_item.get
            record = {
                'symbol': read('f12', ''),
                'name': read('f14', ''),
                'current_price': _as_number(read('f2', 0.0)),
                'change_amount': _as_number(read('f4', 0.0)),
                'change_ratio': _ratio_as_fraction(read('f3', 0.0)),
            }
            # Remaining numeric columns, in output order.
            for out_key, field_code in (
                ('market_cap', 'f20'),
                ('high_price', 'f15'),
                ('low_price', 'f16'),
                ('open_price', 'f17'),
                ('prev_close', 'f18'),
            ):
                record[out_key] = _as_number(read(field_code, 0.0))
            # Exchange and currency are fixed values for this list.
            record['exchange'] = 'US'
            record['currency'] = 'USD'
            return record
        except Exception as e:
            print(f"❌ 解析股票数据失败: {e}")
            return None

    def _format_price(self, price_value):
        """Coerce a raw price to ``float`` (method name kept for compatibility).

        Args:
            price_value: Raw value; ``None``, ``'-'`` and ``''`` count as missing.

        Returns:
            float: The parsed number, or ``0.0`` when missing or unparseable.
        """
        placeholders = (None, '-', '')
        if price_value not in placeholders:
            try:
                return float(price_value)
            except Exception:
                pass
        return 0.0

    def _format_ratio(self, ratio_value):
        """Convert a raw change ratio to a fraction (method name kept for compatibility).

        Values whose magnitude exceeds 100 are first divided by 100, then every
        value is divided by 100 once more to turn a percentage into a fraction.

        Args:
            ratio_value: Raw value; ``None``, ``'-'`` and ``''`` count as missing.

        Returns:
            float: The fraction, or ``0.0`` when missing or unparseable.
        """
        if ratio_value in (None, '-', ''):
            return 0.0
        try:
            numeric = float(ratio_value)
        except Exception:
            return 0.0
        if abs(numeric) > 100:
            numeric = numeric / 100.0
        return numeric / 100.0


class FutuStockParser:
    def __init__(self):
        """Initialise the Futu stock parser with fixed cookies and headers.

        Both dictionaries are sent unchanged by ``fetch_stock_page``.
        """
        # Hard-coded cookie values.
        # NOTE(review): these look like values captured from a browser session
        # (device ids, CSRF tokens, analytics ids) and may expire - confirm.
        self.cookies = {
            'cipher_device_id': '1757556073667578',
            'device_id': '1757556073667578',
            '_gcl_au': '1.1.1663570279.1758365279',
            'showWatch': '1',
            'invite_from': '10237865',
            'sensorsdata2015jssdkcross': '%7B%22distinct_id%22%3A%22ftv1PuOG%2BAdnk9zxdFTbZjIrOSbcir6XtNvwdxf2Y34zO%2FCriKNPyEOfzRH7jhboo2SL%22%2C%22first_id%22%3A%2219936818c19622-028fe866d247376-26061951-1024000-19936818c1b100%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E8%87%AA%E7%84%B6%E6%90%9C%E7%B4%A2%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.baidu.com%2Flink%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTk5MzY4MThjMTk2MjItMDI4ZmU4NjZkMjQ3Mzc2LTI2MDYxOTUxLTEwMjQwMDAtMTk5MzY4MThjMWIxMDAiLCIkaWRlbnRpdHlfbG9naW5faWQiOiJmdHYxUHVPRytBZG5rOXp4ZEZUYlpqSXJPU2JjaXI2WHROdndkeGYyWTM0ek8vQ3JpS05QeUVPZnpSSDdqaGJvbzJTTCJ9%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%24identity_login_id%22%2C%22value%22%3A%22ftv1PuOG%2BAdnk9zxdFTbZjIrOSbcir6XtNvwdxf2Y34zO%2FCriKNPyEOfzRH7jhboo2SL%22%7D%7D',
            'futu-csrf': 'oiTa//eJsjCp/OY8h3KrAY8REws=',
            'locale': 'zh-cn',
            'csrfToken': 'VRY8_4JPRRdq5GEsxaC4wio5',
            'Hm_lvt_f3ecfeb354419b501942b6f9caf8d0db': '1760076566,1762203125',
            'HMACCOUNT': '98F1F80B74EBD3E2',
            'Hm_lpvt_f3ecfeb354419b501942b6f9caf8d0db': '1762203146',
            'locale.sig': 'ObiqV0BmZw7fEycdGJRoK-Q0Yeuop294gBeiHL1LqgQ',
        }

        # Static request headers (Chrome-style navigation request).
        self.headers = {
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
            'accept-language': 'zh-CN,zh;q=0.9',
            'cache-control': 'max-age=0',
            'priority': 'u=0, i',
            'referer': 'https://www.futunn.com/',
            'sec-ch-ua': '"Google Chrome";v="141", "Not?A_Brand";v="8", "Chromium";v="141"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'document',
            'sec-fetch-mode': 'navigate',
            'sec-fetch-site': 'same-origin',
            'sec-fetch-user': '?1',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Safari/537.36',
        }

    def fetch_stock_page(self, url):
        """
        Download the HTML of a stock page.

        The request carries the parser's cookies and headers and uses a
        30-second timeout; HTTP error statuses count as failures.

        Args:
            url (str): Stock page URL.

        Returns:
            str: The response body, or None when the request fails.
        """
        try:
            page = requests.get(url, cookies=self.cookies, headers=self.headers, timeout=30)
            page.raise_for_status()
            html = page.text
        except requests.RequestException as err:
            print(f"❌ 获取页面失败: {err}")
            return None
        return html

    def parse_javascript_data(self, html_content):
        """
        Extract quote fields from the ``window.__INITIAL_STATE__`` JSON blob.

        The object literal assigned to ``window.__INITIAL_STATE__`` is decoded
        with ``json.JSONDecoder.raw_decode``, which consumes exactly one JSON
        value. Unlike a non-greedy ``{.*?};`` regex, this is not cut short by a
        ``};`` inside a string value and does not need a trailing semicolon.

        Args:
            html_content (str): Page HTML.

        Returns:
            dict: ``before_open_price``, ``before_open_change``,
            ``before_open_change_ratio``, ``current_price``,
            ``current_change_ratio`` (each ``None`` when absent) plus
            ``timestamp`` and ``datetime`` of the parse. None when the input
            is empty, the state object is missing or invalid, or it has no
            ``stock_info``.
        """
        if not html_content:
            return None

        try:
            # Locate the opening brace of the assigned object literal.
            marker = re.search(r'window\.__INITIAL_STATE__\s*=\s*\{', html_content)
            if not marker:
                print("❌ 未找到window.__INITIAL_STATE__数据")
                return None

            # Decode one JSON value starting at that brace; trailing text is ignored.
            initial_state, _ = json.JSONDecoder().raw_decode(html_content, marker.end() - 1)

            stock_info = initial_state.get('stock_info')
            if not stock_info:
                print("❌ 未找到stock_info数据")
                return None

            # Either section may be absent or JSON null; fall back to an empty
            # mapping so the remaining fields are still returned.
            before_open_info = stock_info.get('before_open_stock_info') or {}
            data_info = stock_info.get('data') or {}

            return {
                'before_open_price': before_open_info.get('price'),
                'before_open_change': before_open_info.get('change'),
                'before_open_change_ratio': before_open_info.get('changeRatio'),
                'current_price': data_info.get('price'),
                'current_change_ratio': data_info.get('changeRatio'),
                'timestamp': int(time.time()),
                'datetime': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            }

        except json.JSONDecodeError as e:
            print(f"❌ JSON解析失败: {e}")
            return None
        except Exception as e:
            print(f"❌ 解析JavaScript数据失败: {e}")
            return None

    def parse_price_data(self, html_content):
        """
        Scrape the regular-session quote (plus after-hours data) from page HTML.

        Args:
            html_content (str): Page HTML.

        Returns:
            dict: ``current_price``, ``change_price``, ``change_ratio``,
            ``direction``, ``timestamp``, ``datetime`` merged with the
            after-hours fields from ``_parse_after_hours_data``. None when the
            input is empty, the price container is missing, or parsing raises.
        """
        if not html_content:
            return None

        try:
            soup = BeautifulSoup(html_content, 'html.parser')

            # Container holding the regular-session price.
            price_container = soup.find('ul', class_='flex-end price-current')
            if not price_container:
                print("❌ 未找到价格容器")
                return None

            # Current price: <li class="mg-r-8 price direct-up/down">
            current_price = None
            price_element = price_container.find('li', class_=re.compile(r'mg-r-8 price'))
            if price_element:
                price_match = re.search(r'[\d,]+\.?\d*', price_element.get_text(strip=True))
                if price_match:
                    current_price = price_match.group().replace(',', '')

            # Change amount and change ratio live in the "change" <li>.
            change_price = None
            change_ratio = None
            change_element = price_container.find('li', class_=re.compile(r'change'))
            if change_element:
                amount_span = change_element.find('span', class_='change-price')
                if amount_span:
                    # Signed number; thousands separators are removed.
                    amount_match = re.search(r'[+-]?[\d,]+\.?\d*', amount_span.get_text(strip=True))
                    if amount_match:
                        change_price = amount_match.group().replace(',', '')

                ratio_span = change_element.find('span', class_=re.compile(r'mg-l-8 change-ratio'))
                if ratio_span:
                    # Percentage text is kept verbatim, including the % sign.
                    ratio_match = re.search(r'[+-]?[\d,]+\.?\d*%', ratio_span.get_text(strip=True))
                    if ratio_match:
                        change_ratio = ratio_match.group()

            # Direction is inferred from the marker class in the container markup.
            container_markup = str(price_container)
            if "direct-up" in container_markup:
                direction = "up"
            elif "direct-down" in container_markup:
                direction = "down"
            else:
                direction = "flat"

            after_hours_data = self._parse_after_hours_data(soup)

            result = {
                'current_price': current_price,
                'change_price': change_price,
                'change_ratio': change_ratio,
                'direction': direction,
                'timestamp': int(time.time()),
                'datetime': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            }

            # Merge the after-hours fields into the result.
            if after_hours_data:
                result.update(after_hours_data)

            return result

        except Exception as e:
            print(f"❌ 解析HTML失败: {e}")
            return None

    def _parse_after_hours_data(self, soup):
        """
        Parse the after-hours quote block of a stock page.

        Args:
            soup: BeautifulSoup object for the whole page.

        Returns:
            dict: Always contains the keys ``after_hours_price``,
            ``after_hours_change``, ``after_hours_ratio``,
            ``after_hours_direction`` and ``after_hours_status``. Values stay
            None for anything that could not be found. Exceptions are printed
            and the (possibly partially filled) dict is still returned.
        """
        after_hours_data = {
            'after_hours_price': None,
            'after_hours_change': None,
            'after_hours_ratio': None,
            'after_hours_direction': None,
            'after_hours_status': None
        }

        try:
            # Locate the after-hours info container.
            disc_info = soup.find('div', class_='disc-info')
            if not disc_info:
                return after_hours_data

            # Locate the after-hours price container inside it.
            after_price_container = disc_info.find('ul', class_='flex-end price-current')
            if not after_price_container:
                return after_hours_data

            # After-hours price: <li class="mg-r-8 disc-price direct-down/up">
            after_price_element = after_price_container.find('li', class_=re.compile(r'mg-r-8 disc-price'))
            if after_price_element:
                after_price_text = after_price_element.get_text(strip=True)
                price_match = re.search(r'[\d,]+\.?\d*', after_price_text)
                if price_match:
                    after_hours_data['after_hours_price'] = price_match.group().replace(',', '')

                # Direction comes from the marker class on the price element.
                if "direct-up" in after_price_element.get('class', []):
                    after_hours_data['after_hours_direction'] = "up"
                elif "direct-down" in after_price_element.get('class', []):
                    after_hours_data['after_hours_direction'] = "down"
                else:
                    after_hours_data['after_hours_direction'] = "flat"

            # After-hours change info: first <li> whose class matches "direct-".
            # NOTE(review): this may select the price <li> itself when it carries
            # a direct-* class - presumably why the sign checks below exist;
            # confirm against real markup.
            after_change_element = after_price_container.find('li', class_=re.compile(r'direct-'))
            if after_change_element:
                # Extract the change amount and the change ratio.
                change_spans = after_change_element.find_all('span')
                if len(change_spans) >= 2:
                    # The first span is the change amount.
                    change_text = change_spans[0].get_text(strip=True)
                    # Accept it only when signed, so a price is not mistaken for a change.
                    if change_text.startswith(('+', '-')):
                        change_match = re.search(r'[+-]?[\d,]+\.?\d*', change_text)
                        if change_match:
                            after_hours_data['after_hours_change'] = change_match.group().replace(',', '')

                    # The second span is the change ratio (mg-l-8).
                    ratio_span = change_spans[1]
                    ratio_text = ratio_span.get_text(strip=True)
                    ratio_match = re.search(r'[+-]?[\d,]+\.?\d*%', ratio_text)
                    if ratio_match:
                        after_hours_data['after_hours_ratio'] = ratio_match.group()
                elif len(change_spans) == 1:
                    # A single span may hold both the change amount and the ratio.
                    span_text = change_spans[0].get_text(strip=True)
                    # Only text starting with +/- is treated as a change amount.
                    if span_text.startswith(('+', '-')):
                        # Try to extract the change amount.
                        change_match = re.search(r'[+-]?[\d,]+\.?\d*(?!%)', span_text)
                        if change_match:
                            after_hours_data['after_hours_change'] = change_match.group().replace(',', '')
                        # Try to extract the change ratio.
                        ratio_match = re.search(r'[+-]?[\d,]+\.?\d*%', span_text)
                        if ratio_match:
                            after_hours_data['after_hours_ratio'] = ratio_match.group()
                else:
                    # No span at all: read directly from the <li> text.
                    full_text = after_change_element.get_text(strip=True)
                    # Change amount (must start with +/-).
                    change_match = re.search(r'([+-][\d,]+\.?\d*)(?!\s*%)', full_text)
                    if change_match:
                        after_hours_data['after_hours_change'] = change_match.group(1).replace(',', '')
                    # Change ratio (number followed by %).
                    ratio_match = re.search(r'([+-]?[\d,]+\.?\d*%)', full_text)
                    if ratio_match:
                        after_hours_data['after_hours_ratio'] = ratio_match.group(1)

            # After-hours status text.
            status_element = disc_info.find('div', class_='status')
            if status_element:
                after_hours_data['after_hours_status'] = status_element.get_text(strip=True)

        except Exception as e:
            print(f"⚠️ 解析盘后数据失败: {e}")

        return after_hours_data

    def save_to_csv_js(self, data, filename=None):
        """
        Append one JavaScript-parsed quote row to a CSV file.

        The file is opened in append mode with ``utf-8-sig`` encoding. The
        header row is written when the file is new or empty, decided from the
        stream position after opening rather than from a separate read probe.

        Args:
            data (dict): Row produced from the JavaScript state; keys must be
                among the column names below.
            filename (str): Target path; ``futu_<unix time>.csv`` if None.

        Returns:
            bool: True on success, False when ``data`` is empty or writing fails.
        """
        if not data:
            print("❌ 没有数据可保存")
            return False

        if filename is None:
            filename = f"futu_{int(time.time())}.csv"

        fieldnames = ['timestamp', 'datetime', 'before_open_price', 'before_open_change',
                      'before_open_change_ratio', 'current_price', 'current_change_ratio']

        try:
            with open(filename, 'a', newline='', encoding='utf-8-sig') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

                # Append mode starts at end-of-file, so position 0 means the
                # file is new or empty and still needs its header.
                if csvfile.tell() == 0:
                    writer.writeheader()

                writer.writerow(data)

            print(f"✅ 数据已保存到: {filename}")
            return True

        except Exception as e:
            print(f"❌ 保存CSV失败: {e}")
            return False

    def save_to_csv(self, data, filename=None):
        """
        Append one HTML-parsed quote row to a CSV file.

        The file is opened in append mode with ``utf-8-sig`` encoding. The
        header row is written when the file is new or empty, decided from the
        stream position after opening rather than from a separate read probe.

        Args:
            data (dict): Price row; keys must be among the column names below.
            filename (str): Target path; ``futu_<unix time>.csv`` if None.

        Returns:
            bool: True on success, False when ``data`` is empty or writing fails.
        """
        if not data:
            print("❌ 没有数据可保存")
            return False

        if filename is None:
            filename = f"futu_{int(time.time())}.csv"

        fieldnames = ['timestamp', 'datetime', 'current_price', 'change_price', 'change_ratio', 'direction',
                      'after_hours_price', 'after_hours_change', 'after_hours_ratio', 'after_hours_direction',
                      'after_hours_status']

        try:
            with open(filename, 'a', newline='', encoding='utf-8-sig') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

                # Append mode starts at end-of-file, so position 0 means the
                # file is new or empty and still needs its header.
                if csvfile.tell() == 0:
                    writer.writeheader()

                writer.writerow(data)

            print(f"✅ 数据已保存到: {filename}")
            return True

        except Exception as e:
            print(f"❌ 保存CSV失败: {e}")
            return False

    def parse_from_html_string(self, html_string):
        """
        Parse price data from an HTML string.

        Alias for ``parse_price_data``; the argument is forwarded unchanged.

        Args:
            html_string (str): HTML text.

        Returns:
            dict: Whatever ``parse_price_data`` returns for that input.
        """
        parsed = self.parse_price_data(html_string)
        return parsed

class StockDataIntegrator:
    """股票数据整合器，结合东方财富和富途数据"""
    
    def __init__(self):
        """Create the EastMoney client and the Futu parser used by this integrator."""
        eastmoney_client = EastMoneyAPI()
        futu_page_parser = FutuStockParser()
        self.eastmoney_api = eastmoney_client
        self.futu_parser = futu_page_parser
    
    def get_futu_stock_details(self, symbol):
        """Fetch Futu quote details for one ticker symbol.

        The page ``https://www.futunn.com/stock/<symbol>-US`` is downloaded and
        parsed from its JavaScript state first; if that yields nothing, the
        HTML scraper is used and the pre-market fields are left as ``''``.

        Args:
            symbol: Ticker symbol inserted into the page URL.

        Returns:
            dict | None: ``before_open_price``, ``before_open_change``,
            ``before_open_change_ratio``, ``current_price`` and
            ``current_change_ratio``; ``None`` when the page cannot be fetched,
            neither parser produces data, or anything raises.
        """
        try:
            futu_url = f"https://www.futunn.com/stock/{symbol}-US"
            print(f"🔍 正在获取 {symbol} 的富途数据...")

            page_parser = self.futu_parser
            html_content = page_parser.fetch_stock_page(futu_url)
            if not html_content:
                print(f"❌ 无法获取 {symbol} 的富途页面")
                return None

            # Preferred source: the embedded JavaScript state.
            js_data = page_parser.parse_javascript_data(html_content)
            if js_data:
                wanted = (
                    'before_open_price',
                    'before_open_change',
                    'before_open_change_ratio',
                    'current_price',
                    'current_change_ratio',
                )
                return {key: js_data.get(key, '') for key in wanted}

            # Fallback: scrape the rendered HTML (no pre-market fields there).
            html_data = page_parser.parse_price_data(html_content)
            if not html_data:
                return None

            details = dict.fromkeys(
                ('before_open_price', 'before_open_change', 'before_open_change_ratio'), ''
            )
            details['current_price'] = html_data.get('current_price', '')
            details['current_change_ratio'] = html_data.get('change_ratio', '')
            return details

        except Exception as e:
            print(f"❌ 获取 {symbol} 富途数据失败: {e}")
            return None
    
    def get_top50_integrated_data(self, limit=50, fetch_all=False):
        """
        Build the combined EastMoney + Futu data set for US stocks.

        Rows are fetched from EastMoney in pages of 100 and cut to ``limit``.
        When more than 100 rows were collected, the per-stock Futu page fetch
        is skipped and the ``futu_*`` fields stay empty strings.

        Args:
            limit: Maximum number of stocks to return.
            fetch_all: If true, ``limit`` is replaced by the total reported by
                a one-row probe request.

        Returns:
            list[dict]: One record per stock that parsed successfully; an
            empty list when no EastMoney rows could be fetched.
        """
        if not fetch_all:
            print(f"📊 开始获取美股市值前{limit}名整合数据...")
        else:
            print("📊 开始获取所有美股整合数据...")
            # A one-row request is enough to learn the total count.
            _, total_count = self.eastmoney_api.get_us_stocks(page_size=1)
            limit = total_count
            print(f"📊 预计总数: {total_count}")

        # Fetch in batches of 100 rows per request.
        page_size = 100
        total_pages = (limit + page_size - 1) // page_size
        all_stocks = []

        for page in range(1, total_pages + 1):
            already_covered = (page - 1) * page_size
            if min(page_size, limit - already_covered) <= 0:
                break

            stocks, _ = self.eastmoney_api.get_us_stocks(page_size=page_size, page_index=page)
            if not stocks:
                break
            all_stocks.extend(stocks)
            # Short pause between pages to avoid being blocked.
            time.sleep(0.2)

        if not all_stocks:
            print("❌ 无法获取东方财富数据")
            return []

        # Keep only the requested number of rows.
        all_stocks = all_stocks[:limit]
        stock_count = len(all_stocks)

        print(f"📋 已获取 {stock_count} 条基础数据，开始处理详情...")

        # One Futu request per stock is slow, so large batches rely on the
        # EastMoney data alone.
        skip_futu_details = stock_count > 100
        if skip_futu_details:
            print("⚠️ 股票数量较多，将跳过富途详情页抓取以提高速度...")

        futu_keys = (
            'before_open_price',
            'before_open_change',
            'before_open_change_ratio',
            'current_price',
            'current_change_ratio',
        )
        integrated_data = []

        for i, stock_item in enumerate(all_stocks, 1):
            try:
                eastmoney_data = self.eastmoney_api.parse_stock_data(stock_item)
                if not eastmoney_data:
                    continue

                symbol = eastmoney_data['symbol']
                if skip_futu_details:
                    # Bulk mode: report progress every 100 rows only.
                    if i % 100 == 0:
                        print(f"📈 处理进度 {i}/{stock_count}...")
                else:
                    print(f"📈 处理第 {i}/{stock_count}: {symbol} - {eastmoney_data['name']}")

                integrated_item = {
                    'rank': i,
                    'symbol': symbol,
                    'name': eastmoney_data['name'],
                    'eastmoney_price': eastmoney_data['current_price'],
                    'eastmoney_change': eastmoney_data['change_amount'],
                    'eastmoney_change_ratio': eastmoney_data['change_ratio'],
                    'market_cap': eastmoney_data['market_cap'],
                    'high_price': eastmoney_data['high_price'],
                    'low_price': eastmoney_data['low_price'],
                    'open_price': eastmoney_data['open_price'],
                    'prev_close': eastmoney_data['prev_close'],
                    'timestamp': int(time.time()),
                    'datetime': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                }
                # Futu columns default to empty strings.
                for key in futu_keys:
                    integrated_item[f'futu_{key}'] = ''

                # Futu details are only fetched for small batches.
                if not skip_futu_details:
                    futu_data = self.get_futu_stock_details(symbol)
                    if futu_data:
                        integrated_item.update(
                            {f'futu_{key}': futu_data[key] for key in futu_keys}
                        )
                    # Pause between Futu requests.
                    time.sleep(0.5)

                integrated_data.append(integrated_item)

            except Exception as e:
                print(f"❌ 处理股票 {i} 失败: {e}")
                continue

        print(f"✅ 成功整合 {len(integrated_data)} 只股票数据")
        return integrated_data
    
    def save_to_csv(self, integrated_data, filename=None):
        """Write the integrated records to a CSV file, replacing any existing file.

        Args:
            integrated_data: Iterable of record dicts; keys must be among the
                column names below.
            filename: Target path; ``futu_<unix time>.csv`` when falsy.

        Returns:
            None. Success and failure are only reported on stdout.
        """
        if not integrated_data:
            print("❌ 没有数据可保存")
            return

        output_path = filename or f"futu_{int(time.time())}.csv"

        columns = [
            'rank', 'symbol', 'name', 'timestamp', 'datetime',
            'eastmoney_price', 'eastmoney_change', 'eastmoney_change_ratio',
            'market_cap', 'high_price', 'low_price', 'open_price', 'prev_close',
            'futu_before_open_price', 'futu_before_open_change', 'futu_before_open_change_ratio',
            'futu_current_price', 'futu_current_change_ratio'
        ]

        try:
            with open(output_path, 'w', newline='', encoding='utf-8') as csvfile:
                writer = csv.DictWriter(csvfile, fieldnames=columns)
                writer.writeheader()
                writer.writerows(integrated_data)

            print(f"✅ 数据已保存到: {output_path}")

        except Exception as e:
            print(f"❌ 保存CSV文件失败: {e}")


def _build_arg_parser():
    """Create the command-line parser describing every supported option."""
    parser = argparse.ArgumentParser(description='富途牛牛股票价格数据抓取工具')
    parser.add_argument('--url', '-u', help='股票页面URL')
    parser.add_argument('--html', '-f', help='本地HTML文件路径')
    parser.add_argument('--output', '-o', help='输出CSV文件名')
    parser.add_argument('--test', '-t', action='store_true', help='使用示例HTML测试')
    parser.add_argument('--js', '-j', action='store_true', help='解析JavaScript数据（window.__INITIAL_STATE__）')
    parser.add_argument('--top50', action='store_true', help='获取美股市值前N名数据（整合东方财富和富途数据）')
    parser.add_argument('--all', action='store_true', help='获取所有美股数据（注意：数量巨大，默认跳过富途详情）')
    parser.add_argument('--limit', type=int, default=50, help='指定获取股票的数量，默认为50')
    parser.add_argument('--eastmoney-only', action='store_true', help='仅使用东方财富数据，不获取富途数据')
    return parser


def _fetch_eastmoney_pages(eastmoney_api, limit, page_size=100):
    """Page through the EastMoney list and return at most ``limit`` raw items.

    Args:
        eastmoney_api: Object exposing ``get_us_stocks(page_size=, page_index=)``
            which returns a ``(stocks, total)`` pair.
        limit: Maximum number of items to collect.
        page_size: Number of items requested per page.

    Returns:
        A list of raw stock items (possibly empty).
    """
    all_stocks = []
    total_pages = (limit + page_size - 1) // page_size

    for page in range(1, total_pages + 1):
        # How many items of this page still fall inside the requested limit.
        wanted = min(page_size, limit - (page - 1) * page_size)
        if wanted <= 0:
            break

        # Always request full pages (presumably the API offsets by
        # page_index * page_size, so a smaller last page would shift the
        # window) and trim locally so the limit is actually honoured.
        stocks, _ = eastmoney_api.get_us_stocks(page_size=page_size, page_index=page)
        if stocks:
            all_stocks.extend(list(stocks)[:wanted])
            print(f"📥 已获取 {len(all_stocks)}/{limit}...")
        # Short pause between page requests.
        time.sleep(0.2)

    return all_stocks


def _eastmoney_only_row(rank, eastmoney_data):
    """Build one CSV row from parsed EastMoney data, leaving Futu columns blank."""
    return {
        'rank': rank,
        'symbol': eastmoney_data['symbol'],
        'name': eastmoney_data['name'],
        'eastmoney_price': eastmoney_data['current_price'],
        'eastmoney_change': eastmoney_data['change_amount'],
        'eastmoney_change_ratio': eastmoney_data['change_ratio'],
        'market_cap': eastmoney_data['market_cap'],
        'high_price': eastmoney_data['high_price'],
        'low_price': eastmoney_data['low_price'],
        'open_price': eastmoney_data['open_price'],
        'prev_close': eastmoney_data['prev_close'],
        'timestamp': int(time.time()),
        'datetime': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
        'futu_before_open_price': '',
        'futu_before_open_change': '',
        'futu_before_open_change_ratio': '',
        'futu_current_price': '',
        'futu_current_change_ratio': ''
    }


def _run_ranking_mode(args):
    """Handle ``--top50`` / ``--all``: fetch the ranked list and save it to CSV."""
    limit = args.limit
    if args.all:
        print("🚀 启动全量美股数据获取模式...")
    else:
        print(f"🚀 启动美股市值前{limit}名数据获取模式...")

    integrator = StockDataIntegrator()
    output_file = args.output or None

    if not args.eastmoney_only:
        # Integrated EastMoney + Futu data.
        integrated_data = integrator.get_top50_integrated_data(limit, fetch_all=args.all)
        if integrated_data:
            integrator.save_to_csv(integrated_data, output_file)
        else:
            print("❌ 无法获取整合数据")
        return

    print("📊 仅获取东方财富数据...")
    eastmoney_api = EastMoneyAPI()

    if args.all:
        # Probe with a single-item page just to learn the total count.
        _, total = eastmoney_api.get_us_stocks(page_size=1)
        if not total:
            # Without a usable total there is nothing to page through;
            # bail out instead of failing later in the page arithmetic.
            print("❌ 无法获取东方财富数据")
            return
        limit = total
        print(f"📊 准备获取全部 {total} 只股票...")

    all_stocks = _fetch_eastmoney_pages(eastmoney_api, limit)
    if not all_stocks:
        print("❌ 无法获取东方财富数据")
        return

    integrated_data = []
    # The rank is the position in the fetched list, so items that fail to
    # parse leave a gap in the numbering rather than shifting later ranks.
    for rank, stock_item in enumerate(all_stocks, 1):
        eastmoney_data = eastmoney_api.parse_stock_data(stock_item)
        if eastmoney_data:
            integrated_data.append(_eastmoney_only_row(rank, eastmoney_data))

    integrator.save_to_csv(integrated_data, output_file)


def _sample_html(use_js):
    """Return the built-in sample document used by ``--test`` and announce it."""
    if use_js:
        # Sample page embedding window.__INITIAL_STATE__ data.
        test_html = '''
            <script>
                window.__INITIAL_STATE__ = {
                    "stock_info": {
                        "before_open_stock_info": {
                            "price": "253.560",
                            "change": "-0.471",
                            "changeRatio": "-0.19%"
                        },
                        "data": {
                            "price": "254.031",
                            "changeRatio": "+4.02%"
                        }
                    }
                };
            </script>
            '''
        print("🧪 使用示例JavaScript数据进行测试...")
        return test_html

    # Sample HTML markup, including an after-hours section.
    test_html = '''
            <ul class="flex-end price-current" data-v-6afeb239>
                <li class="mg-r-8 price direct-up" data-v-6afeb239>
                    253.740
                    <i class="icon-direct-status icon-direct-up" data-v-6afeb239></i>
                </li>
                <li class="change direct-up" data-v-6afeb239>
                    <span class="change-price" data-v-6afeb239>+9.520</span>
                    <span class="mg-l-8 change-ratio" data-v-6afeb239>+3.90%</span>
                </li>
            </ul>
            <div class="disc-info" data-v-6afeb239>
                <ul class="flex-end price-current" data-v-6afeb239>
                    <li class="mg-r-8 disc-price direct-down" data-v-6afeb239>253.516</li>
                    <li class="direct-down" data-v-6afeb239>
                        <span data-v-6afeb239>-0.515</span>
                        <span class="mg-l-8" data-v-6afeb239>-0.20%</span>
                    </li>
                </ul>
                <div class="status" data-v-6afeb239>盘后 16:14 (美东)</div>
            </div>
            '''
    print("🧪 使用示例HTML进行测试（包含盘后数据）...")
    return test_html


def _load_html(args, futu_parser):
    """Obtain the page content from the source selected on the command line.

    Precedence: ``--test`` sample, ``--html`` local file, ``--url``, then the
    default AMZN page. Prints a diagnostic and returns ``None`` when no
    content could be obtained.
    """
    if args.test:
        html_content = _sample_html(args.js)
    elif args.html:
        # Read from a local HTML file.
        try:
            with open(args.html, 'r', encoding='utf-8') as f:
                html_content = f.read()
            print(f"📁 从本地文件读取: {args.html}")
        except (OSError, UnicodeError) as e:
            print(f"❌ 读取本地文件失败: {e}")
            return None
    elif args.url:
        print(f"🌐 正在获取页面: {args.url}")
        html_content = futu_parser.fetch_stock_page(args.url)
    else:
        # No source given: fall back to the original default AMZN URL.
        default_url = 'https://www.futunn.com/stock/AMZN-US?global_content=%7B%22promote_id%22%3A13766,%22sub_promote_id%22%3A36,%22invite%22%3A%2210237865%22,%22promote_content%22%3A%22nn%3Afeed%3A115061320123972%22,%22f%22%3A%22q.futunn.com%2Ffeed%2F115061320123972%22%7D&chain_id=KcFts02dZGw_d-.1kgi5g0'
        print("🌐 使用默认URL获取AMZN股票数据...")
        html_content = futu_parser.fetch_stock_page(default_url)

    if not html_content:
        print("❌ 无法获取HTML内容")
        return None
    return html_content


def _report_js_data(futu_parser, html_content, output):
    """Parse the embedded JavaScript state, print it and save it to CSV."""
    print("🔍 正在解析JavaScript数据...")
    js_data = futu_parser.parse_javascript_data(html_content)
    if not js_data:
        print("❌ JavaScript数据解析失败")
        return

    print("\n📊 JavaScript解析结果:")
    print(f"盘前价格: {js_data['before_open_price']}")
    print(f"盘前涨跌额: {js_data['before_open_change']}")
    print(f"盘前涨跌幅: {js_data['before_open_change_ratio']}")
    print(f"当前价格: {js_data['current_price']}")
    print(f"当前涨跌幅: {js_data['current_change_ratio']}")
    print(f"时间: {js_data['datetime']}")

    # Default file name is derived from the current Unix timestamp.
    output_file = output or f"futu_{int(time.time())}.csv"
    futu_parser.save_to_csv_js(js_data, output_file)


def _report_price_data(futu_parser, html_content, output):
    """Parse price data from the HTML markup, print it and save it to CSV."""
    print("🔍 正在解析HTML价格数据...")
    price_data = futu_parser.parse_price_data(html_content)
    if not price_data:
        print("❌ HTML数据解析失败，未能提取到价格数据")
        return

    print("\n📊 HTML解析结果:")
    print(f"当前价格: {price_data['current_price']}")
    print(f"涨跌额: {price_data['change_price']}")
    print(f"涨跌幅: {price_data['change_ratio']}")
    print(f"方向: {price_data['direction']}")
    print(f"时间: {price_data['datetime']}")

    # After-hours figures are shown only when the parser found them.
    if price_data.get('after_hours_price'):
        print("\n🌙 盘后交易数据:")
        print(f"盘后价格: {price_data['after_hours_price']}")
        print(f"盘后涨跌额: {price_data['after_hours_change']}")
        print(f"盘后涨跌幅: {price_data['after_hours_ratio']}")
        print(f"盘后方向: {price_data['after_hours_direction']}")
        print(f"盘后状态: {price_data['after_hours_status']}")

    # A None file name lets the parser's save_to_csv choose its own default.
    futu_parser.save_to_csv(price_data, output or None)


def main():
    """Command-line entry point.

    Initializes logging, parses the arguments and dispatches to one of two
    modes: the ranked-list export (``--top50`` / ``--all``) or the
    single-stock page parser (sample, local file, URL or default page).
    """
    init_logging()
    args = _build_arg_parser().parse_args()

    # Ranked-list mode takes precedence over the single-stock options.
    if args.top50 or args.all:
        _run_ranking_mode(args)
        return

    # Single-stock mode.
    futu_parser = FutuStockParser()
    html_content = _load_html(args, futu_parser)
    if html_content is None:
        return

    if args.js:
        _report_js_data(futu_parser, html_content, args.output)
    else:
        _report_price_data(futu_parser, html_content, args.output)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()