Initial commit: 百家号文章采集系统

This commit is contained in:
sjk
2025-12-19 22:48:58 +08:00
commit 0d5bbb1864
37 changed files with 11774 additions and 0 deletions

633
baidu_api.py Normal file
View File

@@ -0,0 +1,633 @@
import asyncio
import json
import random
import time
from typing import Dict, Any, Optional
from urllib.parse import quote
import aiohttp
from playwright.async_api import async_playwright
from fake_useragent import UserAgent
import logging
from test2 import display_simple_data
# Module-wide logging: INFO so per-page fetch progress and proxy rotation are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class BaiduBJHSpider:
    """Scraper for Baijiahao (baijiahao.baidu.com) author article feeds.

    Fetches article lists through the mbd.baidu.com JSONP endpoint, with
    optional rotating-proxy support and an optional Playwright browser
    session used only to obtain fresh cookies when direct requests fail.
    """

    def __init__(self, use_proxy: bool = False, proxy_api_url: str = None, proxy_username: str = None, proxy_password: str = None):
        # Random desktop User-Agent pool; a fresh UA string is drawn per request.
        self.ua = UserAgent()
        self.use_proxy = use_proxy
        # Default proxy-pool endpoint returns a single "ip:port" line as text.
        self.proxy_api_url = proxy_api_url or 'http://api.tianqiip.com/getip?secret=lu29e593&num=1&type=txt&port=1&mr=1&sign=4b81a62eaed89ba802a8f34053e2c964'
        self.proxy_username = proxy_username
        self.proxy_password = proxy_password
        self.current_proxy = None      # last proxy URL produced by get_proxy()
        self.session_cookie = None     # "name=value; ..." string captured by init_browser()

    def get_proxy(self):
        """Fetch one proxy IP from the proxy pool.

        Returns:
            A proxy URL string ("http://[user:pass@]ip:port"), or None when
            proxying is disabled or the pool request fails.
        """
        if not self.use_proxy:
            return None
        try:
            import requests
            logger.info(f"从代理池获取IP: {self.proxy_api_url}")
            # Short timeout: the pool endpoint is expected to answer quickly.
            response = requests.get(self.proxy_api_url, timeout=5)
            content = response.content.decode("utf-8").strip()
            logger.info(f"提取代理IP: {content}")
            if ':' in content:
                ip, port = content.strip().split(":", 1)
                # Embed credentials in the proxy URL when auth is configured.
                if self.proxy_username and self.proxy_password:
                    proxy_url = f"http://{self.proxy_username}:{self.proxy_password}@{ip}:{port}"
                    logger.info(f"代理配置成功(带认证): http://{self.proxy_username}:****@{ip}:{port}")
                else:
                    proxy_url = f"http://{ip}:{port}"
                    logger.info(f"代理配置成功: {proxy_url}")
                self.current_proxy = proxy_url
                return proxy_url
            else:
                logger.error("代理IP格式错误")
                return None
        except Exception as e:
            logger.error(f"获取代理IP失败: {e}")
            return None

    async def init_browser(self):
        """Launch a headless browser to harvest baidu.com cookies.

        Visits baidu.com then baijiahao.baidu.com, stores the cookie jar as a
        "name=value; ..." string on self.session_cookie, and returns the raw
        cookie list.
        """
        playwright = await async_playwright().start()
        # Flags reduce the obvious automation fingerprint.
        browser_args = [
            '--disable-blink-features=AutomationControlled',
            '--disable-web-security',
            '--disable-features=IsolateOrigins,site-per-process',
            '--no-sandbox',
            '--disable-setuid-sandbox',
        ]
        browser = await playwright.chromium.launch(
            headless=True,  # headless is fine here; only cookies are needed
            args=browser_args
        )
        context = await browser.new_context(
            viewport={'width': 1920, 'height': 1080},
            user_agent=self.ua.random,
            locale='zh-CN',
            timezone_id='Asia/Shanghai'
        )
        await context.set_extra_http_headers({
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Accept-Encoding': 'gzip, deflate, br',
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
        })
        page = await context.new_page()
        # Visit the Baidu home page first to pick up the baseline cookies.
        await page.goto('https://www.baidu.com', wait_until='networkidle')
        await asyncio.sleep(random.uniform(2, 4))
        # Then the Baijiahao page for its own cookies.
        await page.goto('https://baijiahao.baidu.com/', wait_until='networkidle')
        await asyncio.sleep(random.uniform(3, 5))
        cookies = await context.cookies()
        self.session_cookie = '; '.join([f"{c['name']}={c['value']}" for c in cookies])
        logger.info(f"获取到Cookie: {self.session_cookie[:50]}...")
        await browser.close()
        await playwright.stop()
        return cookies

    def build_headers(self, referer: str = "https://baijiahao.baidu.com/") -> Dict:
        """Build browser-like request headers (plus session cookie if captured)."""
        timestamp = int(time.time() * 1000)
        headers = {
            'User-Agent': self.ua.random,
            'Accept': '*/*',
            'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
            'Accept-Encoding': 'gzip, deflate, br',
            'Referer': referer,
            'Connection': 'keep-alive',
            'Sec-Fetch-Dest': 'script',
            'Sec-Fetch-Mode': 'no-cors',
            'Sec-Fetch-Site': 'same-site',
            'Pragma': 'no-cache',
            'Cache-Control': 'no-cache',
        }
        if self.session_cookie:
            headers['Cookie'] = self.session_cookie
        return headers

    def generate_callback_name(self) -> str:
        """Generate a timestamp-based JSONP callback name."""
        timestamp = int(time.time() * 1000)
        return f"__jsonp{timestamp}"

    async def fetch_data_directly(self, uk: str = "ntHidnLhrlfclJar2z8wBg", use_browser: bool = False, num: int = 10,
                                  ctime: str = None) -> Optional[Dict]:
        """Request the feed endpoint directly (may take several attempts).

        Args:
            uk: author UK identifier.
            use_browser: when True, launch a browser first to obtain cookies
                (default False — no browser started).
            num: requested item count (the API is fixed at 10).
            ctime: pagination cursor — the query.ctime value returned by the
                previous request.

        Returns:
            Parsed JSONP payload dict, or None after 10 failed attempts.

        Raises:
            Exception: proxying is enabled but no proxy IP could be acquired
                (the local IP is never used as a fallback).
        """
        if use_browser:
            await self.init_browser()
        # With proxying enabled a proxy MUST be acquired up front; never fall
        # back to the local IP.
        if self.use_proxy:
            if not self.current_proxy:
                proxy = self.get_proxy()
                if not proxy:
                    raise Exception("启用了代理但无法获取代理IP拒绝使用本机IP")
        async with aiohttp.ClientSession() as session:
            for attempt in range(10):  # up to 10 retries to ride out IP-pool throttling
                try:
                    callback_name = self.generate_callback_name()
                    timestamp = int(time.time() * 1000)
                    params = {
                        'tab': 'main',
                        'num': '10',  # the API only honors 10
                        'uk': uk,
                        'source': 'pc',
                        'type': 'newhome',
                        'action': 'dynamic',
                        'format': 'jsonp',
                        'callback': callback_name,
                        'otherext': f'h5_{time.strftime("%Y%m%d%H%M%S")}',
                        'Tenger-Mhor': str(timestamp),
                        '_': str(timestamp)  # cache-buster timestamp
                    }
                    # Pagination cursor from the previous page, if any.
                    if ctime:
                        params['ctime'] = ctime
                    url = "https://mbd.baidu.com/webpage"
                    headers = self.build_headers()
                    logger.info(f"尝试第{attempt + 1}次请求URL: {url}")
                    request_kwargs = {
                        'params': params,
                        'headers': headers,
                        'timeout': aiohttp.ClientTimeout(total=30)
                    }
                    # Re-check the proxy on every attempt — it may have been
                    # rotated (or lost) by an earlier failure.
                    if self.use_proxy:
                        if not self.current_proxy:
                            raise Exception("启用了代理但当前无代理IP拒绝使用本机IP")
                        logger.info(f"使用代理: {self.current_proxy}")
                        request_kwargs['proxy'] = self.current_proxy
                    async with session.get(url, **request_kwargs) as response:
                        text = await response.text()
                        # Unwrap the JSONP envelope: callback( ...json... )
                        if text.startswith(callback_name + '(') and text.endswith(')'):
                            json_str = text[len(callback_name) + 1:-1]
                            data = json.loads(json_str)
                            # Anti-scraping marker check.
                            if data.get('data', {}).get('foe', {}).get('is_need_foe') == True:
                                logger.warning(f"检测到反爬标识(is_need_foe=True),尝试第{attempt + 1}")
                                # Rotate the proxy immediately and retry.
                                if self.use_proxy:
                                    logger.info("检测到反爬立即切换代理IP无需等待")
                                    self.get_proxy()
                                if attempt < 9:  # retries remain (10 total)
                                    continue
                            return data
                # FIX: ClientProxyConnectionError is a subclass of
                # ClientConnectorError, so it must be caught FIRST — in the
                # original order this branch was unreachable.
                except aiohttp.ClientProxyConnectionError as e:
                    logger.error(f"❌ 代理连接失败 (尝试{attempt + 1}/10): {e}")
                    # Proxy is dead: fetch a new one and retry without waiting.
                    if self.use_proxy:
                        logger.info("🔄 代理失败立即切换代理IP无需等待")
                        self.get_proxy()
                except aiohttp.ClientConnectorError as e:
                    logger.error(f"❌ 网络连接失败 (尝试{attempt + 1}/10): {type(e).__name__} - {str(e)[:100]}")
                    if self.use_proxy:
                        logger.info("🔄 网络错误立即切换代理IP")
                        self.get_proxy()
                except asyncio.TimeoutError:
                    logger.error(f"❌ 请求超时 (尝试{attempt + 1}/10): 代理响应超过30秒")
                    if self.use_proxy:
                        logger.info("🔄 超时立即切换代理IP")
                        self.get_proxy()
                except aiohttp.ClientResponseError as e:
                    # 407 means proxy auth failure / pool throttling.
                    if e.status == 407:
                        logger.warning(f"检测到407错误代理IP池限流等待10秒后重新获取IP...")
                        await asyncio.sleep(10)  # give the pool time to recover
                        if self.use_proxy:
                            logger.info("重新获取代理IP...")
                            self.get_proxy()
                    else:
                        logger.error(f"❌ HTTP错误 (尝试{attempt + 1}/10): {e.status}, {e.message}")
                        await asyncio.sleep(random.uniform(1, 2))
                except Exception as e:
                    logger.error(f"❌ 未知错误 (尝试{attempt + 1}/10): {type(e).__name__} - {str(e)[:100]}")
                    await asyncio.sleep(random.uniform(1, 2))
            # All 10 attempts exhausted.
            logger.error("请求失败已经重试10次仍然失败可能是IP池限流或网络问题")
            return None

    async def fetch_via_browser(self, uk: str = "ntHidnLhrlfclJar2z8wBg") -> Optional[Dict]:
        """Drive a real browser and sniff the feed JSONP responses.

        Most reliable (and slowest) path: loads the author page, scrolls to
        trigger the XHRs, and captures matching responses from the network.
        Returns the first captured payload, or None.
        """
        playwright = await async_playwright().start()
        try:
            browser = await playwright.chromium.launch(
                headless=False,  # set True outside of debugging
                args=[
                    '--disable-blink-features=AutomationControlled',
                    '--no-sandbox'
                ]
            )
            context = await browser.new_context(
                viewport={'width': 1920, 'height': 1080},
                user_agent=self.ua.random,
                locale='zh-CN'
            )
            page = await context.new_page()
            results = []

            # FIX: in Playwright's async API, Response.text() is a coroutine.
            # The original sync handler never awaited it, so `text` was a
            # coroutine object and no data was ever captured. Async event
            # handlers are supported by page.on().
            async def handle_response(response):
                if "mbd.baidu.com/webpage" in response.url and "format=jsonp" in response.url:
                    try:
                        text = await response.text()
                        if "callback=" in response.url:
                            # Recover the callback name from the request URL.
                            import re
                            match = re.search(r'callback=([^&]+)', response.url)
                            if match:
                                callback = match.group(1)
                                if text.startswith(callback + '(') and text.endswith(')'):
                                    json_str = text[len(callback) + 1:-1]
                                    data = json.loads(json_str)
                                    results.append(data)
                    except Exception:
                        # Best effort: ignore unparseable responses.
                        pass

            page.on("response", handle_response)
            await page.goto(f"https://baijiahao.baidu.com/u?app_id={uk}", wait_until='networkidle')
            # Scroll like a user would, to trigger lazy loading.
            for _ in range(3):
                await page.evaluate("window.scrollBy(0, window.innerHeight)")
                await asyncio.sleep(random.uniform(1, 2))
            # Give the feed requests time to complete.
            await asyncio.sleep(5)
            await browser.close()
            if results:
                return results[0]
        except Exception as e:
            logger.error(f"浏览器方式获取失败: {e}")
        finally:
            await playwright.stop()
        return None

    async def fetch_with_signature(self, uk: str = "ntHidnLhrlfclJar2z8wBg") -> Optional[Dict]:
        """Experimental: request the endpoint with plain JSON format.

        The Baidu endpoint may require signed parameters; the signing
        algorithm would have to be reverse-engineered from its JavaScript.
        This variant just probes the config endpoint and retries with
        format=json (no JSONP callback).
        """
        async with aiohttp.ClientSession() as session:
            token_url = "https://mbd.baidu.com/staticx/search/dynamic/config"
            headers = {
                'User-Agent': self.ua.random,
                'Referer': 'https://baijiahao.baidu.com/',
            }
            try:
                # Probe the config endpoint (response currently only logged).
                async with session.get(token_url, headers=headers) as resp:
                    config_text = await resp.text()
                    logger.info(f"配置响应: {config_text[:200]}")
                timestamp = int(time.time() * 1000)
                # Plain JSON request — no 'callback' parameter.
                params = {
                    'tab': 'main',
                    'num': '10',
                    'uk': uk,
                    'source': 'pc',
                    'type': 'newhome',
                    'action': 'dynamic',
                    'format': 'json',
                    't': str(timestamp),
                }
                url = "https://mbd.baidu.com/webpage"
                async with session.get(url, params=params, headers=headers) as response:
                    text = await response.text()
                    logger.info(f"JSON响应: {text[:500]}")
                    try:
                        return json.loads(text)
                    except ValueError:
                        return None
            except Exception as e:
                logger.error(f"签名方式失败: {e}")
                return None
async def fetch_baidu_data(uk: str = "ntHidnLhrlfclJar2z8wBg", months: float = 6, use_proxy: bool = False, proxy_api_url: str = None,
                           on_page_fetched=None, start_page: int = 1, start_ctime: str = None) -> Optional[Dict]:
    """Main entry point: crawl a Baijiahao author feed page by page.

    Args:
        uk: author UK identifier.
        months: how far back to crawl, in months (default 6). Fractions are
            supported, e.g. 0.33 for roughly 10 days. (FIX: annotation was
            `int` although fractional values are explicitly supported.)
        use_proxy: enable the rotating proxy pool.
        proxy_api_url: proxy-pool API URL; empty uses the built-in default.
        on_page_fetched: per-page callback, signature (page, items, ctime);
            pages are handed off here instead of being accumulated.
        start_page: starting page number (resume support).
        start_ctime: starting pagination cursor (resume support).

    Returns:
        Dict with 'last_page', 'last_ctime' and 'completed' for resuming,
        or None when even the browser-assisted first request fails.
    """
    from datetime import datetime, timedelta
    import re
    spider = BaiduBJHSpider(use_proxy=use_proxy, proxy_api_url=proxy_api_url)
    # Cut-off date; months may be fractional so convert via days.
    days = int(months * 30)
    target_date = datetime.now() - timedelta(days=days)
    if months < 1:
        logger.info(f"开始获取百家号数据(近{days}天, 目标日期: {target_date.strftime('%Y-%m-%d')})")
    else:
        logger.info(f"开始获取百家号数据(近{int(months)}个月, 目标日期: {target_date.strftime('%Y-%m-%d')})")
    # Each page is delivered through the callback; nothing is accumulated.
    page = start_page          # supports starting at an arbitrary page
    current_ctime = start_ctime  # supports reusing a saved pagination cursor
    if start_page > 1 and start_ctime:
        # Resuming: skip the first-page request and reuse the saved ctime.
        logger.info(f"断点续传:从第{start_page}页开始ctime={start_ctime}")
        data = None  # the first-page payload is not needed
    else:
        # Optimization: try the API directly; the browser is a fallback only.
        logger.info("尝试直接请求API(不启动浏览器)...")
        data = await spider.fetch_data_directly(uk, use_browser=False, ctime=current_ctime)
        if not data or not data.get('data', {}).get('list'):
            if start_page == 1:  # the browser retry only makes sense for page 1
                logger.warning("直接请求失败,启动浏览器获取Cookie...")
                # Log what the first attempt returned, for debugging.
                if data:
                    logger.warning(f"第一次请求返回数据: {json.dumps(data, ensure_ascii=False, indent=2)}")
                else:
                    logger.warning("第一次请求返回数据: None")
                data = await spider.fetch_data_directly(uk, use_browser=True)
                if not data or not data.get('data', {}).get('list'):
                    logger.error("启动浏览器后仍然失败,放弃")
                    if data:
                        logger.error(f"最终返回数据: {json.dumps(data, ensure_ascii=False, indent=2)}")
                    else:
                        logger.error("最终返回数据: None")
                    return None
        # First request succeeded — hand the page to the callback.
        if data and data.get('data', {}).get('list'):
            items = data.get('data', {}).get('list', [])
            logger.info(f"第{page}页获取成功,数据条数: {len(items)}")
            if on_page_fetched:
                on_page_fetched(page, items, current_ctime)
            # Pagination cursor lives at data.data.query.ctime.
            current_ctime = data.get('data', {}).get('query', {}).get('ctime', current_ctime)
            if current_ctime:
                logger.info(f"获取到分页参数 ctime={current_ctime}")
            else:
                logger.warning("未获取到ctime分页参数")

    def get_article_datetime(item_data: dict) -> datetime:
        """Extract the article timestamp from an itemData dict.

        Prefers 'ctime' (a seconds-resolution Unix timestamp); falls back to
        parsing the human-readable 'time' field (relative or absolute).
        """
        if 'ctime' in item_data and item_data['ctime']:
            try:
                timestamp = int(item_data['ctime'])
                return datetime.fromtimestamp(timestamp)
            # FIX: was a bare `except:` — narrowed to the errors int()/
            # fromtimestamp() actually raise on bad input.
            except (ValueError, TypeError, OverflowError, OSError):
                pass
        time_str = item_data.get('time', '')
        if not time_str:
            return datetime.now()
        now = datetime.now()
        if '分钟前' in time_str:  # "N minutes ago"
            minutes = int(re.search(r'(\d+)', time_str).group(1))
            return now - timedelta(minutes=minutes)
        elif '小时前' in time_str:  # "N hours ago"
            hours = int(re.search(r'(\d+)', time_str).group(1))
            return now - timedelta(hours=hours)
        elif '天前' in time_str or '昨天' in time_str:  # "N days ago" / "yesterday"
            if '昨天' in time_str:
                days = 1
            else:
                days = int(re.search(r'(\d+)', time_str).group(1))
            return now - timedelta(days=days)
        elif '-' in time_str:  # absolute date formats
            try:
                return datetime.strptime(time_str, '%Y-%m-%d %H:%M')
            except ValueError:  # FIX: was bare except
                try:
                    return datetime.strptime(time_str, '%Y-%m-%d')
                except ValueError:  # FIX: was bare except
                    return now
        return now

    # Decide from the last article's timestamp whether to keep paging.
    need_more = True
    if data and data.get('data', {}).get('list'):
        items = data.get('data', {}).get('list', [])
        if items:
            last_item = items[-1]
            item_data = last_item.get('itemData', {})
            article_date = get_article_datetime(item_data)
            logger.info(f"最后一篇文章时间: {article_date.strftime('%Y-%m-%d %H:%M:%S')}")
            if article_date < target_date:
                need_more = False
                if months < 1:
                    logger.info(
                        f"最后一篇文章时间: {article_date.strftime('%Y-%m-%d %H:%M:%S')}, 已超出{days}天范围,停止请求")
                else:
                    logger.info(
                        f"最后一篇文章时间: {article_date.strftime('%Y-%m-%d %H:%M:%S')}, 已超出{int(months)}个月范围,停止请求")
        else:
            need_more = False
    elif start_page > 1:
        # Resuming: assume there is more to fetch.
        need_more = True
    else:
        need_more = False
    # Keep requesting pages until the cut-off date or an empty page
    # (no hard page limit).
    while need_more:
        page += 1
        logger.info(f"需要更多数据,请求第{page}页...")
        # Randomized 8-12s pacing to look less like a bot.
        delay = random.uniform(8, 12)
        logger.info(f"等待 {delay:.1f} 秒后请求...")
        await asyncio.sleep(delay)
        # Use the cursor returned by the previous page.
        next_data = await spider.fetch_data_directly(uk, use_browser=False, ctime=current_ctime)
        # fetch_data_directly already handles anti-scraping retries; here we
        # only check whether it ultimately succeeded.
        if not next_data or not next_data.get('data', {}).get('list'):
            if next_data and next_data.get('data', {}).get('foe', {}).get('is_need_foe') == True:
                logger.error(f"第{page}页多次重试后仍然触发反爬,停止请求")
                logger.error(f"返回数据: {json.dumps(next_data, ensure_ascii=False, indent=2)}")
            else:
                logger.warning(f"第{page}页无数据,停止请求")
                # Dump the full payload for debugging.
                if next_data:
                    logger.warning(f"返回数据: {json.dumps(next_data, ensure_ascii=False, indent=2)}")
                else:
                    logger.warning("返回数据: None")
            break
        next_items = next_data.get('data', {}).get('list', [])
        logger.info(f"第{page}页获取成功,数据条数: {len(next_items)}")
        if on_page_fetched:
            on_page_fetched(page, next_items, current_ctime)
        # Advance the cursor (path: data.data.query.ctime).
        current_ctime = next_data.get('data', {}).get('query', {}).get('ctime', current_ctime)
        if current_ctime:
            logger.info(f"更新分页参数 ctime={current_ctime}")
        # Stop once the last article on the page is older than the cut-off.
        if next_items:
            last_item = next_items[-1]
            item_data = last_item.get('itemData', {})
            article_date = get_article_datetime(item_data)
            logger.info(f"最后一篇文章时间: {article_date.strftime('%Y-%m-%d %H:%M:%S')}")
            if article_date < target_date:
                need_more = False
                if months < 1:
                    logger.info(
                        f"最后一篇文章时间: {article_date.strftime('%Y-%m-%d %H:%M:%S')}, 已超出{days}天范围,停止请求")
                else:
                    logger.info(
                        f"最后一篇文章时间: {article_date.strftime('%Y-%m-%d %H:%M:%S')}, 已超出{int(months)}个月范围,停止请求")
        else:
            need_more = False
    # Pagination state for resume support.
    result = {
        'last_page': page,
        'last_ctime': current_ctime,
        'completed': not need_more  # whether the date range was exhausted
    }
    logger.info(f"抓取完成,最后页码: {page}, ctime: {current_ctime}")
    return result
# 同步包装函数(便于在同步代码中调用)
def get_baidu_data_sync(uk: str = "ntHidnLhrlfclJar2z8wBg", months: float = 6, use_proxy: bool = False,
                        proxy_api_url: str = None, on_page_fetched=None,
                        start_page: int = 1, start_ctime: str = None) -> Optional[Dict]:
    """Synchronous wrapper around fetch_baidu_data for non-async callers.

    Args:
        uk: author UK identifier.
        months: how far back to crawl, in months (default 6; fractions
            allowed — FIX: annotation was `int`).
        use_proxy: enable the rotating proxy pool.
        proxy_api_url: proxy-pool API URL; empty uses the built-in default.
        on_page_fetched: per-page callback, called as (page, items, ctime).
        start_page: starting page number (resume support).
        start_ctime: starting pagination cursor (resume support).

    Returns:
        The resume-state dict from fetch_baidu_data, or None on failure.
    """
    # asyncio.run creates and closes a fresh event loop; it must not be
    # called from within an already-running loop.
    return asyncio.run(fetch_baidu_data(uk, months, use_proxy, proxy_api_url,
                                        on_page_fetched, start_page, start_ctime))
# 保留原有的main函数用于测试
async def main():
    """Manual smoke test: fetch with defaults and pretty-print the result."""
    data = await fetch_baidu_data()
    if data:
        print(json.dumps(data, ensure_ascii=False, indent=2))
        # FIX: removed a redundant local `from test2 import display_simple_data`
        # — the name is already imported at module level.
        display_simple_data(data)


if __name__ == "__main__":
    asyncio.run(main())