from dataclasses import dataclass
from datetime import datetime
from typing import List, Optional, Any, Dict


# Revised data classes: every field is Optional and defaults to None so that
# partially populated payloads can still be represented.
@dataclass
class Author:
    """One entry of a paper's ``authors`` list."""

    _id: Optional[str] = None
    name: Optional[str] = None
    hidden: Optional[bool] = None


@dataclass
class Paper:
    """The ``paper`` object nested inside an article payload."""

    id: Optional[str] = None
    # Defaults to None (not an empty list); parse_article always passes a list.
    authors: Optional[List[Author]] = None
    publishedAt: Optional[datetime] = None
    title: Optional[str] = None
    summary: Optional[str] = None
    upvotes: Optional[int] = None
    discussionId: Optional[str] = None


@dataclass
class SubmittedBy:
    """The ``submittedBy`` object nested inside an article payload."""

    _id: Optional[str] = None
    avatarUrl: Optional[str] = None
    fullname: Optional[str] = None
    name: Optional[str] = None
    type: Optional[str] = None
    isPro: Optional[bool] = None
    isHf: Optional[bool] = None
    isMod: Optional[bool] = None
    followerCount: Optional[int] = None


@dataclass
class Article:
    """Top-level article record produced by :func:`parse_article`."""

    paper: Optional[Paper] = None
    publishedAt: Optional[datetime] = None
    title: Optional[str] = None
    thumbnail: Optional[str] = None
    numComments: Optional[int] = None
    submittedBy: Optional[SubmittedBy] = None
    isAuthorParticipating: Optional[bool] = None


def safe_get(data: Dict, *keys: str) -> Any:
    """Safely fetch a value from nested dictionaries.

    Walks ``data`` through ``keys`` one level at a time.

    Args:
        data: The outermost mapping (any non-dict value is tolerated).
        *keys: Successive keys to descend through.

    Returns:
        The value found at the end of the path, or ``None`` when a key is
        missing, an intermediate value is not a dict, or the final value is
        an empty dict (an empty dict is deliberately treated as "absent").
    """
    current: Any = data
    for key in keys:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current if current != {} else None


def _parse_datetime(dt_str: Any) -> Optional[datetime]:
    """Parse an ISO-8601 timestamp, returning None for anything unparsable."""
    # Non-string values (numbers, dicts, None) cannot be ISO timestamps.
    if not isinstance(dt_str, str) or not dt_str:
        return None
    # datetime.fromisoformat() only accepts a trailing "Z" from Python 3.11
    # onwards, so rewrite it as an explicit UTC offset.
    if dt_str.endswith("Z"):
        dt_str = dt_str[:-1] + "+00:00"
    try:
        return datetime.fromisoformat(dt_str)
    except ValueError:
        return None


def _parse_authors(raw_authors: Any) -> List[Author]:
    """Build Author objects from a raw list, skipping malformed entries."""
    if not isinstance(raw_authors, (list, tuple)):
        return []
    return [
        Author(
            _id=entry.get("_id"),
            name=entry.get("name"),
            hidden=entry.get("hidden"),
        )
        for entry in raw_authors
        if isinstance(entry, dict)
    ]


def _parse_paper(paper_data: Any) -> Optional[Paper]:
    """Build a Paper from its raw dict, or None when it is missing/invalid."""
    if not isinstance(paper_data, dict) or not paper_data:
        return None
    return Paper(
        id=paper_data.get("id"),
        authors=_parse_authors(paper_data.get("authors")),
        publishedAt=_parse_datetime(paper_data.get("publishedAt")),
        title=paper_data.get("title"),
        summary=paper_data.get("summary"),
        upvotes=paper_data.get("upvotes"),
        discussionId=paper_data.get("discussionId"),
    )


def _parse_submitted_by(submitter: Any) -> Optional[SubmittedBy]:
    """Build a SubmittedBy from its raw dict, or None when missing/invalid."""
    if not isinstance(submitter, dict) or not submitter:
        return None
    return SubmittedBy(
        _id=submitter.get("_id"),
        avatarUrl=submitter.get("avatarUrl"),
        fullname=submitter.get("fullname"),
        name=submitter.get("name"),
        type=submitter.get("type"),
        isPro=submitter.get("isPro"),
        isHf=submitter.get("isHf"),
        isMod=submitter.get("isMod"),
        followerCount=submitter.get("followerCount"),
    )


def parse_article(data: Dict[str, Any]) -> Article:
    """Fault-tolerant conversion of one raw article dict into an Article.

    Missing keys, wrongly typed nested objects, malformed author entries and
    unparsable timestamps all degrade to ``None`` (or are skipped) instead of
    raising.

    Args:
        data: The decoded JSON object for a single article. A non-dict value
            is treated as an empty payload.

    Returns:
        An Article whose fields are populated with whatever could be read.
    """
    if not isinstance(data, dict):
        data = {}

    return Article(
        paper=_parse_paper(data.get("paper")),
        publishedAt=_parse_datetime(data.get("publishedAt")),
        title=data.get("title"),
        thumbnail=data.get("thumbnail"),
        numComments=data.get("numComments"),
        submittedBy=_parse_submitted_by(data.get("submittedBy")),
        isAuthorParticipating=data.get("isAuthorParticipating"),
    )


def main() -> None:
    """Usage example: load article.json, parse every entry and print a summary."""
    import json

    # Imported under an alias so the builtin print is not shadowed.
    from rich import print as rich_print

    # Assumes the raw data is stored in article.json as a JSON array of
    # article objects.
    with open("article.json", encoding="utf-8") as f:
        raw_data = json.load(f)

    articles = [parse_article(raw_article) for raw_article in raw_data]

    # Guard against an empty input file before indexing.
    if articles:
        rich_print(articles[0])
    rich_print(len(articles))


if __name__ == "__main__":
    main()