import arxiv
import json
import os
from typing import List
from mcp.server.fastmcp import FastMCP
# Root directory under which each topic's papers_info.json is stored
PAPER_DIR = "papers"
# Initialize FastMCP server; the tools, resources and prompt below are registered on it
mcp = FastMCP("research")
@mcp.tool()
def search_papers(topic: str, max_results: int = 5) -> List[str]:
    """
    Search for papers on arXiv based on a topic and store their information.

    Results are merged into ``<PAPER_DIR>/<topic slug>/papers_info.json``,
    keyed by each paper's short arXiv ID, so entries already saved for the
    topic are kept and matching IDs are refreshed.

    Args:
        topic: The topic to search for
        max_results: Maximum number of results to retrieve (default: 5)

    Returns:
        List of paper IDs found in the search

    Raises:
        ValueError: If the topic would resolve to a directory outside PAPER_DIR
    """
    import sys  # local import: only needed for the stderr status message

    # Validate the target directory before touching the network or the disk:
    # a topic such as "../../x" must not create files outside PAPER_DIR.
    path = os.path.join(PAPER_DIR, topic.lower().replace(" ", "_"))
    base_dir = os.path.realpath(PAPER_DIR)
    if os.path.commonpath([base_dir, os.path.realpath(path)]) != base_dir:
        raise ValueError(f"Invalid topic: {topic!r}")

    # Use arxiv to find the most relevant articles matching the queried topic
    client = arxiv.Client()
    search = arxiv.Search(
        query=topic,
        max_results=max_results,
        sort_by=arxiv.SortCriterion.Relevance,
    )
    papers = client.results(search)

    # Create directory for this topic
    os.makedirs(path, exist_ok=True)
    file_path = os.path.join(path, "papers_info.json")

    # Try to load existing papers info; a missing or corrupt file starts fresh
    try:
        with open(file_path, "r", encoding="utf-8") as json_file:
            papers_info = json.load(json_file)
    except (FileNotFoundError, json.JSONDecodeError):
        papers_info = {}
    if not isinstance(papers_info, dict):
        # Valid JSON of the wrong shape (e.g. a list) cannot be updated by ID
        papers_info = {}

    # Process each paper and add to papers_info
    paper_ids = []
    for paper in papers:
        short_id = paper.get_short_id()
        paper_ids.append(short_id)
        papers_info[short_id] = {
            'title': paper.title,
            'authors': [author.name for author in paper.authors],
            'summary': paper.summary,
            'pdf_url': paper.pdf_url,
            'published': str(paper.published.date()),
        }

    # Save updated papers_info to json file
    with open(file_path, "w", encoding="utf-8") as json_file:
        json.dump(papers_info, json_file, indent=2)

    # This file runs the server with transport='stdio', where stdout carries
    # protocol messages, so the status line goes to stderr instead.
    print(f"Results are saved in: {file_path}", file=sys.stderr)
    return paper_ids
@mcp.tool()
def extract_info(paper_id: str) -> str:
    """
    Search for information about a specific paper across all topic directories.

    Each immediate sub-directory of PAPER_DIR is checked for a
    papers_info.json file; the first one containing ``paper_id`` wins.

    Args:
        paper_id: The ID of the paper to look for

    Returns:
        JSON string with paper information if found, error message if not found
    """
    import sys  # local import: only needed for the stderr diagnostics

    not_found = f"There's no saved information related to paper {paper_id}."

    # Before the first search PAPER_DIR does not exist yet; that simply means
    # nothing has been saved, not that the tool should fail.
    try:
        entries = os.listdir(PAPER_DIR)
    except FileNotFoundError:
        return not_found

    for item in entries:
        # isfile() on the nested path already implies that `item` is a directory
        file_path = os.path.join(PAPER_DIR, item, "papers_info.json")
        if not os.path.isfile(file_path):
            continue
        try:
            with open(file_path, "r", encoding="utf-8") as json_file:
                papers_info = json.load(json_file)
        except (OSError, json.JSONDecodeError) as e:
            # stdout carries protocol messages under the stdio transport this
            # file starts, so diagnostics are written to stderr.
            print(f"Error reading {file_path}: {str(e)}", file=sys.stderr)
            continue
        # Skip files holding valid JSON that is not an ID -> info mapping
        if isinstance(papers_info, dict) and paper_id in papers_info:
            return json.dumps(papers_info[paper_id], indent=2)

    return not_found
@mcp.resource("papers://folders")
def get_available_folders() -> str:
    """
    Render the available topic folders as a markdown list.

    A sub-directory of the papers directory counts as a topic folder only
    when it contains a papers_info.json file.
    """
    topic_names = []
    if os.path.exists(PAPER_DIR):
        for entry in os.listdir(PAPER_DIR):
            entry_path = os.path.join(PAPER_DIR, entry)
            if not os.path.isdir(entry_path):
                continue
            if os.path.exists(os.path.join(entry_path, "papers_info.json")):
                topic_names.append(entry)

    header = "# Available Topics\n\n"
    if not topic_names:
        return header + "No topics found.\n"

    bullets = "".join(f"- {name}\n" for name in topic_names)
    # The usage hint is emitted once, after the list, and names the last folder listed
    hint = f"\nUse @{topic_names[-1]} to access papers in that topic.\n"
    return header + bullets + hint
@mcp.resource("papers://{topic}")
def get_topic_papers(topic: str) -> str:
    """
    Render the saved papers for one topic as a markdown document.

    Args:
        topic: The research topic to retrieve papers for

    Returns:
        Markdown with one section per saved paper, or a short markdown
        message when the topic has no saved file or the file is not valid JSON
    """
    folder_name = topic.lower().replace(" ", "_")
    papers_file = os.path.join(PAPER_DIR, folder_name, "papers_info.json")

    # Guard clause: nothing has been saved for this topic yet
    if not os.path.exists(papers_file):
        return f"# No papers found for topic: {topic}\n\nTry searching for papers on this topic first."

    try:
        with open(papers_file, 'r') as f:
            papers_data = json.load(f)
    except json.JSONDecodeError:
        return f"# Error reading papers data for {topic}\n\nThe papers data file is corrupted."

    # Collect the markdown fragments and join them once at the end
    sections = [
        f"# Papers on {topic.replace('_', ' ').title()}\n\n",
        f"Total papers: {len(papers_data)}\n\n",
    ]
    for paper_id, details in papers_data.items():
        sections.append(
            f"## {details['title']}\n"
            f"- **Paper ID**: {paper_id}\n"
            f"- **Authors**: {', '.join(details['authors'])}\n"
            f"- **Published**: {details['published']}\n"
            f"- **PDF URL**: [{details['pdf_url']}]({details['pdf_url']})\n\n"
            # Only the first 500 characters of the summary are shown
            f"### Summary\n{details['summary'][:500]}...\n\n"
            "---\n\n"
        )
    return "".join(sections)
@mcp.prompt()
def generate_search_prompt(topic: str, num_papers: int = 5) -> str:
    """
    Build the instruction text asking Claude to find and discuss papers on a topic.

    Args:
        topic: The topic inserted into the prompt
        num_papers: Number of papers the prompt asks for (default: 5)

    Returns:
        The prompt text, which points at the search_papers tool
    """
    prompt_text = f"""Search for {num_papers} academic papers about '{topic}' using the search_papers tool.
Follow these instructions:
1. First, search for papers using search_papers(topic='{topic}', max_results={num_papers})
2. For each paper found, extract and organize the following information:
- Paper title
- Authors
- Publication date
- Brief summary of the key findings
- Main contributions or innovations
- Methodologies used
- Relevance to the topic '{topic}'
3. Provide a comprehensive summary that includes:
- Overview of the current state of research in '{topic}'
- Common themes and trends across the papers
- Key research gaps or areas for future investigation
- Most impactful or influential papers in this area
4. Organize your findings in a clear, structured format with headings and bullet points for easy readability.
Please present both detailed information about each paper and a high-level synthesis of the research landscape in {topic}."""
    return prompt_text
if __name__ == "__main__":
    # Start the MCP server on the stdio transport when this file is run as a script
    mcp.run(transport='stdio')
`research_server_prompt_resource.py` 这个文件实现了一个基于MCP(Model Context Protocol)的研究服务器,主要用于搜索、存储和检索arXiv上的学术论文。下面是对代码的详细解释:
import arxiv
import json
import os
from typing import List
from mcp.server.fastmcp import FastMCP
PAPER_DIR = "papers"
# Initialize FastMCP server
mcp = FastMCP("research")
- 导入必要的库:`arxiv` 用于访问arXiv API,`json` 用于处理JSON数据,`os` 用于文件操作
- 论文存储目录为 `papers`
@mcp.tool()
def search_papers(topic: str, max_results: int = 5) -> List[str]:
这个函数被注册为MCP工具,用于根据主题搜索arXiv上的论文:
- `topic`:搜索主题
- `max_results`:最大结果数(默认5篇)

核心实现:
@mcp.tool()
def extract_info(paper_id: str) -> str:
这个函数被注册为MCP工具,用于根据论文ID检索特定论文的详细信息:
- `paper_id`:要查找的论文ID

实现逻辑:
- 遍历 `papers` 目录下的所有主题文件夹
- 读取其中的 `papers_info.json` 文件

@mcp.resource("papers://folders")
def get_available_folders() -> str:
这个函数被注册为MCP资源,提供URI为 `papers://folders` 的访问点:
- 列出 `papers` 目录中所有可用的主题文件夹

实现方式:
- 遍历 `papers` 目录,查找包含 `papers_info.json` 文件的子目录

@mcp.resource("papers://{topic}")
def get_topic_papers(topic: str) -> str:
这个函数被注册为MCP资源,提供动态URI `papers://{topic}` 的访问点:
- `topic`:要检索的研究主题

实现细节:
@mcp.prompt()
def generate_search_prompt(topic: str, num_papers: int = 5) -> str:
这个函数被注册为MCP提示,用于生成结构化的搜索指令:
- `topic`:要搜索的主题
- `num_papers`:要检索的论文数量(默认5篇)

提示内容包括:
- 使用 `search_papers` 工具搜索论文的指令

if __name__ == "__main__":
# Initialize and run the server
mcp.run(transport='stdio')
当脚本作为主程序运行时,启动MCP服务器,使用标准输入/输出(stdio)作为传输方式。
这个服务器实现了以下核心功能:
这个服务器设计为与MCP兼容的工具,可以被MCP客户端(如聊天机器人)调用,为用户提供学术论文搜索和分析功能。