工业日志AI大模型智能分析系统-后端实现

目录

项目主要架构

完整系统架构

主要系统架构解析图

思路解析

模板json 示例

主要核心代码示例

LangGraph工作流 (backend/ai/workflows.py)

LangChain Agents (backend/ai/agents.py)

Django视图 (backend/core/views.py)

配置 (config.py)

响应示例

关键优势


项目主要架构

        LangGraph、LangChain、Django、向量数据库、Ollama、Mysql数据库

完整系统架构

log-ai-analyzer/
├── backend/                  # Django后端
│   ├── core/                # 核心业务逻辑
│   │   ├── models.py        # 数据模型
│   │   └── parsers/        # 日志解析器
│   ├── ai/                 # AI模块
│   │   ├── workflows.py    # LangGraph流程
│   │   └── agents.py       # LangChain Agents
│   └── manage.py
├── knowledge/              # 知识库
│   ├── error_templates/    # 错误模板
│   └── solutions/         # 解决方案库
├── scripts/               # 运维脚本
└── config.py              # 配置

主要系统架构解析图

（图1：主要系统架构解析图，见原文配图）

思路解析

1. 解析数据结构:

        首先将报错日志交给LLM 进行解析成结构化数据:

        pattern(正则数据信息)、component(组件如CPU、内存)、source

2. 配置模板:

        根据component 和日志信息过滤出相似的多个 id(索引ID) 和 confidence(相似度)

3. 检索数据:

        根据索引ID 查询出所有的解决方案solutions和步骤steps及 id

4. LLM选择更优解:

        LLM 根据上下文中的结构化数据和可选解决方案智能选择最优解，并返回方案ID，用于从数据库中查询出可行方案；当没有可用方案、LLM 无法直接回答时，则根据上下文智能检索相关信息返回给用户作为参考

模板json 示例
{
  "pattern": "xxx restoration fail,.*not in \"0.0.0.0\" error",
  "component": "BMC",
  "source": "BMC",
  "solutions": [
    {
      "description": "重新xxx",
      "steps": ["1. 关闭xxx", "2. 找到xxx", "3. 重新xxx", "4. 重启xxx"],
      "priority": 1
    },
    {
      "description": "检查BMC网络配置",
      "steps": ["1. 登录BMC管理界面", "2. 检查网络设置", "3. 恢复默认设置"],
      "priority": 2
    }
  ]
}

主要核心代码示例

LangGraph工作流 (backend/ai/workflows.py)
from langgraph.graph import StateGraph
from typing import TypedDict
from ai.agents import LogParser, SolutionFinder


class AnalysisState(TypedDict):
    """Shared state dict passed between the LangGraph workflow nodes."""
    raw_log: str  # original log text as submitted by the caller
    parsed_log: dict  # structured fields extracted by the LLM (component, source, error_type, ...)
    matched_template: dict  # {"id": ..., "confidence": ...} from the vector-store search
    possible_solutions: list[dict]  # candidate solutions loaded from the database
    final_solution: dict  # LLM's pick: {"selected_solution_id": ..., "confidence": ...}


def build_workflow():
    """Build and compile the log-analysis LangGraph workflow.

    Pipeline: parse raw log -> match error template -> retrieve candidate
    solutions -> let the LLM select the best one.

    Returns:
        A compiled LangGraph app whose ``invoke`` accepts an
        ``AnalysisState``-shaped dict (at minimum ``raw_log``).
    """
    # Instantiate each agent once and share it between its nodes.
    # The original built a fresh LogParser/SolutionFinder per node, which
    # constructed a new LLM client and vector store every time.
    parser = LogParser()
    finder = SolutionFinder()

    workflow = StateGraph(AnalysisState)

    # Nodes: each receives the shared state dict and returns a partial update.
    workflow.add_node("parse", parser.parse)
    workflow.add_node("match", parser.match_template)
    workflow.add_node("find_solutions", finder.retrieve)
    workflow.add_node("select_solution", finder.select_best)

    # Strictly linear flow.
    workflow.set_entry_point("parse")
    workflow.add_edge("parse", "match")
    workflow.add_edge("match", "find_solutions")
    workflow.add_edge("find_solutions", "select_solution")

    return workflow.compile()
LangChain Agents (backend/ai/agents.py)
import pdb

from langchain_community.llms import Ollama
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from config import config
import numpy as np
from core.models import Solution


class LogParser:
    """Parses raw device logs with an LLM and matches them to error templates."""

    def __init__(self):
        # Low temperature: field extraction should be deterministic, not creative.
        self.llm = Ollama(
            model=config.OLLAMA_MODEL,
            base_url=config.OLLAMA_ENDPOINT,
            temperature=0.1
        )
        # Build the embedding client once and share it with the vector store;
        # the original created a fresh OllamaEmbeddings on every call and
        # printed the DB path as a leftover debug statement.
        self.embeddings = self._get_embeddings()
        self.vectorstore = Chroma(
            persist_directory=config.VECTOR_DB_PATH,
            embedding_function=self.embeddings
        )

    def _get_embeddings(self):
        """Return the Ollama embedding client used for template matching."""
        from langchain_community.embeddings import OllamaEmbeddings
        return OllamaEmbeddings(
            model=config.EMBEDDING_MODEL,
            base_url=config.OLLAMA_ENDPOINT,
        )

    def parse(self, state: dict) -> dict:
        """Extract structured fields (component, source, ...) from the raw log.

        Returns a partial state update: {"parsed_log": {...}}.
        """
        prompt = ChatPromptTemplate.from_template("""
        从以下工业设备日志中提取结构化信息:
        {log}

        返回JSON包含字段:
        - component (受影响的部件)
        - source (错误来源)
        - error_type (错误类型)
        - critical_level (严重程度1-5)
        """)
        chain = prompt | self.llm | JsonOutputParser()
        return {"parsed_log": chain.invoke({"log": state["raw_log"]})}

    def match_template(self, state: dict) -> dict:
        """Find the most similar error template for the raw log.

        Returns {"matched_template": {"id": ..., "confidence": ...}};
        ``id`` is None when no template matches the component filter.
        """
        embedding = self.embeddings.embed_query(state["raw_log"])
        # Use the store's own scoring: returns (doc, distance) pairs.
        # The original dotted the query vector against an embedding it
        # expected to find in document metadata, which Chroma metadata
        # cannot reliably hold, and crashed with IndexError on no match.
        results = self.vectorstore.similarity_search_with_score_by_vector(
            embedding=embedding,
            k=3,
            filter={"component": state["parsed_log"]["component"]}
        )
        if not results:
            # No template for this component; downstream retrieval will
            # simply yield an empty solution list.
            return {"matched_template": {"id": None, "confidence": 0.0}}

        best_doc, best_distance = results[0]
        return {
            "matched_template": {
                "id": best_doc.metadata.get("id"),
                # Chroma returns a distance (lower is better); map it to a
                # (0, 1] similarity. NOTE(review): assumes the collection's
                # default distance space -- confirm the configured metric.
                "confidence": 1.0 / (1.0 + float(best_distance))
            }
        }


class SolutionFinder:
    """Retrieves candidate solutions from the DB and asks the LLM to pick one."""

    def __init__(self):
        # Slightly higher temperature than parsing: choosing among
        # alternatives benefits from a little flexibility.
        self.llm = Ollama(
            base_url=config.OLLAMA_ENDPOINT,
            model=config.OLLAMA_MODEL,
            temperature=0.3
        )

    def retrieve(self, state: dict) -> dict:
        """Load all solutions linked to the matched template, best priority first.

        Returns a partial state update: {"possible_solutions": [...]}.
        """
        template_id = state["matched_template"]["id"]
        queryset = Solution.objects.filter(error_template_id=template_id)

        candidates = []
        for solution in queryset.order_by("priority"):
            candidates.append({
                "id": solution.id,
                "description": solution.description,
                "steps": solution.steps
            })
        return {"possible_solutions": candidates}

    def select_best(self, state: dict) -> dict:
        """Ask the LLM to pick the most suitable solution for this log.

        Returns {"final_solution": {"selected_solution_id": ..., "confidence": ...}}.
        """
        template = ChatPromptTemplate.from_template("""
        根据以下日志和解决方案,选择最合适的方案:
        日志: {log}
        解析结果: {parsed_log}
        可选方案: {solutions}

        返回JSON包含:
        - selected_solution_id (选择的方案ID)
        - confidence (置信度0-1)
        """)
        pipeline = template | self.llm | JsonOutputParser()
        payload = {
            "log": state["raw_log"],
            "parsed_log": state["parsed_log"],
            "solutions": state["possible_solutions"]
        }
        return {"final_solution": pipeline.invoke(payload)}
Django视图 (backend/core/views.py)
from rest_framework.views import APIView
from rest_framework.response import Response
from ai.workflows import build_workflow
from core.models import DeviceLog
from core.models import Solution


class LogAnalysisAPI(APIView):
    """POST endpoint: persist a raw device log, run the AI workflow,
    and return the selected solution."""

    # Compile the workflow once per process instead of per request:
    # build_workflow() constructs LLM clients and a vector store, which
    # is far too expensive to repeat on every POST.
    _workflow = None

    @classmethod
    def _get_workflow(cls):
        """Lazily build and cache the compiled LangGraph workflow."""
        if cls._workflow is None:
            cls._workflow = build_workflow()
        return cls._workflow

    def post(self, request):
        """Analyze one raw log and return {"log_id", "solution"}."""
        raw = request.data.get("log")
        if not raw:
            # A missing/empty payload used to raise KeyError -> HTTP 500.
            return Response({"error": "missing required field: log"}, status=400)

        # Persist the raw log before analysis so it is kept even if the
        # workflow fails later.
        log = DeviceLog.objects.create(raw_message=raw)

        # Execute the analysis workflow (debug print removed).
        result = self._get_workflow().invoke({
            "raw_log": log.raw_message
        })

        # Update the record with the structured analysis results.
        parsed = result["parsed_log"]
        log.component = parsed["component"]
        log.source = parsed["source"]
        log.error_type = parsed["error_type"]
        log.solution_id = result["final_solution"]["selected_solution_id"]
        log.processed = True
        log.save()

        return Response({
            "log_id": log.id,
            "solution": self._format_solution(result)
        })

    def _format_solution(self, result):
        """Resolve the selected solution ID to its description/steps payload."""
        solution = Solution.objects.get(
            id=result["final_solution"]["selected_solution_id"]
        )
        return {
            "description": solution.description,
            "steps": solution.steps,
            "confidence": result["final_solution"]["confidence"]
        }
配置 (config.py)
from pathlib import Path


import os


class Config:
    """Central application configuration.

    Every value can be overridden via an environment variable; the
    defaults reproduce the previous hard-coded settings so existing
    deployments keep working unchanged.
    """

    BASE_DIR = Path(__file__).parent
    KNOWLEDGE_DIR = BASE_DIR / "knowledge"

    # Ollama settings
    OLLAMA_ENDPOINT = os.getenv("OLLAMA_ENDPOINT", "http://localhost:11434")
    OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "llama3")
    EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "nomic-embed-text")

    # Vector database location
    VECTOR_DB_PATH = os.getenv("VECTOR_DB_PATH", str(BASE_DIR / "chroma_db"))

    # Relational database. SECURITY: credentials were previously committed
    # in source; read them from the environment instead.
    DATABASES = {
        'default': {
            'ENGINE': 'django.db.backends.mysql',
            'NAME': os.getenv("MYSQL_DATABASE", 'log_analysis'),
            'USER': os.getenv("MYSQL_USER", 'xxx'),
            'PASSWORD': os.getenv("MYSQL_PASSWORD", 'password'),
            'HOST': os.getenv("MYSQL_HOST", 'localhost'),
            'PORT': os.getenv("MYSQL_PORT", '3306'),
        }
    }


# Module-level singleton used throughout the project.
config = Config()

响应示例

{
  "log_id": 123,
  "solution": {
    "description": "重新xxx",
    "steps": [
      "1. 关闭xxx",
      "2. 找到xxx",
      "3. 重新xxx",
      "4. 重启xxx"
    ],
    "confidence": 0.92
  }
}

关键优势

  1. 智能解析:利用LLM理解非结构化日志

  2. 精准匹配:向量搜索+关键词过滤确保准确性

  3. 生产就绪

    • 完整的工作流状态管理

    • 与Django深度集成

    • 支持高并发处理

参考来源: deepseek...

你可能感兴趣的:(工业日志AI大模型智能分析系统-后端实现)