Daddy Makers: LangGraph 기반 다중 LLM 에이전트 개발하기

이 글은 Langgraph를 이용해, 간단히 다중 LLM 에이전트를 개발하는 방법을 정리한 것이다.

Local Agentic RAG with LangGraph and Llama

개발 환경 설정

다음과 같이 개발 환경을 설치한다.

pip install langchain langgraph milvus

RAG 처리 및 에이전트 프롬프트 정의하기

웹 사이트에서 얻은 BIM(Building Information Modeling), GIS, 기술 표준 전문 지식들을 RAG 처리한다. 이후, 에이전트에서 사용할 프롬프트들을 정의한다. 각 프롬프트가 포함된 LangChain Expression Language (LCEL) 체인은 RAG 벡터 데이터베이스에 문서에 대한 QA 질문, 질문 결과에 대한 정확도 및 환각 유무 평가, 만약, 질문에 대한 적절한 대답이 없다면 웹페이지 검색하도록 유도하도록 구성된다.

다음 코드를 입력 후 실행한다.

import os

from dotenv import load_dotenv

from typing import List

from typing_extensions import TypedDict

from langchain import hub

from langchain.globals import set_verbose, set_debug

from langchain_community.embeddings import HuggingFaceEmbeddings

from langchain_community.document_loaders import WebBaseLoader

from langchain_community.vectorstores import Chroma

from langchain_community.chat_models import ChatOllama

from langchain.prompts import PromptTemplate

from langchain.text_splitter import RecursiveCharacterTextSplitter

from langchain_core.output_parsers import JsonOutputParser

from langchain_core.output_parsers import StrOutputParser

from langchain_core.output_parsers import JsonOutputParser

from langchain.schema import Document

# setup environment for LLM RAG

load_dotenv()

set_debug(True)

set_verbose(True)

# Load documents

urls = [

"https://www.mdpi.com/2220-9964/7/5/162/",

"https://www.tandfonline.com/doi/full/10.1080/19475683.2020.1743355/",

"https://knowledge.bsigroup.com/articles/harmonize-digitize-and-rationalize-exchange-information-confidently-with-bs/",

]

docs = [WebBaseLoader(url).load() for url in urls]

docs_list = [item for sublist in docs for item in sublist]

text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(

chunk_size=250, chunk_overlap=0

)

doc_splits = text_splitter.split_documents(docs_list)

# Create vectorstore and retriever

vectorstore = Chroma.from_documents(

documents=doc_splits,

collection_name="BIM_GIS",

embedding=HuggingFaceEmbeddings(),

persist_directory="./Chroma_rag.db",

)

retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 3}) # type={similarity_score_threshold, mmr, bm25}, https://wikidocs.net/234016

# Load LLM model using Ollama

local_llm = 'llama3'

llm = ChatOllama(model=local_llm, format="json", temperature=0)

# Test grader prompt with JSON output

prompt = PromptTemplate(

template="""You are a grader assessing relevance

of a retrieved document to a user question. If the document contains keywords related to the user question, grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals.

Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.

Provide the binary score as a JSON with a single key 'score' and no premable or explaination.

Here is the retrieved document:

{document}

Here is the user question:

{question}

""",

input_variables=["question", "document"],

)

retrieval_grader = prompt | llm | JsonOutputParser()

question = "ISO 19166's BIM to GIS Element Mapping"

docs = retriever.invoke(question, ) # https://www.kaggle.com/code/marcinrutecki/rag-mmr-search-in-langchain

doc_txt = docs[0].metadata if 'description' in docs[0].metadata else docs[0].page_content

answer = retrieval_grader.invoke({"question": question, "document": doc_txt})

print(answer)

# Test QA prompt

prompt = PromptTemplate(

template="""You are an assistant for question-answering tasks.

Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know.

Use three sentences maximum and keep the answer concise:

Question: {question}

Context: {context}

Answer:

""",

input_variables=["question", "document"],

)

llm = ChatOllama(model=local_llm, temperature=0)

rag_chain = prompt | llm | StrOutputParser() # Chain

question = "ISO 19166's BIM to GIS Element Mapping"

docs = retriever.invoke(question)

answer = rag_chain.invoke({"context": docs, "question": question})

print(answer)

# Test hallucination grader prompt with JSON output

llm = ChatOllama(model=local_llm, format="json", temperature=0)

prompt = PromptTemplate(

template="""You are a grader assessing whether

an answer is grounded in / supported by a set of facts. Give a binary score 'yes' or 'no' score to indicate whether the answer is grounded in / supported by a set of facts. Provide the binary score as a JSON with a single key 'score' and no preamble or explanation.

Here are the facts:

{documents}

Here is the answer:

{generation}

""",

input_variables=["generation", "documents"],

)

hallucination_grader = prompt | llm | JsonOutputParser()

answer = hallucination_grader.invoke({"documents": docs, "generation": answer})

print(answer)

출력이 다음과 같다면, 성공한 것이다.

LLM 기반 다중 에이전트 구현하기

이제 앞에서 정의된 체인들을 이용해, 에이전트를 정의하고, 그래프를 만든 후 빌드한다.

# Web search tool setup

from langchain_community.tools.tavily_search import TavilySearchResults

tavily_api_key = os.environ['TAVILY_API_KEY'] = <Travily API>

web_search_tool = TavilySearchResults(k=3, tavily_api_key=tavily_api_key)

# Define the graph state using langgraph

from langgraph.graph import END, StateGraph

class GraphState(TypedDict):

"""

Represents the state of our graph.

Attributes:

question: question

generation: LLM generation

web_search: whether to add search

documents: list of documents

"""

question : str

generation : str

web_search : str

documents : List[str]

def retrieve(state): # node. Retrieve documents from vectorstore

# Args. state (dict): The current graph state

# Returns. state (dict): New key added to state, documents, that contains retrieved documents

print("---RETRIEVE---")

question = state["question"]

documents = retriever.invoke(question)

return {"documents": documents, "question": question}

def generate(state): # node. Generate answer using RAG on retrieved documents

# Args: state (dict): The current graph state

# Returns: state (dict): New key added to state, generation, that contains LLM generation

print("---GENERATE---")

question = state["question"]

documents = state["documents"]

generation = rag_chain.invoke({"context": documents, "question": question})

return {"documents": documents, "question": question, "generation": generation}

def grade_documents(state): # node. Determines whether the retrieved documents are relevant to the question If any document is not relevant, we will set a flag to run web search

# Args: state (dict): The current graph state

# Returns: state (dict): Filtered out irrelevant documents and updated web_search state

print("---CHECK DOCUMENT RELEVANCE TO QUESTION---")

question = state["question"]

documents = state["documents"]

filtered_docs = [] # Score each doc

web_search = "No"

for d in documents:

score = retrieval_grader.invoke({"question": question, "document": d.page_content})

grade = score['score']

if grade.lower() == "yes": # Document relevant

print("---GRADE: DOCUMENT RELEVANT---")

filtered_docs.append(d)

else: # Document not relevant

print("---GRADE: DOCUMENT NOT RELEVANT---")

# We do not include the document in filtered_docs. We set a flag to indicate that we want to run web search

web_search = "Yes"

continue

return {"documents": filtered_docs, "question": question, "web_search": web_search}

def web_search(state): # Web search based based on the question

# Args: state (dict): The current graph state

# Returns: state (dict): Appended web results to documents

print("---WEB SEARCH---")

question = state["question"]

documents = state["documents"]

docs = web_search_tool.invoke({"query": question}) # Web search

web_results = "\n".join([d["content"] for d in docs])

web_results = Document(page_content=web_results)

if documents is not None:

documents.append(web_results)

else:

documents = [web_results]

return {"documents": documents, "question": question}

def route_question(state): # Conditional edge. Route question to web search or RAG.

# Args: state (dict): The current graph state

# Returns: str: Next node to call

print("---ROUTE QUESTION---")

question = state["question"]

print(question)

source = question_router.invoke({"question": question})

print(source)

print(source['datasource'])

if source['datasource'] == 'web_search':

print("---ROUTE QUESTION TO WEB SEARCH---")

return "websearch"

elif source['datasource'] == 'vectorstore':

print("---ROUTE QUESTION TO RAG---")

return "vectorstore"

def decide_to_generate(state): # Determines whether to generate an answer, or add web search

# Args: state (dict): The current graph state

# Returns: str: Binary decision for next node to call

print("---ASSESS GRADED DOCUMENTS---")

question = state["question"]

web_search = state["web_search"]

filtered_documents = state["documents"]

if web_search == "Yes": # All documents have been filtered check_relevance. We will re-generate a new query

print("---DECISION: ALL DOCUMENTS ARE NOT RELEVANT TO QUESTION, INCLUDE WEB SEARCH---")

return "websearch"

else: # We have relevant documents, so generate answer

print("---DECISION: GENERATE---")

return "generate"

def grade_generation_v_documents_and_question(state): # Conditional edge. Determines whether the generation is grounded in the document and answers question.

# Args: state (dict): The current graph state

# Returns: str: Decision for next node to call

print("---CHECK HALLUCINATIONS---")

question = state["question"]

documents = state["documents"]

generation = state["generation"]

score = hallucination_grader.invoke({"documents": documents, "generation": generation})

grade = score['score']

if grade == "yes": # Check hallucination

print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")

print("---GRADE GENERATION vs QUESTION---") # Check question-answering

score = answer_grader.invoke({"question": question,"generation": generation})