Use Claude 3 on Amazon Bedrock via Boto3, LangChain and Llama-Index
Mar 19, 2024
Update (03/06/2024) — added the LangChain callback to estimate costs
Update (26/04/2024) — new langchain-aws package now available! Updated the LangChain section below.
Anthropic Claude 3 model IDs available in Amazon Bedrock
From smallest to largest:
anthropic.claude-3-haiku-20240307-v1:0
anthropic.claude-3-sonnet-20240229-v1:0
anthropic.claude-3-opus-20240229-v1:0
In the snippets below, replace the model ID with the model you want to use.
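If you are not sure which Claude models are available in your region, you can ask Bedrock directly. The snippet below is a small sketch of my own (it assumes the same us-west-2 region used in the rest of this post) and uses the bedrock control-plane client rather than bedrock-runtime:
import boto3
# List the Anthropic foundation models available in the region
bedrock = boto3.client(service_name="bedrock", region_name="us-west-2")
for summary in bedrock.list_foundation_models(byProvider="Anthropic")["modelSummaries"]:
    print(summary["modelId"])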
Using Claude 3 via Boto3
# -----------------------------------------------
# Setting up Amazon Bedrock with AWS SDK Boto3 🐍️
# -----------------------------------------------
import boto3, json
# Bedrock Runtime
bedrock_runtime = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-west-2",
    # aws_access_key_id=ACCESS_KEY,  # optional - set this value if you haven't run `aws configure`
    # aws_secret_access_key=SECRET_KEY,  # optional - set this value if you haven't run `aws configure`
    # aws_session_token=SESSION_TOKEN,  # optional - set this value if you haven't run `aws configure`
)
# Model configuration
model_id = "anthropic.claude-3-haiku-20240307-v1:0"
model_kwargs = {
    "max_tokens": 2048, "temperature": 0.1,
    "top_k": 250, "top_p": 1, "stop_sequences": ["\n\nHuman"],
}
# Input configuration
prompt = "What is a trullo?"
body = {
    "anthropic_version": "bedrock-2023-05-31",
    "system": "You are an honest and helpful bot.",
    "messages": [
        {"role": "user", "content": [{"type": "text", "text": prompt}]},
    ],
}
body.update(model_kwargs)
# Invoke
response = bedrock_runtime.invoke_model(
    modelId=model_id,
    body=json.dumps(body),
)
# Process and print the response
result = json.loads(response.get("body").read()).get("content", [])[0].get("text", "")
print(result)
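If you want to print the answer while it is being generated, the Bedrock Runtime also offers a streaming call. Here is a minimal sketch (not part of the original snippet) that reuses the same model_id and body and decodes the content_block_delta events emitted by the Anthropic Messages API:
# Invoke with streaming
streaming_response = bedrock_runtime.invoke_model_with_response_stream(
    modelId=model_id,
    body=json.dumps(body),
)
# Print the text deltas as they arrive
for event in streaming_response["body"]:
    chunk = json.loads(event["chunk"]["bytes"])
    if chunk.get("type") == "content_block_delta":
        print(chunk["delta"].get("text", ""), end="")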
Using Claude 3 via LangChain
Remember to install the langchain-aws package first by running pip install langchain-aws boto3 -qU. Then, you can run the following code.
# ---------------------------------------------
# Setting up Amazon Bedrock with LangChain 🦜️🔗
# ---------------------------------------------
import boto3
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_aws import ChatBedrock
# Bedrock Runtime
bedrock_runtime = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-west-2",
    # aws_access_key_id=ACCESS_KEY,  # optional - set this value if you haven't run `aws configure`
    # aws_secret_access_key=SECRET_KEY,  # optional - set this value if you haven't run `aws configure`
    # aws_session_token=SESSION_TOKEN,  # optional - set this value if you haven't run `aws configure`
)
# Model configuration
model_id = "anthropic.claude-3-haiku-20240307-v1:0"
model_kwargs = {
    "max_tokens": 2048, "temperature": 0.1,
    "top_k": 250, "top_p": 1, "stop_sequences": ["\n\nHuman"],
}
# LangChain class for chat
model = ChatBedrock(
    client=bedrock_runtime,
    model_id=model_id,
    model_kwargs=model_kwargs,
)
# Set up the Messages API
messages = [
    ("system", "You are an honest and helpful bot. You reply to the question in a concise and direct way."),
    ("human", "{question}"),
]
# Configure Prompt and Chain (using LCEL)
prompt = ChatPromptTemplate.from_messages(messages)
chain = prompt | model | StrOutputParser()
# Invoke the chain
response = chain.invoke({"question": "What is the biggest city in Puglia, Italy?"})
print(response)
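LCEL chains also support streaming out of the box. A quick sketch, assuming nothing beyond the chain defined above:
# Stream the answer chunk by chunk instead of waiting for the full response
for chunk in chain.stream({"question": "What is the biggest city in Puglia, Italy?"}):
    print(chunk, end="", flush=True)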
To estimate the cost of an invocation, use the LangChain Bedrock Anthropic callback:
from langchain_community.callbacks.manager import get_bedrock_anthropic_callback
with get_bedrock_anthropic_callback() as cb:
    response = chain.invoke({"question": "What is the biggest city in Puglia, Italy?"})
    print(response)
    print(cb)
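Printing cb reports the prompt and completion token counts tracked inside the with block, together with an estimated total cost in USD for Claude models with known pricing.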
Using Claude 3 via Llama-Index
Install dependencies first by running:
%pip install llama-index llama-index-llms-bedrock llama-index-embeddings-bedrock llama-index-readers-web html2text -qU
# ---------------------------------------------
# Setting up Amazon Bedrock with Llama-Index 🦙️
# ---------------------------------------------
# Load dependencies
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.bedrock import BedrockEmbedding
from llama_index.llms.bedrock import Bedrock
from llama_index.readers.web import SimpleWebPageReader
## STEP 1 - INDEXING DATA
# Model that will be used to generate the embeddings
Settings.embed_model = BedrockEmbedding(model="cohere.embed-multilingual-v3", region_name="us-west-2")
# Load Data
documents = SimpleWebPageReader(html_to_text=True).load_data(["https://www.gutenberg.org/cache/epub/52484/pg52484.txt"])
sentence_splitter = SentenceSplitter(chunk_size=400, chunk_overlap=50)
# Create the Vector Store and Index Store in-memory
index = VectorStoreIndex.from_documents(documents, transformations=[sentence_splitter])
## STEP 2 - QUERYING
# Model that will be used to generate the answer given the most relevant chunks
Settings.llm = Bedrock(
    model="anthropic.claude-3-haiku-20240307-v1:0",
    region_name="us-west-2",
    context_size=200000,
)
## STEP 3 - Query
query_engine = index.as_query_engine()
# Ask a question in Italian ("Who is Pinocchio?") about the indexed text
response = query_engine.query("Chi è Pinocchio?")
print(response)
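To see which chunks the query engine retrieved to ground the answer, you can inspect the response's source nodes. A short sketch (attribute names follow the llama-index core API):
# Inspect the retrieved chunks and their similarity scores
for node_with_score in response.source_nodes:
    print(node_with_score.score, node_with_score.node.get_content()[:150])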