You can add web search to a local Llama model by defining a search function as a tool and calling the Scavio search API when the model decides to use it. This works with Ollama's OpenAI-compatible endpoint and any framework that supports tool calling.
Prerequisites
- Ollama with llama3.2 or llama3.3 installed
- Python 3.9+
- openai Python SDK
- Scavio API key
Walkthrough
Step 1: Start Ollama and verify tool support
Llama 3.2 and later support function calling. Run a quick test to confirm.
# Pull and start the model
ollama pull llama3.3
ollama serve
# Verify the OpenAI-compatible endpoint
curl http://localhost:11434/v1/models
Step 2: Define the search tool schema
Tools are defined as JSON schemas. This tells Llama when and how to call the search function.
SEARCH_TOOL = {
"type": "function",
"function": {
"name": "web_search",
"description": "Search the web for current information. Use this when you need up-to-date facts, prices, or news.",
"parameters": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "The search query"
},
"platform": {
"type": "string",
"enum": ["google", "amazon", "reddit", "youtube"],
"description": "Search platform. Defaults to google."
}
},
"required": ["query"]
}
}
}
Step 3: Implement the tool executor
When Llama calls the tool, execute the real API request and return the result.
import json
import requests
from openai import OpenAI
SCAVIO_KEY = "your-scavio-api-key"
def execute_web_search(query: str, platform: str = "google") -> str:
r = requests.post(
"https://api.scavio.dev/api/v1/search",
json={"query": query, "platform": platform, "num_results": 5},
headers={"x-api-key": SCAVIO_KEY},
timeout=15
)
r.raise_for_status()
results = r.json().get("organic_results", [])
lines = [f"- {res['title']}: {res.get('snippet', '')}" for res in results[:5]]
return "\n".join(lines) or "No results found."
Step 4: Run the tool-calling loop
Send the user message with tools, check if Llama wants to call a tool, execute it, and send the result back.
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
def _run_search_call(tool_call) -> str:
    """Execute one ``web_search`` tool call and return the text for the model."""
    try:
        args = json.loads(tool_call.function.arguments or "{}")
    except json.JSONDecodeError:
        args = None
    # The arguments are model output: report a malformed call back to the
    # model as text instead of crashing on a missing or unexpected key.
    if not isinstance(args, dict) or not args.get("query"):
        return "Error: web_search requires a JSON object with a non-empty 'query' string."
    return execute_web_search(str(args["query"]), args.get("platform") or "google")


def chat_with_search(user_message: str, model: str = "llama3.3") -> str:
    """Answer ``user_message``, running any web searches the model asks for.

    The message is sent to the local model with ``SEARCH_TOOL`` attached. If
    the reply contains tool calls, each one is executed through
    ``execute_web_search`` and its output is appended as a ``tool`` message;
    the model is then asked once more (without tools) for the final answer.
    Only a single round of tool use is performed.

    Args:
        user_message: The user's question.
        model: Name of the model to query. Defaults to ``"llama3.3"``.

    Returns:
        The content of the assistant's final message.
    """
    messages = [{"role": "user", "content": user_message}]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=[SEARCH_TOOL],
        tool_choice="auto",
    )
    reply = response.choices[0].message

    # Decide on the tool calls themselves rather than on finish_reason.
    # NOTE(review): some OpenAI-compatible servers have been seen reporting
    # "stop" alongside tool calls; checking the payload works either way.
    if not reply.tool_calls:
        return reply.content

    # The assistant message is replayed with all of its tool calls, so every
    # call needs a matching tool message, not just the first one.
    messages.append(reply)
    for tool_call in reply.tool_calls:
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,
            "content": _run_search_call(tool_call),
        })

    final = client.chat.completions.create(model=model, messages=messages)
    return final.choices[0].message.content
print(chat_with_search("What are the current prices for Anthropic Claude API in 2026?"))
Python Example
import json
import requests
from openai import OpenAI
SCAVIO_KEY = "your-scavio-api-key"
# JSON schema for the arguments the model may pass to web_search.
# Only "query" is mandatory; "platform" is restricted to the listed values.
_WEB_SEARCH_PARAMETERS = {
    "type": "object",
    "properties": {
        "query": {
            "type": "string",
            "description": "Search query",
        },
        "platform": {
            "type": "string",
            "enum": ["google", "amazon", "reddit", "youtube"],
            "description": "Platform to search",
        },
    },
    "required": ["query"],
}

# Tool definition passed in the `tools` list of a chat completion request.
SEARCH_TOOL = {
    "type": "function",
    "function": {
        "name": "web_search",
        "description": "Search the web for current information, prices, news, or product data.",
        "parameters": _WEB_SEARCH_PARAMETERS,
    },
}
def web_search(query: str, platform: str = "google") -> str:
    """Run a search through the Scavio API and format the hits as a bullet list.

    Args:
        query: The search query text.
        platform: Platform to search. Defaults to ``"google"``.

    Returns:
        One ``- title: snippet`` line per result (at most five), joined with
        newlines, or ``"No results."`` when nothing came back.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    response = requests.post(
        "https://api.scavio.dev/api/v1/search",
        json={"query": query, "platform": platform, "num_results": 5},
        headers={"x-api-key": SCAVIO_KEY},
        timeout=15,
    )
    response.raise_for_status()

    # `or []` also covers an explicit JSON null, which .get()'s default
    # would pass through and the slice below would then choke on.
    items = response.json().get("organic_results") or []

    # Treat the title as optional, the same way the snippet already is, so a
    # single incomplete result cannot abort the whole search.
    bullets = [
        f"- {item.get('title') or 'Untitled'}: {item.get('snippet', '')}"
        for item in items[:5]
    ]
    return "\n".join(bullets) or "No results."
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")
def _call_web_search(tool_call) -> str:
    """Execute one ``web_search`` tool call and return the text for the model."""
    try:
        args = json.loads(tool_call.function.arguments or "{}")
    except json.JSONDecodeError:
        args = None
    # The arguments are model output: report a malformed call back to the
    # model as text instead of crashing on a missing or unexpected key.
    if not isinstance(args, dict) or not args.get("query"):
        return "Error: web_search requires a JSON object with a non-empty 'query' string."
    return web_search(str(args["query"]), args.get("platform") or "google")


def chat(user_message: str, model: str = "llama3.3") -> str:
    """Answer ``user_message``, running any web searches the model asks for.

    The message is sent with ``SEARCH_TOOL`` attached. If the reply contains
    tool calls, each is executed via ``web_search`` and answered with its own
    ``tool`` message, then the model is asked once more (without tools) for
    the final answer. Only a single round of tool use is performed.

    Args:
        user_message: The user's question.
        model: Name of the model to query. Defaults to ``"llama3.3"``.

    Returns:
        The content of the assistant's final message.
    """
    messages = [{"role": "user", "content": user_message}]
    resp = client.chat.completions.create(
        model=model,
        messages=messages,
        tools=[SEARCH_TOOL],
        tool_choice="auto",
    )
    reply = resp.choices[0].message

    # Decide on the tool calls themselves rather than on finish_reason.
    # NOTE(review): some OpenAI-compatible servers have been seen reporting
    # "stop" alongside tool calls; checking the payload works either way.
    if not reply.tool_calls:
        return reply.content

    # Every tool call in the replayed assistant message needs a matching
    # tool message, not just the first one.
    messages.append(reply)
    messages.extend(
        {"role": "tool", "tool_call_id": tc.id, "content": _call_web_search(tc)}
        for tc in reply.tool_calls
    )

    final = client.chat.completions.create(model=model, messages=messages)
    return final.choices[0].message.content
if __name__ == "__main__":
questions = [
"What is the price of Claude Sonnet per million tokens in 2026?",
"What are people saying on Reddit about local LLMs vs cloud APIs?"
]
for q in questions:
print(f"Q: {q}")
print(f"A: {chat(q)}\n")
JavaScript Example
// Using Ollama's OpenAI-compatible endpoint with the openai npm package
import OpenAI from 'openai';
const SCAVIO_KEY = 'your-scavio-api-key';
const client = new OpenAI({ baseURL: 'http://localhost:11434/v1', apiKey: 'ollama' });
// Tool definition passed in the `tools` array of a chat completion request.
// Each parameter carries a description, matching the Python schema, so the
// model is told what to put in every argument.
const SEARCH_TOOL = {
  type: 'function',
  function: {
    name: 'web_search',
    description: 'Search the web for current information.',
    parameters: {
      type: 'object',
      properties: {
        query: {
          type: 'string',
          description: 'The search query'
        },
        platform: {
          type: 'string',
          enum: ['google', 'amazon', 'reddit', 'youtube'],
          description: 'Search platform. Defaults to google.'
        }
      },
      // Only the query is mandatory; webSearch falls back to google.
      required: ['query']
    }
  }
};
/**
 * Run a search through the Scavio API and format the hits as a bullet list.
 *
 * @param {string} query - The search query text.
 * @param {string} [platform='google'] - Platform to search.
 * @returns {Promise<string>} One "- title: snippet" line per result (at most
 *   five), joined with newlines, or 'No results.' when nothing came back.
 * @throws {Error} If the API answers with a non-2xx status or the request
 *   takes longer than 15 seconds.
 */
async function webSearch(query, platform = 'google') {
  const res = await fetch('https://api.scavio.dev/api/v1/search', {
    method: 'POST',
    headers: { 'Content-Type': 'application/json', 'x-api-key': SCAVIO_KEY },
    body: JSON.stringify({ query, platform, num_results: 5 }),
    // Same 15 s budget as the Python example, so a stalled request cannot hang the chat.
    signal: AbortSignal.timeout(15000)
  });
  // fetch only rejects on network failures; HTTP error statuses must be
  // checked explicitly, otherwise an error body is parsed as if it were results.
  if (!res.ok) {
    throw new Error(`Scavio search failed: HTTP ${res.status} ${res.statusText}`);
  }
  const data = await res.json();
  const lines = (data.organic_results ?? [])
    .slice(0, 5) // cap at five results, as the Python example does
    .map(r => `- ${r.title}: ${r.snippet ?? ''}`);
  return lines.join('\n') || 'No results.';
}
// Executes one web_search tool call and returns the text to send back to the model.
async function runWebSearchCall(toolCall) {
  let args;
  try {
    args = JSON.parse(toolCall.function.arguments || '{}');
  } catch {
    args = null;
  }
  // The arguments are model output: report a malformed call back to the
  // model as text instead of throwing.
  if (args === null || typeof args !== 'object' || !args.query) {
    return "Error: web_search requires a JSON object with a non-empty 'query' string.";
  }
  // `?? undefined` turns an explicit null platform into undefined so that
  // webSearch's default parameter applies.
  return webSearch(String(args.query), args.platform ?? undefined);
}

/**
 * Answer `userMessage`, running any web searches the model asks for.
 *
 * The message is sent with SEARCH_TOOL attached. If the reply contains tool
 * calls, each is executed via webSearch and answered with its own `tool`
 * message, then the model is asked once more (without tools) for the final
 * answer. Only a single round of tool use is performed.
 *
 * @param {string} userMessage - The user's question.
 * @returns {Promise<string>} The content of the assistant's final message.
 */
async function chat(userMessage) {
  const messages = [{ role: 'user', content: userMessage }];
  const resp = await client.chat.completions.create({
    model: 'llama3.3',
    messages,
    tools: [SEARCH_TOOL],
    tool_choice: 'auto'
  });
  const reply = resp.choices[0].message;

  // Decide on the tool calls themselves rather than on finish_reason.
  // NOTE(review): some OpenAI-compatible servers have been seen reporting
  // 'stop' alongside tool calls; checking the payload works either way.
  const toolCalls = reply.tool_calls ?? [];
  if (toolCalls.length === 0) {
    return reply.content;
  }

  // Every tool call in the replayed assistant message needs a matching
  // tool message, not just the first one.
  messages.push(reply);
  for (const tc of toolCalls) {
    const content = await runWebSearchCall(tc);
    messages.push({ role: 'tool', tool_call_id: tc.id, content });
  }

  const final = await client.chat.completions.create({ model: 'llama3.3', messages });
  return final.choices[0].message.content;
}
console.log(await chat('What does Claude Sonnet cost per million tokens in 2026?'));
Expected Output
Q: What is the price of Claude Sonnet per million tokens in 2026?
A: Based on current search results, Claude Sonnet is priced at $3 per million input tokens and $15 per million output tokens as of early 2026.
Q: What are people saying on Reddit about local LLMs vs cloud APIs?
A: Reddit discussions show a split: developers prefer local LLMs for privacy and cost control, while teams favor cloud APIs for reliability and model quality.