Prerequisites

Before we begin, you’ll need a Google AI API key and a Klavis API key (both are set as environment variables in the next section).

Installation

First, install the required packages:

pip install google-generativeai klavis

Setup Environment Variables

import os
import google.generativeai as genai

# Set environment variables.
# NOTE(review): hard-coding secrets in source is for demonstration only —
# in real projects load them from a secret manager or a .env file.
os.environ["GOOGLE_AI_API_KEY"] = "your-google-ai-api-key-here"  # Replace with your actual Google AI API key
os.environ["KLAVIS_API_KEY"] = "your-klavis-api-key-here"       # Replace with your actual Klavis API key

# Configure Gemini with the key we just placed in the environment.
genai.configure(api_key=os.getenv("GOOGLE_AI_API_KEY"))

Basic Setup

import os
import google.generativeai as genai
from klavis import Klavis
from klavis.types import McpServerName, ConnectionType, ToolFormat

# Initialize clients: Gemini is configured globally via the module,
# Klavis is a regular client object used for MCP server management.
genai.configure(api_key=os.getenv("GOOGLE_AI_API_KEY"))
klavis_client = Klavis(api_key=os.getenv("KLAVIS_API_KEY"))

# Constants
GEMINI_MODEL = "gemini-2.5-flash"  # or "gemini-pro", "gemini-pro-vision"

AI Agent with MCP Integration

Now we’ll create an intelligent agent that can use MCP servers through Klavis API. This agent will:

  1. Create MCP Instances: Set up connections to external services
  2. Tool Discovery: Automatically find available tools from MCP servers
  3. Function Calling: Use Gemini’s function calling capabilities
  4. Tool Execution: Execute tools through Klavis API
  5. Smart Responses: Generate intelligent responses based on tool results
def gemini_with_mcp_server(mcp_server_url: str, user_query: str) -> str:
    """Answer *user_query* with Gemini, using tools exposed by a Klavis MCP server.

    Discovers the server's tools, lets Gemini decide whether to call one,
    executes at most one tool call through the Klavis API, and returns the
    model's final text answer.

    Relies on module-level ``klavis_client``, ``genai`` and ``GEMINI_MODEL``
    being configured (see Basic Setup above).
    """
    # 1. Get tools from the MCP server, already formatted for Gemini.
    mcp_server_tools = klavis_client.mcp_server.list_tools(
        server_url=mcp_server_url,
        connection_type=ConnectionType.STREAMABLE_HTTP,
        format=ToolFormat.GEMINI,
    )

    # 2. Initialize a Gemini model with the discovered tools attached.
    model = genai.GenerativeModel(
        model_name=GEMINI_MODEL,
        tools=mcp_server_tools.tools
    )

    # 3. Start a chat session so the tool-call exchange shares context.
    chat = model.start_chat()

    # 4. Send the user's request.
    response = chat.send_message(user_query)

    # 5. Look for a function call in ANY part of the reply.
    #    Indexing parts[0] directly would raise IndexError on an empty
    #    reply and would miss a call that arrives after a text part.
    function_call = next(
        (
            part.function_call
            for part in response.candidates[0].content.parts
            if getattr(part, "function_call", None)
        ),
        None,
    )
    if function_call is None:
        # No tool was requested — the model answered directly.
        return response.text

    function_name = function_call.name
    function_args = dict(function_call.args)

    print(f"🔧 Calling: {function_name}, with args: {function_args}")

    # 6. Execute the requested tool through the Klavis API.
    result = klavis_client.mcp_server.call_tools(
        server_url=mcp_server_url,
        tool_name=function_name,
        tool_args=function_args,
        connection_type=ConnectionType.STREAMABLE_HTTP
    )

    # 7. Feed the tool output back so Gemini can compose the final answer.
    function_response = genai.protos.Part(
        function_response=genai.protos.FunctionResponse(
            name=function_name,
            response={"result": result.result.content[0]['text']}
        )
    )

    final_response = chat.send_message([function_response])
    return final_response.text

Use Case Examples

Example 1: YouTube Video Summarization

Step 1 — Create YouTube Server: set up a YouTube MCP server instance.

Step 2 — Analyze Video: use Gemini to summarize a YouTube video with timestamps.

# Create a YouTube MCP server instance through Klavis.
# user_id/platform_name identify this tenant to Klavis; no OAuth is
# needed for YouTube, so the instance is usable immediately.
youtube_mcp_instance = klavis_client.mcp_server.create_server_instance(
    server_name=McpServerName.YOUTUBE,
    user_id="1234",
    platform_name="Klavis",
    connection_type=ConnectionType.STREAMABLE_HTTP,
)

# Summarize your favorite video
YOUTUBE_VIDEO_URL = "https://www.youtube.com/watch?v=LCEmiRjPEtQ"  # pick a video you like!

# Ask Gemini to summarize; the YouTube tool is invoked behind the scenes.
result = gemini_with_mcp_server(
    mcp_server_url=youtube_mcp_instance.server_url, 
    user_query=f"Please provide a complete summary of this YouTube video with timestamp: {YOUTUBE_VIDEO_URL}"
)

print(result)

Example 2: Gmail Email Management

Gmail integration requires OAuth authentication, so you’ll need to authorize the application in your browser.

Step 1 — Create Gmail Server: create a Gmail MCP server instance.

Step 2 — OAuth Authorization: complete the OAuth flow for Gmail access.

Step 3 — Send Email: use Gemini to send an email.

import webbrowser

# Create a Gmail MCP server instance (requires OAuth before use).
gmail_mcp_server = klavis_client.mcp_server.create_server_instance(
    server_name=McpServerName.GMAIL,
    user_id="1234",
    platform_name="Klavis",
    connection_type=ConnectionType.STREAMABLE_HTTP,
)

# Redirect to the Gmail OAuth page for authorization; the server instance
# cannot act on the mailbox until the user grants access in the browser.
webbrowser.open(gmail_mcp_server.oauth_url)
print(f"🔐 Opening OAuth authorization for Gmail, if you are not redirected, please open the following URL in your browser: {gmail_mcp_server.oauth_url}")

# After OAuth authorization, send an email
EMAIL_RECIPIENT = "example@email.com"  # Replace with your email
EMAIL_SUBJECT = "Test Gemini + Gmail MCP Server"
EMAIL_BODY = "Hello World from Gemini!"

# Gemini translates the natural-language request into a Gmail tool call.
result = gemini_with_mcp_server(
    mcp_server_url=gmail_mcp_server.server_url, 
    user_query=f"Please send an email to {EMAIL_RECIPIENT} with subject {EMAIL_SUBJECT} and body {EMAIL_BODY}"
)

print(result)

Complete Integration Example

Here’s a complete working example that demonstrates the full integration:

import os
import google.generativeai as genai
from klavis import Klavis
from klavis.types import McpServerName, ConnectionType, ToolFormat

# Setup — replace both placeholders with real keys before running.
os.environ["GOOGLE_AI_API_KEY"] = "your-google-ai-api-key-here"
os.environ["KLAVIS_API_KEY"] = "your-klavis-api-key-here"

# Configure the Gemini SDK globally and create the Klavis client.
genai.configure(api_key=os.getenv("GOOGLE_AI_API_KEY"))
klavis_client = Klavis(api_key=os.getenv("KLAVIS_API_KEY"))

def gemini_with_mcp_server(mcp_server_url: str, user_query: str) -> str:
    """Answer *user_query* with Gemini, using tools from a Klavis MCP server.

    Discovers the server's tools, lets Gemini decide whether to call one,
    executes at most one tool call through the Klavis API, and returns the
    model's final text answer.
    """
    # Get tools from the MCP server, formatted for Gemini function calling.
    mcp_server_tools = klavis_client.mcp_server.list_tools(
        server_url=mcp_server_url,
        connection_type=ConnectionType.STREAMABLE_HTTP,
        format=ToolFormat.GEMINI,
    )

    # Initialize a Gemini model with the discovered tools attached.
    model = genai.GenerativeModel(
        model_name="gemini-2.5-flash",
        tools=mcp_server_tools.tools
    )

    # Start a chat session so the tool-call exchange shares context.
    chat = model.start_chat()

    # Send the user's request.
    response = chat.send_message(user_query)

    # Find a function call in ANY part of the reply. Indexing parts[0]
    # directly would raise IndexError on an empty reply and would miss a
    # call that arrives after a text part.
    function_call = next(
        (
            part.function_call
            for part in response.candidates[0].content.parts
            if getattr(part, "function_call", None)
        ),
        None,
    )
    if function_call is None:
        # No tool requested — the model answered directly.
        return response.text

    function_name = function_call.name
    function_args = dict(function_call.args)

    print(f"🔧 Calling: {function_name}, with args: {function_args}")

    # Execute the requested tool through the Klavis API.
    result = klavis_client.mcp_server.call_tools(
        server_url=mcp_server_url,
        tool_name=function_name,
        tool_args=function_args,
        connection_type=ConnectionType.STREAMABLE_HTTP
    )

    # Feed the tool output back so Gemini can compose the final answer.
    function_response = genai.protos.Part(
        function_response=genai.protos.FunctionResponse(
            name=function_name,
            response={"result": result.result.content[0]['text']}
        )
    )

    final_response = chat.send_message([function_response])
    return final_response.text

# Create a YouTube MCP instance (no OAuth required for YouTube).
youtube_mcp_instance = klavis_client.mcp_server.create_server_instance(
    server_name=McpServerName.YOUTUBE,
    user_id="1234",
    platform_name="Klavis",
    connection_type=ConnectionType.STREAMABLE_HTTP,
)

# Use the integration: Gemini will call the YouTube tool to fetch the
# transcript, then summarize it.
YOUTUBE_VIDEO_URL = "https://www.youtube.com/watch?v=LCEmiRjPEtQ"
result = gemini_with_mcp_server(
    mcp_server_url=youtube_mcp_instance.server_url, 
    user_query=f"Please provide a complete summary of this YouTube video with timestamp: {YOUTUBE_VIDEO_URL}"
)

print("✅ Summary:", result)

Advanced Features

Multimodal Processing

Gemini excels at processing multiple types of content simultaneously:

import PIL.Image

# Create a multimodal model that accepts both text and images.
multimodal_model = genai.GenerativeModel('gemini-pro-vision')

# Load an image from disk (replace the path with a real file).
image = PIL.Image.open('path/to/image.jpg')

# Process image with text: the content list mixes a text prompt and a
# PIL image in a single request.
response = multimodal_model.generate_content([
    "What do you see in this image? Also, if there's any text visible, extract it.",
    image
])

print(response.text)

Streaming Responses

For real-time applications, you can stream responses:

def stream_gemini_response(prompt):
    """Print a Gemini completion to stdout incrementally as chunks arrive."""
    # A plain text model — no tools are needed for streaming output.
    streaming_model = genai.GenerativeModel('gemini-2.5-flash')

    # stream=True makes generate_content yield partial chunks instead of
    # waiting for the whole response; flush each one immediately.
    for piece in streaming_model.generate_content(prompt, stream=True):
        print(piece.text, end='', flush=True)

Next Steps

Explore More MCP Servers

Try other available servers like Slack, Notion, GitHub, etc.

Multimodal Workflows

Build workflows that combine text, images, and other media

Production Deployment

Scale these patterns for production applications

Custom Integrations

Build custom MCP servers for your specific needs

Useful Resources

Happy building with Gemini and Klavis! 🚀