Integrate with your codebase

1

Add target agent (if you haven't already)

from maihem import Maihem

maihem_client = Maihem()

maihem_client.add_target_agent(
    name="financial-assistant-x",
    label="Financial Assistant Company X", # Optional
    role="AI Financial Assistant",
    description="An AI assistant that provides information and summaries from financial documents."
    language="en" # (Optional) Default is "en" (English), follow ISO 639
)
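
The name you set here is the identifier you will reference in later steps (for example as target_agent_name when decorating workflow steps and creating tests); the optional label only controls how the agent is displayed in the Maihem dashboard.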
2

Add a decorator to each step of your workflow

This is an example of a basic RAG workflow. Add a decorator to each step of the workflow as shown below.

See the full list of supported evaluators and metrics, along with their required input and output maps.
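
In the example below, each evaluator's map arguments tell Maihem where to find the values it scores: a string such as input_query="message" names a parameter of the decorated function, while a lambda such as output_answer=lambda x: x extracts the relevant value from the function's return value.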

Example agent workflow in Python
import maihem
from maihem.evaluators import EndToEndEvaluator, AnswerGenerationEvaluator, ContextRetrievalEvaluator
from typing import List

class Agent:

    @maihem.workflow_step(
        target_agent_name="financial-assistant-x",
        workflow_name="2 step RAG workflow",
        evaluator=EndToEndEvaluator(
            input_query="message",
            output_answer=lambda x: x # Map the output of your function that contains the answer
        )
    )
    def run_workflow(self, conversation_id: str, message: str) -> str:
        """Trigger the workflow to generate a response"""
        contexts = self.context_retrieval(conversation_id, message)
        answer = self.generate_answer(conversation_id, message, contexts)
        return answer

    @maihem.workflow_step(
        name="Context Retrieval", # (Optional) Name of the step displayed in Maihem's dashboard
        evaluator=ContextRetrievalEvaluator(
            input_query="message",
            output_contexts=lambda x: x # Map the output of your function that contains the contexts
        )
    )
    def context_retrieval(self, conversation_id: str, message: str) -> List[str]:
        """Retrieve a list of chunks to be used as context for the LLM."""
        contexts = retrieve_contexts(message)
        return contexts

    @maihem.workflow_step(
        name="Answer Generation", # (Optional) Name of the step displayed in Maihem's dashboard
        evaluator=AnswerGenerationEvaluator(
            input_query="message",
            input_contexts="contexts",
            output_answer=lambda x: x # Map the output of your function that contains the answer
        )
    )
    def generate_answer(self, conversation_id: str, message: str, contexts: List[str]) -> str:
        """Generate a response using the list of retrieved contexts"""
        answer = call_llm(message, contexts) # Example of an LLM call
        return answer
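
The helpers retrieve_contexts and call_llm above are placeholders for your own retrieval and generation logic. A minimal sketch of what they might look like, with hard-coded values purely for illustration (these bodies are hypothetical, not part of the Maihem SDK):

from typing import List

def retrieve_contexts(message: str) -> List[str]:
    # Illustrative stub: in practice, query your vector store or search index
    return ["Q3 revenue grew 12% year-over-year.", "Operating margin was 18%."]

def call_llm(message: str, contexts: List[str]) -> str:
    # Illustrative stub: in practice, call your LLM provider with the
    # retrieved contexts included in the prompt
    return f"Based on the documents: {contexts[0]}"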

Upload data and run test

3

Format your test data

Make sure the data you want to upload is in the following format:

data = [
    {
        "input_payload": {
            "message": "I want to book a stay in London",
        },
        "conversation_history": [
            {
                "role": "assistant",
                "content": "Hi! How can I help you today?"
            },
        ],
        "output_payload_expected": {  # Optional
            "answer": "Sure! Where in London would you like to stay?",
        },
    },
    {
        "input_payload": {
            "message": "I want to book a stay in San Francisco",
        },
        "conversation_history": [
            {
                "role": "assistant",
                "content": "Hi! How can I help you today?"
            },
        ],
        "output_payload_expected": {  # Optional
            "answer": "Sorry, I can't help with that. I only know about London.",
        },
    },
]
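
If your test cases live in a file, a small helper can map them into this structure. A sketch assuming a hypothetical JSONL file where each line has message, history, and (optionally) expected_answer fields (adapt the field names to your own data):

import json

def load_test_data(path: str) -> list:
    data = []
    with open(path) as f:
        for line in f:
            row = json.loads(line)
            item = {
                "input_payload": {"message": row["message"]},
                "conversation_history": row.get("history", []),
            }
            if "expected_answer" in row:  # expected output is optional
                item["output_payload_expected"] = {"answer": row["expected_answer"]}
            data.append(item)
    return data

data = load_test_data("test_cases.jsonl")  # hypothetical file name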
4

Upload dataset

maihem_client.upload_dataset(
    name="dataset_a",
    label="Dataset A", # (Optional) Name of the dataset to be displayed
    data=data
)

Your dataset dataset_a is now available to use in your tests.

5

Create test

maihem_client.create_test_uploaded_data(
    name="test_upload_data_1",
    label="Test #1 from dataset A", # (Optional) Name of the test to be displayed
    target_agent_name="financial-assistant-x",
    dataset_name="dataset_a"
)
6

Run the test

A test run will generate:

  • Simulated conversations between your target agent and Maihem
  • Evaluations of the conversations
  • A list of detected failures
maihem_client.run_test(
    name="modelX_prompt2_5_28-11-2024",
    label="Model X Prompt v2.5 (28/Nov/2024)", # Optional
    test_name="test_upload_data_1",
    concurrent_conversations=10 # Optional
)
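
A run name that encodes the model, prompt version, and date (as in the example above) makes runs easy to compare later. The optional concurrent_conversations parameter controls how many conversations are processed in parallel.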
7

See test run results in Maihem UI

See the results in your Maihem account.

Or fetch the test results programmatically:

test_run_results = maihem_client.get_test_run_results(
    test_name="test_your_data_1",
    test_run_name="modelX_prompt2_5_28-11-2024"
)
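
The exact shape of the returned results object depends on your SDK version; as a quick check, print it and consult the Maihem docs for the full schema:

print(test_run_results)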