```python
import asyncio

from pydantic import BaseModel

from ragbits.core.llms import LiteLLM
from ragbits.core.prompt import Prompt


class QuestionAnswerPromptInput(BaseModel):
    question: str


class QuestionAnswerPromptOutput(BaseModel):
    answer: str


class QuestionAnswerPrompt(Prompt[QuestionAnswerPromptInput, QuestionAnswerPromptOutput]):
    system_prompt = """
    You are a question answering agent. Answer the question to the best of your ability.
    """
    user_prompt = """
    Question: {{ question }}
    """


llm = LiteLLM(model_name="gpt-4.1-nano", use_structured_output=True)


async def main() -> None:
    prompt = QuestionAnswerPrompt(
        QuestionAnswerPromptInput(question="What are high memory and low memory on linux?")
    )
    response = await llm.generate(prompt)
    print(response.answer)


if __name__ == "__main__":
    asyncio.run(main())
```
Document Search
To build and query a simple vector store index:
```python
import asyncio

from ragbits.core.embeddings import LiteLLMEmbedder
from ragbits.core.vector_stores import InMemoryVectorStore
from ragbits.document_search import DocumentSearch

embedder = LiteLLMEmbedder(model_name="text-embedding-3-small")
vector_store = InMemoryVectorStore(embedder=embedder)
document_search = DocumentSearch(vector_store=vector_store)


async def run() -> None:
    await document_search.ingest("web://https://arxiv.org/pdf/1706.03762")
    result = await document_search.search("What are the key findings presented in this paper?")
    print(result)


if __name__ == "__main__":
    asyncio.run(run())
```
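The value returned by `search` is a collection of retrieved elements; each element exposes its matched text through `text_representation`, the same attribute the RAG example below uses to build the prompt context. A small follow-up sketch that reuses the `document_search` instance configured above; the query string is illustrative:

```python
async def show_matches() -> None:
    # Reuses the `document_search` object from the snippet above.
    results = await document_search.search("What is multi-head attention?")
    for element in results:
        # Print the text of each retrieved chunk.
        print(element.text_representation)
```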
Retrieval-Augmented Generation
To combine document search with LLM generation into a simple RAG pipeline:
```python
import asyncio

from pydantic import BaseModel

from ragbits.core.embeddings import LiteLLMEmbedder
from ragbits.core.llms import LiteLLM
from ragbits.core.prompt import Prompt
from ragbits.core.vector_stores import InMemoryVectorStore
from ragbits.document_search import DocumentSearch


class QuestionAnswerPromptInput(BaseModel):
    question: str
    context: list[str]


class QuestionAnswerPromptOutput(BaseModel):
    answer: str


class QuestionAnswerPrompt(Prompt[QuestionAnswerPromptInput, QuestionAnswerPromptOutput]):
    system_prompt = """
    You are a question answering agent. Answer the question that will be provided using context.
    If in the given context there is not enough information refuse to answer.
    """
    user_prompt = """
    Question: {{ question }}
    Context: {% for item in context %}
        {{ item }}
    {%- endfor %}
    """


embedder = LiteLLMEmbedder(model_name="text-embedding-3-small")
vector_store = InMemoryVectorStore(embedder=embedder)
document_search = DocumentSearch(vector_store=vector_store)
llm = LiteLLM(model_name="gpt-4.1-nano", use_structured_output=True)


async def run() -> None:
    question = "What are the key findings presented in this paper?"

    await document_search.ingest("web://https://arxiv.org/pdf/1706.03762")
    result = await document_search.search(question)

    prompt = QuestionAnswerPrompt(
        QuestionAnswerPromptInput(
            question=question,
            context=[element.text_representation for element in result],
        )
    )
    response = await llm.generate(prompt)
    print(response.answer)


if __name__ == "__main__":
    asyncio.run(run())
```
Chatbot interface with UI
To expose your RAG application through Ragbits UI:
```python
from collections.abc import AsyncGenerator

from pydantic import BaseModel

from ragbits.chat.api import RagbitsAPI
from ragbits.chat.interface import ChatInterface
from ragbits.chat.interface.types import ChatContext, ChatResponse
from ragbits.core.embeddings import LiteLLMEmbedder
from ragbits.core.llms import LiteLLM
from ragbits.core.prompt import Prompt
from ragbits.core.prompt.base import ChatFormat
from ragbits.core.vector_stores import InMemoryVectorStore
from ragbits.document_search import DocumentSearch


class QuestionAnswerPromptInput(BaseModel):
    question: str
    context: list[str]


class QuestionAnswerPrompt(Prompt[QuestionAnswerPromptInput, str]):
    system_prompt = """
    You are a question answering agent. Answer the question that will be provided using context.
    If in the given context there is not enough information refuse to answer.
    """
    user_prompt = """
    Question: {{ question }}
    Context: {% for item in context %}{{ item }}{%- endfor %}
    """


class MyChat(ChatInterface):
    """Chat interface for fullapp application."""

    async def setup(self) -> None:
        self.embedder = LiteLLMEmbedder(model_name="text-embedding-3-small")
        self.vector_store = InMemoryVectorStore(embedder=self.embedder)
        self.document_search = DocumentSearch(vector_store=self.vector_store)
        self.llm = LiteLLM(model_name="gpt-4.1-nano", use_structured_output=True)
        await self.document_search.ingest("web://https://arxiv.org/pdf/1706.03762")

    async def chat(
        self,
        message: str,
        history: ChatFormat | None = None,
        context: ChatContext | None = None,
    ) -> AsyncGenerator[ChatResponse, None]:
        # Search for relevant documents
        result = await self.document_search.search(message)

        prompt = QuestionAnswerPrompt(
            QuestionAnswerPromptInput(
                question=message,
                context=[element.text_representation for element in result],
            )
        )

        # Stream the response from the LLM
        async for chunk in self.llm.generate_streaming(prompt):
            yield self.create_text_response(chunk)


if __name__ == "__main__":
    RagbitsAPI(MyChat).run()
```
Rapid development
Create Ragbits projects from templates:
```bash
uvx create-ragbits-app
```
Explore the create-ragbits-app repo here. If you have a new idea for a template, feel free to contribute!
Documentation
Quickstart - Get started with Ragbits in a few minutes
How-to - Learn how to use Ragbits in your projects