Use SitemapKit as a tool in LangChain agents or as a URL source for document loaders. Get all URLs from a domain to build RAG pipelines.
from langchain.tools import tool
from langchain_community.document_loaders import WebBaseLoader
import requests
@tool
def get_sitemap_urls(domain: str) -> list[str]:
    """Get all URLs from a domain's sitemap using the SitemapKit API.

    Args:
        domain: The domain (or URL) whose sitemaps should be discovered
            and extracted, e.g. "docs.example.com".

    Returns:
        A list of page URLs ("loc" entries) from every sitemap found.

    Raises:
        requests.HTTPError: If the SitemapKit API responds with an error status.
    """
    resp = requests.post(
        "https://sitemapkit.com/api/v1/sitemap/full",
        headers={"x-api-key": "YOUR_API_KEY", "Content-Type": "application/json"},
        json={"url": domain},
        timeout=30,  # requests has no default timeout; avoid hanging forever
    )
    # Fail loudly on HTTP errors instead of a confusing KeyError on "urls" below.
    resp.raise_for_status()
    return [u["loc"] for u in resp.json()["urls"]]
# Use in a RAG pipeline: fetch the sitemap URLs, load the pages, then index them.
# NOTE(review): .invoke() is the LangChain tool entry point; assumes YOUR_API_KEY
# above has been replaced with a real SitemapKit key — confirm before running.
urls = get_sitemap_urls.invoke("docs.example.com")
loader = WebBaseLoader(urls[:50])  # Load first 50 pages to cap fetch time/cost
docs = loader.load()  # fetches each URL and returns them as LangChain Documents
# Now index docs into your vector store
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
vectorstore = FAISS.from_documents(docs, OpenAIEmbeddings())

Authenticate with your sk_live_* API key. Use the /api/v1/sitemap/full endpoint to discover and extract all sitemaps from a domain in one call.

- POST /api/v1/sitemap/discover — Find all sitemaps on a domain
- POST /api/v1/sitemap/extract — Parse a sitemap URL and extract all URLs
- POST /api/v1/sitemap/full — Discover + extract in one call (recommended)

100 free API calls/month. No credit card required.