文件管理

Documents API 讓你透過 API 上傳、管理知識庫文件，控制文件的向量化狀態，以及查看切分結果。

支援的文件格式

格式	MIME 類型	說明
PDF	`application/pdf`	`.pdf` 格式
Markdown	`text/markdown`	程式碼、技術文件首選
Word	`application/vnd.openxmlformats-officedocument.wordprocessingml.document`	`.docx` 格式
純文字	`text/plain`	`.txt` 格式
HTML	`text/html`	網頁內容

上傳文件

Node.js
Python
Go

import { readFile } from 'node:fs/promises';

const buffer = await readFile('./faq.pdf');

const doc = await client.documents.upload({
  file: buffer,
  filename: 'faq.pdf',
  contentType: 'application/pdf',
  // 切分設定（可選，不設定使用專案預設值）
  chunk_strategy: 'recursive',    // 切分策略
  chunk_size: 800,                // 每個段落最大字元數
  chunk_overlap: 80,              // 相鄰段落重疊的字元數
  metadata: { category: 'support', version: '2026-01' },
});

console.log('文件 ID:', doc.id);
console.log('狀態:', doc.status);   // processing | ready | failed

# 可傳入檔案路徑、bytes 或 file 物件
document = await client.documents.upload(
    file="./faq.pdf",
    metadata={"category": "support", "version": "2026-01"},
    chunk_strategy="recursive",
    chunk_size=800,
    chunk_overlap=80,
)
print("文件 ID:", document["id"])
print("狀態:", document["status"])

import (
    "fmt"
    "log"
    "os"

    vecstruct "github.com/vecstruct/vecstruct-sdk-go"
)

f, err := os.Open("./faq.pdf")
if err != nil {
    log.Fatal(err)
}
defer f.Close()

doc, err := client.UploadDocument(ctx, vecstruct.UploadDocumentRequest{
    File:          f,
    Filename:      "faq.pdf",
    ContentType:   "application/pdf",
    ChunkStrategy: "recursive",
    ChunkSize:     800,
    ChunkOverlap:  80,
    Metadata: map[string]string{
        "category": "support",
        "version":  "2026-01",
    },
})
if err != nil {
    log.Fatal(err)
}
fmt.Println("文件 ID:", doc.ID)
fmt.Println("狀態:", doc.Status)

上傳後，系統會非同步處理文件。處理完成（狀態為 `ready`）後才能被 RAG 查詢到。

文件狀態

狀態	說明
`processing`	正在解析和向量化，尚未可用
`ready`	處理完成，可被 RAG 查詢
`failed`	處理失敗（格式不支援、文件損壞等）
`inactive`	已停用，不會出現在 RAG 結果中

查詢文件清單

Node.js
Python
Go

const list = await client.documents.list({
  search: 'faq',      // 按文件名稱搜尋（可選）
  page: 1,
  page_size: 20,
});

console.log(`共 ${list.total} 份文件`);
for (const doc of list.documents) {
  console.log(`${doc.filename} — ${doc.status} — ${doc.chunk_count} 個段落`);
}

documents = await client.documents.list(page=1, page_size=20)
for doc in documents["documents"]:
    print(f"{doc['filename']} — {doc['status']}")

list, err := client.ListDocuments(ctx, &vecstruct.ListDocumentsOptions{
    Page:     1,
    PageSize: 20,
})
if err != nil {
    log.Fatal(err)
}
fmt.Printf("共 %d 份文件\n", list.Total)
for _, doc := range list.Documents {
    fmt.Printf("%s — %s — %d 個段落\n", doc.Filename, doc.Status, doc.ChunkCount)
}

取得文件詳情

Node.js
Python
Go

const doc = await client.documents.retrieve('doc-uuid');
console.log('文件名稱:', doc.filename);
console.log('段落數:', doc.chunk_count);
console.log('切分策略:', doc.chunk_strategy);

doc = await client.documents.get("doc-uuid")
print("文件名稱:", doc["filename"])
print("段落數:", doc["chunk_count"])

doc, err := client.GetDocument(ctx, "doc-uuid")
if err != nil {
    log.Fatal(err)
}
fmt.Println("文件名稱:", doc.Filename)
fmt.Println("段落數:", doc.ChunkCount)
fmt.Println("切分策略:", doc.ChunkStrategy)

停用與啟用文件

停用後的文件不會出現在 RAG 查詢結果中，但檔案還在，可以重新啟用。

Node.js
Python
Go

// 停用：從 RAG 中移除，但保留檔案
await client.documents.deactivate('doc-uuid');

// 啟用：重新加入 RAG，會重新排隊向量化
await client.documents.activate('doc-uuid');

# 停用
await client.documents.deactivate("doc-uuid")

# 啟用
await client.documents.activate("doc-uuid")

// 停用：從 RAG 中移除，但保留檔案
if err := client.DeactivateDocument(ctx, "doc-uuid"); err != nil {
    log.Fatal(err)
}

// 啟用：重新加入 RAG，會重新排隊向量化
if err := client.ActivateDocument(ctx, "doc-uuid"); err != nil {
    log.Fatal(err)
}

重新索引

當你修改了切分設定，或文件向量化出現問題時，可以觸發重新索引：

Node.js
Python
Go

// 先更新切分設定
await client.documents.updateChunking('doc-uuid', {
  chunk_strategy: 'semantic',
  chunk_semantic_threshold: 0.75,
});

// 再觸發重新索引
await client.documents.reindex('doc-uuid');

# 更新切分設定後重新索引
await client.documents.update_chunking(
    "doc-uuid",
    chunk_strategy="semantic",
    chunk_semantic_threshold=0.75,
)
await client.documents.reindex("doc-uuid")

// 先更新切分設定
if err := client.UpdateDocumentChunking(ctx, "doc-uuid", vecstruct.UpdateChunkingRequest{
    ChunkStrategy:          "semantic",
    ChunkSemanticThreshold: 0.75,
}); err != nil {
    log.Fatal(err)
}

// 再觸發重新索引
if err := client.ReindexDocument(ctx, "doc-uuid"); err != nil {
    log.Fatal(err)
}

查看切分結果

可以查看文件被切成了哪些段落，確認切分品質：

Node.js
Python
Go

const { chunks, total } = await client.documents.chunks('doc-uuid');
console.log(`共 ${total} 個段落`);
for (const chunk of chunks.slice(0, 5)) {
  console.log(`段落 ${chunk.index}: ${chunk.content.slice(0, 100)}...`);
}

result = await client.documents.chunks("doc-uuid")
for chunk in result["chunks"][:5]:
    print(f"段落 {chunk['index']}: {chunk['content'][:100]}...")

chunks, err := client.GetDocumentChunks(ctx, "doc-uuid", nil)
if err != nil {
    log.Fatal(err)
}
fmt.Printf("共 %d 個段落\n", chunks.Total)
// 只預覽前 5 個段落；段落不足 5 個時直接切片 [:5] 會 panic，因此先檢查長度
preview := chunks.Chunks
if len(preview) > 5 {
    preview = preview[:5]
}
for _, c := range preview {
    content := c.Content
    if len(content) > 100 {
        content = content[:100]
    }
    fmt.Printf("段落 %d: %s...\n", c.Index, content)
}

刪除文件

Node.js
Python
Go

await client.documents.delete('doc-uuid');
// 刪除後無法復原，向量資料和原始檔案都會一起刪除

await client.documents.delete("doc-uuid")

if err := client.DeleteDocument(ctx, "doc-uuid"); err != nil {
    log.Fatal(err)
}
// 刪除後無法復原，向量資料和原始檔案都會一起刪除

切分策略說明

切分（Chunking）是指把長文件分割成較小的段落，以便向量化和語義搜尋。

切分方式會直接影響搜尋品質。以下是各策略的適用情境：

策略	說明	適合的文件類型
`recursive`	按文件結構遞迴切分（預設）	一般文件、Markdown
`fixed`	按固定字元數切分	格式簡單的文件
`semantic`	按語義完整性切分	長篇文章、報告
`sentence`	以句子為最小單位	FAQ、對話記錄
`parent_child`	雙層索引，保留上下文	需要理解前後文的文件
`llm`	用 AI 判斷切分位置	需要高精度的重要文件

推薦配置

文件類型	建議策略	chunk_size	chunk_overlap
PDF 手冊	`recursive`	1000	200
Markdown 文件	`recursive`	1000	100
FAQ	`sentence`	500	0
長篇報告	`parent_child`	2000	200

注意事項

文件大小上限依方案而定
PDF 中的圖片文字會透過 AI 視覺辨識，可能較耗時
重新索引會消耗 Embedding Credits
需要 `source.write` 權限才能上傳和管理文件

支援的文件格式

上傳文件

文件狀態

查詢文件清單

取得文件詳情

停用與啟用文件

重新索引

查看切分結果

刪除文件

切分策略說明

推薦配置

注意事項