generated from vercel/ai-chatbot
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
8057195
commit e180018
Showing
2 changed files
with
101 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
import { ScoredPineconeRecord } from '@pinecone-database/pinecone' | ||
import { getCodeMatchesFromEmbeddings } from './codePinecone' | ||
import { getEmbeddings } from './embeddings' | ||
|
||
/**
 * Metadata shape this module assumes for each Pinecone match.
 * Only `text` is read in this file (by `getProtoContext`).
 */
export type Metadata = {
  /** NOTE(review): presumably where the chunk came from — not read here; confirm against the indexer. */
  url: string
  /** Text that `getProtoContext` joins into the returned context string. */
  text: string
  /** NOTE(review): not read in this file; meaning is defined by the indexer — confirm. */
  chunk: string
}
|
||
/**
 * Builds retrieval context for `message` from the vectors stored in `namespace`.
 *
 * The message is embedded, the 5 nearest matches are fetched, and every match
 * whose score does not exceed `minScore` is discarded.
 *
 * NOTE(review): matches are fetched with `getCodeMatchesFromEmbeddings` from
 * './codePinecone' although this function is named for "proto" context —
 * confirm that the code lookup (rather than a proto-specific one) is intended.
 *
 * @param message - Text to embed and search for.
 * @param namespace - Pinecone namespace passed through to the match lookup.
 * @param maxTokens - Upper bound on the length of the returned string. Despite
 *   the name it is applied as a character count, not a token count.
 * @param minScore - A match must score strictly above this value to be kept.
 * @param getOnlyText - When false, the qualifying match records are returned
 *   as-is instead of their joined text.
 * @returns The qualifying records when `getOnlyText` is false; otherwise the
 *   `text` metadata of the qualifying matches joined with newlines and cut to
 *   at most `maxTokens` characters.
 */
export const getProtoContext = async (
  message: string,
  namespace: string,
  maxTokens = 12000,
  minScore = 0.7,
  getOnlyText = true
): Promise<string | ScoredPineconeRecord[]> => {
  const embedding = await getEmbeddings(message)
  const matches = await getCodeMatchesFromEmbeddings(embedding, 5, namespace)

  // Check the type explicitly: a truthiness test would also drop a legitimate
  // score of 0 when `minScore` is negative.
  const qualifyingDocs = matches.filter(
    m => typeof m.score === 'number' && m.score > minScore
  )

  if (!getOnlyText) {
    return qualifyingDocs
  }

  // Skip records without usable text instead of throwing on missing metadata.
  const texts: string[] = []
  for (const match of qualifyingDocs) {
    const text = (match.metadata as Metadata | undefined)?.text
    if (typeof text === 'string') {
      texts.push(text)
    }
  }

  return texts.join('\n').substring(0, maxTokens)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
import { | ||
Pinecone, | ||
type ScoredPineconeRecord | ||
} from '@pinecone-database/pinecone' | ||
|
||
/**
 * Metadata shape used to type the Pinecone index and the matches returned by
 * `getProtoMatchesFromEmbeddings`. None of the fields are read in this file.
 */
export type Metadata = {
  /** NOTE(review): presumably where the chunk came from — confirm against the indexer. */
  url: string
  /** NOTE(review): presumably the chunk's text content — confirm against the indexer. */
  text: string
  /** NOTE(review): meaning is defined by the indexer — confirm. */
  chunk: string
  /** NOTE(review): presumably a content hash — confirm against the indexer. */
  hash: string
}
|
||
/**
 * Queries the configured Pinecone index for the vectors nearest to
 * `embeddings`, restricted to records whose `vector_type` metadata equals
 * 'proto'.
 *
 * Reads PINECONE_ENVIRONMENT, PINECONE_API_KEY and PINECONE_INDEX from the
 * process environment, creates a new client, and lists the indexes on every
 * call.
 *
 * @param embeddings - Query vector.
 * @param topK - Maximum number of matches to request.
 * @param namespace - Namespace within the index to query.
 * @returns The scored matches with their metadata, or an empty array when the
 *   query response carries no matches.
 * @throws Error when PINECONE_INDEX is unset or empty, when that index is not
 *   among those returned by `listIndexes()`, or when the query itself fails.
 */
const getProtoMatchesFromEmbeddings = async (
  embeddings: number[],
  topK: number,
  namespace: string
): Promise<ScoredPineconeRecord<Metadata>[]> => {
  const pinecone = new Pinecone({
    environment: process.env.PINECONE_ENVIRONMENT as string,
    apiKey: process.env.PINECONE_API_KEY as string
  })

  const indexName = process.env.PINECONE_INDEX
  if (!indexName) {
    // Name the variable that is actually read so the message is actionable.
    throw new Error('PINECONE_INDEX environment variable not set')
  }

  // Fail early with a clear message if the expected index is missing.
  const indexes = await pinecone.listIndexes()
  if (!indexes.some(i => i.name === indexName)) {
    throw new Error(`Index ${indexName} does not exist`)
  }

  const pineconeNamespace = pinecone
    .Index<Metadata>(indexName)
    .namespace(namespace ?? '')

  try {
    const queryResult = await pineconeNamespace.query({
      vector: embeddings,
      topK,
      filter: {
        vector_type: 'proto'
      },
      includeMetadata: true
    })
    return queryResult.matches ?? []
  } catch (e) {
    // Log the original error object, then rethrow with context.
    console.log('Error querying embeddings: ', e)
    throw new Error(`Error querying embeddings: ${e}`)
  }
}

export { getProtoMatchesFromEmbeddings }
|