Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
berkingurcan committed Nov 15, 2024
1 parent 8057195 commit e180018
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 0 deletions.
40 changes: 40 additions & 0 deletions app/api/chat/utils/protoContext.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import { ScoredPineconeRecord } from '@pinecone-database/pinecone'
import { getCodeMatchesFromEmbeddings } from './codePinecone'
import { getEmbeddings } from './embeddings'

/**
 * Metadata shape this module expects on Pinecone matches.
 *
 * Only `text` is read in this file: `getProtoContext` joins the `text` of each
 * qualifying match into the context string it returns.
 */
export type Metadata = {
// NOTE(review): presumably the URL of the document the vector was built from — confirm against the ingestion code.
url: string
// Text of the match; this is the field joined into the returned context string.
text: string
// NOTE(review): not read in this file; meaning (chunk text vs. chunk id) is not visible here — confirm.
chunk: string
}

/**
 * Builds retrieval context for a chat message from Pinecone matches.
 *
 * Embeds `message`, fetches the 5 nearest matches in `namespace`, and keeps
 * only those whose score is strictly greater than `minScore`.
 *
 * NOTE(review): matches are fetched with `getCodeMatchesFromEmbeddings` from
 * './codePinecone'. Given this helper's name, the proto-specific query
 * (`getProtoMatchesFromEmbeddings`) may have been intended — confirm.
 *
 * @param message - Text to embed and search with.
 * @param namespace - Pinecone namespace to search in.
 * @param maxTokens - Maximum length of the returned text. Despite the name,
 *   it is applied as a character count (`substring`), not a token count.
 * @param minScore - Exclusive lower bound on a match's score.
 * @param getOnlyText - When `true` (default), return the joined text of the
 *   qualifying matches; when `false`, return the qualifying records as-is.
 * @returns The newline-joined, truncated text, or the qualifying records.
 */
export const getProtoContext = async (
  message: string,
  namespace: string,
  maxTokens = 12000,
  minScore = 0.7,
  getOnlyText = true
): Promise<string | ScoredPineconeRecord[]> => {
  // Get the embeddings of the input message
  const embedding = await getEmbeddings(message)

  // Retrieve the matches for the embeddings from the specified namespace
  const matches = await getCodeMatchesFromEmbeddings(embedding, 5, namespace)

  // Keep matches scoring above the threshold. The explicit type check (rather
  // than a truthiness check) keeps a score of exactly 0 eligible when the
  // caller passes a negative `minScore`.
  const qualifyingDocs = matches.filter(
    m => typeof m.score === 'number' && m.score > minScore
  )
  console.log('CODEqual docs', qualifyingDocs)

  if (!getOnlyText) {
    // Return the records untouched (no deduplication is performed)
    return qualifyingDocs
  }

  // Collect the text of each qualifying match, skipping matches that carry no
  // metadata or no text instead of throwing on them.
  const docs = qualifyingDocs
    .map(match => (match.metadata as Metadata | undefined)?.text)
    .filter((text): text is string => typeof text === 'string')
  console.log('CODEdocs', docs)

  // Join all the chunks of text together and truncate to `maxTokens` characters
  return docs.join('\n').substring(0, maxTokens)
}
61 changes: 61 additions & 0 deletions app/api/chat/utils/protoPinecone.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
import {
Pinecone,
type ScoredPineconeRecord
} from '@pinecone-database/pinecone'

/**
 * Metadata shape used to type the Pinecone index and the records returned by
 * `getProtoMatchesFromEmbeddings`. None of the fields are read in this file.
 */
export type Metadata = {
// NOTE(review): presumably the URL of the source document — confirm against the ingestion code.
url: string
// NOTE(review): presumably the text content stored with the vector — confirm.
text: string
// NOTE(review): meaning (chunk text vs. chunk id) is not visible here — confirm.
chunk: string
// NOTE(review): presumably a content hash of the record — confirm what is hashed and how.
hash: string
}

/**
 * Queries Pinecone for the records nearest to `embeddings`, restricted to
 * vectors whose `vector_type` metadata equals `'proto'`.
 *
 * Configuration is read from the `PINECONE_ENVIRONMENT`, `PINECONE_API_KEY`
 * and `PINECONE_INDEX` environment variables. The list of indexes is fetched
 * on every call to verify that the configured index exists.
 *
 * @param embeddings - Query vector.
 * @param topK - Maximum number of matches to request.
 * @param namespace - Namespace to query; nullish values fall back to `''`.
 * @returns The matches with metadata included, or `[]` if the response has none.
 * @throws Error if `PINECONE_INDEX` is unset or empty, if the index is not
 *   listed, or if the query fails (the underlying error is logged and wrapped).
 */
const getProtoMatchesFromEmbeddings = async (
  embeddings: number[],
  topK: number,
  namespace: string
): Promise<ScoredPineconeRecord<Metadata>[]> => {
  // Obtain a client for Pinecone
  const pinecone = new Pinecone({
    environment: process.env.PINECONE_ENVIRONMENT as string,
    apiKey: process.env.PINECONE_API_KEY as string
  })

  // The message names the variable that is actually read (PINECONE_INDEX)
  const indexName = process.env.PINECONE_INDEX
  if (!indexName) {
    throw new Error('PINECONE_INDEX environment variable not set')
  }

  // Retrieve the list of indexes to check if the expected index exists
  const indexes = await pinecone.listIndexes()
  if (!indexes.some(i => i.name === indexName)) {
    throw new Error(`Index ${indexName} does not exist`)
  }

  // Get the Pinecone index, scoped to the requested namespace
  const index = pinecone.Index<Metadata>(indexName)
  const pineconeNamespace = index.namespace(namespace ?? '')

  try {
    // Query the index, keeping only vectors tagged as 'proto'
    const queryResult = await pineconeNamespace.query({
      vector: embeddings,
      topK,
      filter: {
        vector_type: 'proto'
      },
      includeMetadata: true
    })
    return queryResult.matches || []
  } catch (e) {
    // Log the error and rethrow it with context
    console.log('Error querying embeddings: ', e)
    throw new Error(`Error querying embeddings: ${e}`)
  }
}

export { getProtoMatchesFromEmbeddings }

0 comments on commit e180018

Please sign in to comment.