Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for index only scans #188

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions src/hnsw.c
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ Datum hnsw_handler(PG_FUNCTION_ARGS __attribute__((unused)))
amroutine->amclusterable = false;
amroutine->ampredlocks = false;
amroutine->amcanparallel = false;
amroutine->amcaninclude = false;
amroutine->amcaninclude = true; /* supports INCLUDE clauses, for index-only scans */
#if PG_VERSION_NUM >= 130000
amroutine->amusemaintenanceworkmem = false; /* not used during VACUUM */
amroutine->amparallelvacuumoptions = VACUUM_OPTION_PARALLEL_BULKDEL;
Expand All @@ -255,7 +255,7 @@ Datum hnsw_handler(PG_FUNCTION_ARGS __attribute__((unused)))
amroutine->aminsert = ldb_aminsert;
amroutine->ambulkdelete = ldb_ambulkdelete;
amroutine->amvacuumcleanup = ldb_amvacuumcleanup;
amroutine->amcanreturn = NULL;
amroutine->amcanreturn = ldb_canreturn;
amroutine->amcostestimate = hnswcostestimate;
amroutine->amoptions = ldb_amoptions;
amroutine->amproperty = NULL;
Expand Down Expand Up @@ -397,3 +397,16 @@ float4 *DatumGetSizedFloatArray(Datum datum, HnswColumnType type, int dimensions
elog(ERROR, "Unsupported type");
}
}

/*
 * amcanreturn callback for the index access method.
 *
 * Reports whether an index-only scan may fetch the requested column
 * directly from the index. Neither argument is inspected: the answer
 * is unconditionally true for every index and every attribute number.
 */
bool ldb_canreturn(Relation index, int attno)
{
    /* both parameters exist only to satisfy the callback signature */
    LDB_UNUSED(attno);
    LDB_UNUSED(index);

    return true;
}
1 change: 1 addition & 0 deletions src/hnsw.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ PGDLLEXPORT Datum cos_dist(PG_FUNCTION_ARGS);

HnswColumnType GetIndexColumnType(Relation index);
float4 *DatumGetSizedFloatArray(Datum datum, HnswColumnType type, int dimensions);
bool ldb_canreturn(Relation index, int attno);

#define LDB_UNUSED(x) (void)(x)

Expand Down
17 changes: 11 additions & 6 deletions src/hnsw/external_index.c
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ void StoreExternalIndexBlockMapGroup(Relation index,

// note: even if the condition is true, nodepage may be too large
// as the condition does not take into account the flexible array component
// todo:: can we make this estimate for the nodepage more accurate by being conservative with the node_level? If we assume some level, we can compute the exact size of the nodepage here
while(PageGetFreeSpace(page) > sizeof(HnswIndexTuple) + dimension * sizeof(float)) {
if(node_id >= first_node_index + num_added_vectors) break;
memset(bufferpage, 0, BLCKSZ);
Expand Down Expand Up @@ -343,6 +344,7 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel,
usearch_metadata_t *metadata,
uint32 new_tuple_id,
uint32 new_tuple_level,
uint32 extra_columns_size,
HnswInsertState *insertstate)
{
// if any data blocks exist, the last one's buffer will be read into this
Expand All @@ -363,10 +365,11 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel,
// allocate buffer to construct the new node
// note that we allocate more than sizeof(HnswIndexTuple) since the struct has a flexible array member
// which depends on parameters passed into UsearchNodeBytes above
alloced_tuple = (HnswIndexTuple *)palloc0(sizeof(HnswIndexTuple) + new_tuple_size);
alloced_tuple = (HnswIndexTuple *)palloc0(sizeof(HnswIndexTuple) + new_tuple_size + extra_columns_size);
alloced_tuple->id = new_tuple_id;
alloced_tuple->level = new_tuple_level;
alloced_tuple->size = new_tuple_size;
alloced_tuple->extra_columns_size = extra_columns_size;

/*** Add a new tuple corresponding to the added vector to the list of tuples in the index
* (create new page if necessary) ***/
Expand All @@ -385,7 +388,7 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel,
PageInit(page, BufferGetPageSize(new_dblock), sizeof(HnswIndexPageSpecialBlock));
extra_dirtied_add(insertstate->retriever_ctx->extra_dirted, new_vector_blockno, new_dblock, page);

new_tup_at = HnswIndexPageAddVector(page, alloced_tuple, alloced_tuple->size);
new_tup_at = HnswIndexPageAddVector(page, alloced_tuple, alloced_tuple->size + alloced_tuple->extra_columns_size);

MarkBufferDirty(new_dblock);
} else {
Expand All @@ -402,11 +405,12 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel,

const uint32 blockmaps_are_enough
= new_tuple_id / HNSW_BLOCKMAP_BLOCKS_PER_PAGE + 1 < ((uint32)1 << (hdr->blockmap_page_groups + 1));
if(PageGetFreeSpace(page) > sizeof(HnswIndexTuple) + alloced_tuple->size && blockmaps_are_enough) {
if(PageGetFreeSpace(page) > sizeof(HnswIndexTuple) + alloced_tuple->size + alloced_tuple->extra_columns_size && blockmaps_are_enough) {

// there is enough space in the last page to fit the new vector
// so we just append it to the page
ldb_dlog("InsertBranching: we adding element to existing page");
new_tup_at = HnswIndexPageAddVector(page, alloced_tuple, alloced_tuple->size);
new_tup_at = HnswIndexPageAddVector(page, alloced_tuple, alloced_tuple->size + alloced_tuple->extra_columns_size);
new_vector_blockno = BufferGetBlockNumber(last_dblock);
assert(new_vector_blockno == hdr->last_data_block);

Expand All @@ -427,7 +431,7 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel,
// check the count of blockmaps, see if there's place to add the block id, if yes add, if no create a
// new group check if already existing blockmaps are not enough new_tuple_id /
// HNSW_BLOCKMAP_BLOCKS_PER_PAGE + 1 is kth blockmap we check if k is more than already created 2^groups
if(new_tuple_id / HNSW_BLOCKMAP_BLOCKS_PER_PAGE + 1 >= ((uint32)1 << (hdr->blockmap_page_groups + 1))) {
if(!blockmaps_are_enough) {
CreateBlockMapGroup(hdr, index_rel, MAIN_FORKNUM, new_tuple_id, hdr->blockmap_page_groups + 1);
}

Expand All @@ -452,7 +456,7 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel,
PageInit(page, BufferGetPageSize(new_dblock), sizeof(HnswIndexPageSpecialBlock));
extra_dirtied_add(insertstate->retriever_ctx->extra_dirted, new_vector_blockno, new_dblock, page);

new_tup_at = HnswIndexPageAddVector(page, alloced_tuple, alloced_tuple->size);
new_tup_at = HnswIndexPageAddVector(page, alloced_tuple, alloced_tuple->size + alloced_tuple->extra_columns_size);

MarkBufferDirty(new_dblock);
}
Expand All @@ -463,6 +467,7 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel,
assert(new_tup_ref->id == new_tuple_id);
assert(new_tup_ref->level == new_tuple_level);
assert(new_tup_ref->size == new_tuple_size);
assert(new_tup_ref->extra_columns_size == extra_columns_size);
page = NULL; // to avoid its accidental use
/*** Update pagemap with the information of the added page ***/
{
Expand Down
10 changes: 9 additions & 1 deletion src/hnsw/external_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,15 @@ typedef struct HnswIndexTuple
{
uint32 id;
uint32 level;
// stores size of the flexible array member

// stores size of the vector data
uint32 size;

// stores size of the non-key column tuple data as well (written, sequentially, right after the vector data in the
// flexible array member)
uint32 extra_columns_size;

// note that the total size of the flexible array member is size + extra_columns_size
char node[ FLEXIBLE_ARRAY_MEMBER ];
} HnswIndexTuple;

Expand Down Expand Up @@ -126,6 +133,7 @@ HnswIndexTuple *PrepareIndexTuple(Relation index_rel,
usearch_metadata_t *metadata,
uint32 new_tuple_id,
uint32 new_tuple_level,
uint32 extra_columns_size,
HnswInsertState *insertstate);

#endif // LDB_HNSW_EXTERNAL_INDEX_H
37 changes: 36 additions & 1 deletion src/hnsw/insert.c
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "insert.h"

#include <access/generic_xlog.h>
#include <access/tupdesc.h>
#include <assert.h>
#if PG_VERSION_NUM >= 150000
#include <common/pg_prng.h>
Expand Down Expand Up @@ -70,6 +71,12 @@ bool ldb_aminsert(Relation index,
uint32 new_tuple_id;
HnswIndexTuple *new_tuple;
usearch_init_options_t opts = {0};
TupleDesc tupdesc = RelationGetDescr(index);
uint32 num_attributes = tupdesc->natts;
bool extra_columns_present
= num_attributes > 1; /* whether we have non-key columns to insert, for index-only scans*/
IndexTuple itup = NULL;
uint32 extra_columns_size = 0;
LDB_UNUSED(heap);
LDB_UNUSED(indexInfo);
#if PG_VERSION_NUM >= 140000
Expand Down Expand Up @@ -135,6 +142,7 @@ bool ldb_aminsert(Relation index,
datum = PointerGetDatum(PG_DETOAST_DATUM(values[ 0 ]));
float4 *vector = DatumGetSizedFloatArray(datum, insertstate->columnType, opts.dimensions);


#if LANTERNDB_COPYNODES
// currently not fully ported to the latest changes
assert(false);
Expand All @@ -151,14 +159,41 @@ bool ldb_aminsert(Relation index,
elog(ERROR, "usearch newnode error: %s", error);
}

// create a postgres IndexTuple containing the extra non-key column data (hence why we ignore values[0])
// the vector is the key column, which comes before non-key columns in this function-- we can also only have one key
// for now, so the vector must be the 0th entry
if(extra_columns_present) {
// ignore the first entry which is the vector, because we already store it

// make a new copy of isnull in case something else references it (as opposed to setting first entry to true and then back)
// todo:: this could be too cautious though... naive thing might work here as well
bool* fakeisnull = (bool*)palloc(sizeof(bool) * num_attributes);
memcpy(fakeisnull, isnull, sizeof(bool) * num_attributes);
fakeisnull[0] = true;

itup = index_form_tuple(tupdesc, values, fakeisnull);
itup->t_tid = *heap_tid;

extra_columns_size = IndexTupleSize(itup);

pfree(fakeisnull);
}

new_tuple_id = hdr->num_vectors;
// we are adding the following pages to the Generic XLog
// 1) the header page
// 2) the page containing the new tuple
// 3) (sometimes) the page that used to be last page of the index
// 4) The blockmap page for the block in which the vector was added
// Generic XLog supports up to 4 pages in a single commit, so we are good.
new_tuple = PrepareIndexTuple(index, state, hdr, &meta, new_tuple_id, level, insertstate);
new_tuple = PrepareIndexTuple(index, state, hdr, &meta, new_tuple_id, level, extra_columns_size, insertstate);

// copy the extra non-key column data so we can store it in our tuple
if(extra_columns_present) {
char *extra_columns_tape = new_tuple->node + new_tuple->size;
memcpy(extra_columns_tape, itup, new_tuple->extra_columns_size);
pfree(itup);
}

usearch_add_external(
uidx, *(unsigned long *)heap_tid, vector, new_tuple->node, usearch_scalar_f32_k, level, &error);
Expand Down
86 changes: 82 additions & 4 deletions src/hnsw/scan.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <pgstat.h>
#include <utils/rel.h>

#include <utils/array.h>

#include "bench.h"
#include "build.h"
#include "external_index.h"
Expand Down Expand Up @@ -87,6 +89,7 @@ IndexScanDesc ldb_ambeginscan(Relation index, int nkeys, int norderbys)
UnlockReleaseBuffer(buf);

scan->opaque = scanstate;
scan->xs_itup = NULL;
return scan;
}

Expand Down Expand Up @@ -119,6 +122,10 @@ void ldb_amendscan(IndexScanDesc scan)

if(scanstate->labels) pfree(scanstate->labels);

if(scanstate->tapes) pfree(scanstate->tapes);

if(scan->xs_itup) pfree(scan->xs_itup);

pfree(scanstate);
scan->opaque = NULL;
}
Expand All @@ -133,6 +140,7 @@ void ldb_amrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys,
{
HnswScanState *scanstate = (HnswScanState *)scan->opaque;
scanstate->first = true;
scan->xs_itup = NULL;
LDB_UNUSED(norderbys);
LDB_UNUSED(nkeys);

Expand Down Expand Up @@ -191,10 +199,13 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
if(scanstate->labels == NULL) {
scanstate->labels = palloc(k * sizeof(usearch_label_t));
}
if (scanstate->tapes == NULL) {
scanstate->tapes = palloc(k * sizeof(char*));
}

ldb_dlog("LANTERN querying index for %d elements", k);
num_returned = usearch_search(
scanstate->usearch_index, vec, usearch_scalar_f32_k, k, scanstate->labels, scanstate->distances, &error);
num_returned = usearch_search_with_tapes(
scanstate->usearch_index, vec, usearch_scalar_f32_k, k, scanstate->labels, scanstate->distances, scanstate->tapes, &error);
ldb_wal_retriever_area_reset(scanstate->retriever_ctx, NULL);

scanstate->count = num_returned;
Expand Down Expand Up @@ -226,10 +237,11 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
/* double k and reallocate arrays to account for increased size */
scanstate->distances = repalloc(scanstate->distances, k * sizeof(float));
scanstate->labels = repalloc(scanstate->labels, k * sizeof(usearch_label_t));
scanstate->tapes = repalloc(scanstate->tapes, k * sizeof(char*));

ldb_dlog("LANTERN - querying index for %d elements", k);
num_returned = usearch_search(
scanstate->usearch_index, vec, usearch_scalar_f32_k, k, scanstate->labels, scanstate->distances, &error);
num_returned = usearch_search_with_tapes(
scanstate->usearch_index, vec, usearch_scalar_f32_k, k, scanstate->labels, scanstate->distances, scanstate->tapes, &error);
ldb_wal_retriever_area_reset(scanstate->retriever_ctx, NULL);

scanstate->count = num_returned;
Expand All @@ -249,6 +261,72 @@ bool ldb_amgettuple(IndexScanDesc scan, ScanDirection dir)
#else
scan->xs_ctup.t_self = *tid;
#endif
// TODO: check if this is also compatible with the old version of postgres
// if the scan (index-only scan) requests the actual tuple, we set that information here
if(scan->xs_want_itup) {

scan->xs_itupdesc = RelationGetDescr(scan->indexRelation);
uint32 num_attributes = scan->xs_itupdesc->natts;

char* tape = scanstate->tapes[ scanstate->current ];
uint32 vector_size = *(uint32*)(tape - (offsetof(HnswIndexTuple, node) - offsetof(HnswIndexTuple, size)));

// this is the IndexTuple we created when we inserted the row... it is missing the vector data. We need to add it here
IndexTuple olditup = (IndexTuple)(scanstate->tapes[ scanstate->current ] + vector_size);

Datum* new_values = (Datum*) palloc(sizeof(Datum) * num_attributes);
bool* new_isnull = (bool*) palloc(sizeof(bool) * num_attributes);

// copy the old values and isnulls into the rest of the array
if(num_attributes > 1) {
index_deform_tuple(olditup, scan->xs_itupdesc, new_values, new_isnull);
}

// set/modify the first entries, corresponding to the vector
new_isnull[0] = false;

// vector_size corresponds to the entire "usearch" schema of storing a vector, which includes metadata
// the last dim*sizeof(float) entries of this schema is the actual vector data
// we process the array to be float4s when inserting regardless of type, so we do the same here
float4* vector = (float4*)(scanstate->tapes[ scanstate->current ] + vector_size - (sizeof(float4) * scanstate->dimensions));

// we build a proper Datum from this vector
// can't just cast to Datum because postgres processes it internally, like using TOAST
uint32 array_length = scanstate->dimensions;
ArrayType *array;
Datum *elem_datums;

elem_datums = (Datum *) palloc(array_length * sizeof(Datum));
for (uint32 i = 0; i < array_length; i++) {
elem_datums[i] = Float4GetDatum(vector[i]);
}

// we want a 1-D array
int dims[] = {array_length};
// lower bounds for each dimension; usually 1 for PostgreSQL arrays
int lbs[] = {1};

array = construct_md_array(elem_datums, NULL, 1, dims, lbs, FLOAT4OID, sizeof(float4), true, 'i');
Datum firstvalue = PointerGetDatum(array);

new_values[0] = firstvalue;

// todo:: are we leaking memory here?
IndexTuple newitup = index_form_tuple(scan->xs_itupdesc, new_values, new_isnull);
newitup->t_tid = olditup->t_tid;

// clean up IndexTuple created from previous row
if(scan->xs_itup) {
pfree(scan->xs_itup);
}

scan->xs_itup = newitup;

pfree(elem_datums);
pfree(new_values);
pfree(new_isnull);
}


// todo:: there is a mid-sized design issue with index storage
// labels must be large enough to store relblockno + indexblockno
Expand Down
1 change: 1 addition & 0 deletions src/hnsw/scan.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ typedef struct HnswScanState
ItemPointer iptr;
float *distances;
usearch_label_t *labels;
char **tapes;
HnswColumnType columnType;
int dimensions;
// indicates whether we are retrieving the first tuple
Expand Down
Loading