Skip to content

Commit 5bba086

Browse files
author
Arseny Kositsyn
committed
[PGPRO-12159] Added the output of weights.
If the index is created with the appropriate operator class, then in addition to the positions of the lexemes, their weights (A, B, C, D) are also stored in the additional information. The output of these weights has been added. In addition, Asserts have been added to the find_add_info_atrr_num() and find_add_info_oid() functions to check that the index contains at most one type of additional information. Tags: rum
1 parent 264c027 commit 5bba086

File tree

1 file changed

+73
-68
lines changed

1 file changed

+73
-68
lines changed

src/rum_debug_funcs.c

+73-68
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,12 @@
1414
* 2) I/O functions were not available for all types in
1515
* the get_datum_text_by_oid() function.
1616
*
17-
* 3) SIGSEGV in case of bytea output as additional information.
17+
* 3) The output of lexeme positions in the high keys of the posting
18+
* tree is not supported.
1819
*/
1920

2021
#include "postgres.h"
22+
#include "miscadmin.h"
2123
#include "fmgr.h"
2224
#include "funcapi.h"
2325
#include "catalog/namespace.h"
@@ -115,8 +117,8 @@ static Oid get_cur_attr_oid(rum_page_items_state *inter_call_data);
115117
static Datum category_get_datum_text(RumNullCategory category);
116118
static Oid find_add_info_oid(RumState *rum_state_ptr);
117119
static OffsetNumber find_add_info_atrr_num(RumState *rum_state_ptr);
118-
119120
static Datum get_positions_to_text_datum(Datum add_info);
121+
static char pos_get_weight(WordEntryPos position);
120122

121123
/*
122124
* The rum_metapage_info() function is used to retrieve
@@ -472,7 +474,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
472474
*/
473475
if(fctx->call_cntr <= inter_call_data->maxoff)
474476
{
475-
RumItem *high_key_ptr;
477+
RumItem *high_key_ptr; /* to read high key from a page */
476478
RumItem *rum_item_ptr; /* to read data from a page */
477479
Datum values[4]; /* return values */
478480
bool nulls[4]; /* true if the corresponding value is NULL */
@@ -497,7 +499,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
497499
values[2] = BoolGetDatum(high_key_ptr->addInfoIsNull);
498500

499501
/* Returning add info */
500-
if(!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
502+
if(!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
501503
&& inter_call_data->add_info_oid != BYTEAOID)
502504
{
503505
values[3] = get_datum_text_by_oid(high_key_ptr->addInfo,
@@ -506,12 +508,11 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
506508

507509
/*
508510
* In this case, we are dealing with the positions
509-
* of tokens and they need to be decoded.
511+
* of lexemes and they need to be decoded.
510512
*/
511-
else if (!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
513+
else if (!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
512514
&& inter_call_data->add_info_oid == BYTEAOID)
513515
{
514-
/* values[3] = get_positions_to_text_datum(high_key_ptr->addInfo); */
515516
values[3] = CStringGetTextDatum("high key positions in posting tree is not supported");
516517
}
517518

@@ -525,26 +526,8 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
525526
SRF_RETURN_NEXT(fctx, result);
526527
}
527528

528-
/*
529-
* Reading information from the page in rum_item.
530-
*
531-
* TODO: The fact is that being on the posting tree page, we don't know which
532-
* index attribute this posting tree was built for, so we don't know the
533-
* attribute number of the additional information. But the rumDataPageLeafRead()
534-
* function requires it to read information from the page. Here we use the auxiliary
535-
* function find_add_info_atr_num(), which simply iterates through the array with
536-
* attributes that are additional information and selects the attribute number for
537-
* which the additional information attribute is not NULL. This approach is incorrect
538-
* because there may not be additional information for the attribute on the page,
539-
* but we hope that in this case add_info_is_null will have the value true and the
540-
* additional information will not be read.
541-
*
542-
* This problem can be solved by asking the user for the attribute number of
543-
* additional information, because going through the index from top to bottom,
544-
* he saw it next to the link to the posting tree root.
545-
*/
529+
/* Reading information from the page into rum_item */
546530
inter_call_data->item_ptr = rumDataPageLeafRead(inter_call_data->item_ptr,
547-
/* inter_call_data->cur_tuple_key_attnum, */
548531
find_add_info_atrr_num(inter_call_data->rum_state_ptr),
549532
rum_item_ptr, false, inter_call_data->rum_state_ptr);
550533

@@ -554,7 +537,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
554537
values[2] = BoolGetDatum(rum_item_ptr->addInfoIsNull);
555538

556539
/* Returning add info */
557-
if(!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
540+
if(!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
558541
&& inter_call_data->add_info_oid != BYTEAOID)
559542
{
560543
values[3] = get_datum_text_by_oid(rum_item_ptr->addInfo,
@@ -563,9 +546,9 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
563546

564547
/*
565548
* In this case, we are dealing with the positions
566-
* of tokens and they need to be decoded.
549+
* of lexemes and they need to be decoded.
567550
*/
568-
else if (!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
551+
else if (!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
569552
&& inter_call_data->add_info_oid == BYTEAOID)
570553
{
571554
values[3] = get_positions_to_text_datum(rum_item_ptr->addInfo);
@@ -729,7 +712,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
729712
*/
730713
if(fctx->call_cntr <= inter_call_data->maxoff)
731714
{
732-
RumItem *high_key_ptr;
715+
RumItem *high_key_ptr; /* to read high key from a page */
733716
PostingItem *posting_item_ptr; /* to read data from a page */
734717
Datum values[5]; /* returned values */
735718
bool nulls[5]; /* true if the corresponding returned value is NULL */
@@ -754,7 +737,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
754737
values[3] = BoolGetDatum(high_key_ptr->addInfoIsNull);
755738

756739
/* Returning add info */
757-
if(!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
740+
if(!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
758741
&& inter_call_data->add_info_oid != BYTEAOID)
759742
{
760743
values[4] = get_datum_text_by_oid(high_key_ptr->addInfo,
@@ -763,12 +746,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
763746

764747
/*
765748
* In this case, we are dealing with the positions
766-
* of tokens and they need to be decoded.
749+
* of lexemes and they need to be decoded.
767750
*/
768-
else if (!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
751+
else if (!(high_key_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
769752
&& inter_call_data->add_info_oid == BYTEAOID)
770753
{
771-
/* values[4] = get_positions_to_text_datum(high_key_ptr->addInfo); */
772754
values[4] = CStringGetTextDatum("high key positions in posting tree is not supported");
773755
}
774756

@@ -793,7 +775,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
793775
values[3] = BoolGetDatum(posting_item_ptr->item.addInfoIsNull);
794776

795777
/* Returning add info */
796-
if(!posting_item_ptr->item.addInfoIsNull && inter_call_data->add_info_oid != 0
778+
if(!posting_item_ptr->item.addInfoIsNull && inter_call_data->add_info_oid != InvalidOid
797779
&& inter_call_data->add_info_oid != BYTEAOID)
798780
{
799781
values[4] = get_datum_text_by_oid(posting_item_ptr->item.addInfo,
@@ -802,12 +784,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
802784

803785
/*
804786
* In this case, we are dealing with the positions
805-
* of tokens and they need to be decoded.
787+
* of lexemes and they need to be decoded.
806788
*/
807-
else if (!posting_item_ptr->item.addInfoIsNull && inter_call_data->add_info_oid != 0
789+
else if (!posting_item_ptr->item.addInfoIsNull && inter_call_data->add_info_oid != InvalidOid
808790
&& inter_call_data->add_info_oid == BYTEAOID)
809791
{
810-
/* values[4] = get_positions_to_text_datum(posting_item_ptr->item.addInfo); */
811792
values[4] = CStringGetTextDatum("high key positions in posting tree is not supported");
812793
}
813794

@@ -1072,17 +1053,17 @@ rum_leaf_entry_page_items(PG_FUNCTION_ARGS)
10721053
values[4] = BoolGetDatum(rum_item_ptr->addInfoIsNull);
10731054

10741055
/* Returning add info */
1075-
if (!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0 &&
1056+
if (!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid &&
10761057
inter_call_data->add_info_oid != BYTEAOID)
10771058
{
10781059
values[5] = get_datum_text_by_oid(rum_item_ptr->addInfo, inter_call_data->add_info_oid);
10791060
}
10801061

10811062
/*
10821063
* In this case, we are dealing with the positions
1083-
* of tokens and they need to be decoded.
1064+
* of lexemes and they need to be decoded.
10841065
*/
1085-
else if (!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != 0
1066+
else if (!(rum_item_ptr->addInfoIsNull) && inter_call_data->add_info_oid != InvalidOid
10861067
&& inter_call_data->add_info_oid == BYTEAOID)
10871068
{
10881069
values[5] = get_positions_to_text_datum(rum_item_ptr->addInfo);
@@ -1427,22 +1408,16 @@ get_page_from_raw(bytea *raw_page)
14271408
* int2, int4, int8, float4, float8, money, oid, timestamp,
14281409
* timestamptz, time, timetz, date, interval, macaddr, inet,
14291410
* cidr, text, varchar, char, bytea, bit, varbit, numeric.
1430-
*
1431-
* TODO: All types accepted by rum must be checked, but
1432-
* perhaps some types are missing or some are superfluous.
14331411
*/
14341412
static Datum
14351413
get_datum_text_by_oid(Datum info, Oid info_oid)
14361414
{
14371415
char *str_info = NULL;
14381416

1439-
/* info cannot be NULL */
1440-
Assert(DatumGetPointer(info) != NULL);
1441-
14421417
/*
14431418
* Form a string depending on the type of info.
14441419
*
1445-
* FIXME: The macros used below are taken from the
1420+
* TODO: The macros used below are taken from the
14461421
* pg_type_d.h file, and it says not to use them
14471422
* in the new code.
14481423
*/
@@ -1528,18 +1503,9 @@ get_datum_text_by_oid(Datum info, Oid info_oid)
15281503
str_info = OidOutputFunctionCall(F_CHAROUT, info);
15291504
break;
15301505

1531-
/*
1532-
* TODO: For some reason, the rum index created for a single tsv
1533-
* field contains additional information as bytea. In addition,
1534-
* if additional information in this format is extracted from
1535-
* posting tree pages, it cannot be displayed correctly as text.
1536-
* If the additional information was extracted from the entry
1537-
* tree pages, then it is displayed correctly.
1538-
*/
15391506
case BYTEAOID:
1540-
/* str_info = OidOutputFunctionCall(F_BYTEAOUT, info); */
1541-
/* break; */
1542-
return CStringGetTextDatum("BYTEAOID is not supported");
1507+
str_info = OidOutputFunctionCall(F_BYTEAOUT, info);
1508+
break;
15431509

15441510
case BITOID:
15451511
str_info = OidOutputFunctionCall(F_BIT_OUT, info);
@@ -1634,14 +1600,14 @@ get_rel_raw_page(Relation rel, BlockNumber blkno)
16341600
* the Oid of additional information for an attribute for
16351601
* which it is not NULL.
16361602
*
1637-
* TODO: The logic of the function assumes that there cannot
1603+
* The logic of the function assumes that there cannot
16381604
* be several types of additional information in the index,
16391605
* otherwise it will not work.
16401606
*/
16411607
static Oid
16421608
find_add_info_oid(RumState *rum_state_ptr)
16431609
{
1644-
Oid add_info_oid = 0;
1610+
Oid add_info_oid = InvalidOid;
16451611

16461612
/* Number of index attributes */
16471613
int num_attrs = rum_state_ptr->origTupdesc->natts;
@@ -1651,8 +1617,13 @@ find_add_info_oid(RumState *rum_state_ptr)
16511617
* oid of additional information.
16521618
*/
16531619
for (int i = 0; i < num_attrs; i++)
1620+
{
16541621
if ((rum_state_ptr->addAttrs)[i] != NULL)
1622+
{
1623+
Assert(add_info_oid == InvalidOid);
16551624
add_info_oid = ((rum_state_ptr->addAttrs)[i])->atttypid;
1625+
}
1626+
}
16561627

16571628
return add_info_oid;
16581629
}
@@ -1661,19 +1632,28 @@ find_add_info_oid(RumState *rum_state_ptr)
16611632
* This is an auxiliary function to get the attribute number
16621633
* for additional information. It is used in the rum_leaf_data_page_items()
16631634
* function to call the rumDataPageLeafRead() function.
1635+
*
1636+
* The logic of the function assumes that there cannot
1637+
* be several types of additional information in the index,
1638+
* otherwise it will not work.
16641639
*/
16651640
static OffsetNumber
16661641
find_add_info_atrr_num(RumState *rum_state_ptr)
16671642
{
1668-
OffsetNumber add_info_attr_num = 0;
1643+
OffsetNumber add_info_attr_num = InvalidOffsetNumber;
16691644

16701645
/* Number of index attributes */
16711646
int num_attrs = rum_state_ptr->origTupdesc->natts;
16721647

16731648
/* Go through the addAttrs array */
1674-
for (int i = 0; i < num_attrs; i++)
1649+
for (int i = 0; i < num_attrs; i++)
1650+
{
16751651
if ((rum_state_ptr->addAttrs)[i] != NULL)
1652+
{
1653+
Assert(add_info_attr_num == InvalidOffsetNumber);
16761654
add_info_attr_num = i;
1655+
}
1656+
}
16771657

16781658
/* Need to add 1 because the attributes are numbered from 1 */
16791659
return add_info_attr_num + 1;
@@ -1683,8 +1663,8 @@ find_add_info_atrr_num(RumState *rum_state_ptr)
16831663
#define POS_MAX_VAL_LENGHT 6
16841664

16851665
/*
1686-
* A function for extracting the positions of tokens from additional
1687-
* information. Returns a string in which the positions of the tokens
1666+
* A function for extracting the positions of lexemes from additional
1667+
* information. Returns a string in which the positions of the lexemes
16881668
* are recorded. The memory that the string occupies must be cleared later.
16891669
*/
16901670
static Datum
@@ -1711,14 +1691,17 @@ get_positions_to_text_datum(Datum add_info)
17111691
cur_max_str_lenght = POS_STR_BUF_LENGHT;
17121692
positions_str_cur_ptr = positions_str;
17131693

1714-
/* Extract the positions of the tokens and put them in the string */
1694+
/* Extract the positions of the lexemes and put them in the string */
17151695
for (int i = 0; i < npos; i++)
17161696
{
17171697
/* At each iteration decode the position */
17181698
ptrt = decompress_pos(ptrt, &position);
17191699

1720-
/* Write this position in the string */
1721-
sprintf(positions_str_cur_ptr, "%d,", position);
1700+
/* Write this position and weight in the string */
1701+
if(pos_get_weight(position) == 'D')
1702+
sprintf(positions_str_cur_ptr, "%d,", WEP_GETPOS(position));
1703+
else
1704+
sprintf(positions_str_cur_ptr, "%d%c,", WEP_GETPOS(position), pos_get_weight(position));
17221705

17231706
/* Moving the pointer forward */
17241707
positions_str_cur_ptr += strlen(positions_str_cur_ptr);
@@ -1744,3 +1727,25 @@ get_positions_to_text_datum(Datum add_info)
17441727
pfree(positions_str);
17451728
return res;
17461729
}
1730+
1731+
/*
1732+
* The function extracts the weight and
1733+
* returns the corresponding letter.
1734+
*/
1735+
static char
1736+
pos_get_weight(WordEntryPos position)
1737+
{
1738+
char res = 'D';
1739+
1740+
switch(WEP_GETWEIGHT(position))
1741+
{
1742+
case 3:
1743+
return 'A';
1744+
case 2:
1745+
return 'B';
1746+
case 1:
1747+
return 'C';
1748+
}
1749+
1750+
return res;
1751+
}

0 commit comments

Comments
 (0)