1414 * 2) I/O functions were not available for all types in
1515 * the get_datum_text_by_oid() function.
1616 *
17- * 3) SIGSEGV in case of bytea output as additional information.
17+ * 3) The output of lexeme positions in the high keys of the posting
18+ * tree is not supported.
1819 */
1920
2021#include "postgres.h"
@@ -115,8 +116,8 @@ static Oid get_cur_attr_oid(rum_page_items_state *inter_call_data);
115116static Datum category_get_datum_text (RumNullCategory category );
116117static Oid find_add_info_oid (RumState * rum_state_ptr );
117118static OffsetNumber find_add_info_atrr_num (RumState * rum_state_ptr );
118-
119119static Datum get_positions_to_text_datum (Datum add_info );
120+ static char pos_get_weight (WordEntryPos position );
120121
121122/*
122123 * The rum_metapage_info() function is used to retrieve
@@ -472,7 +473,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
472473 */
473474 if (fctx -> call_cntr <= inter_call_data -> maxoff )
474475 {
475- RumItem * high_key_ptr ;
476+ RumItem * high_key_ptr ; /* to read high key from a page */
476477 RumItem * rum_item_ptr ; /* to read data from a page */
477478 Datum values [4 ]; /* return values */
478479 bool nulls [4 ]; /* true if the corresponding value is NULL */
@@ -497,7 +498,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
497498 values [2 ] = BoolGetDatum (high_key_ptr -> addInfoIsNull );
498499
499500 /* Returning add info */
500- if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
501+ if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
501502 && inter_call_data -> add_info_oid != BYTEAOID )
502503 {
503504 values [3 ] = get_datum_text_by_oid (high_key_ptr -> addInfo ,
@@ -506,12 +507,11 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
506507
507508 /*
508509 * In this case, we are dealing with the positions
509- * of tokens and they need to be decoded.
510+ * of lexemes and they need to be decoded.
510511 */
511- else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
512+ else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
512513 && inter_call_data -> add_info_oid == BYTEAOID )
513514 {
514- /* values[3] = get_positions_to_text_datum(high_key_ptr->addInfo); */
515515 values [3 ] = CStringGetTextDatum ("high key positions in posting tree is not supported" );
516516 }
517517
@@ -525,26 +525,8 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
525525 SRF_RETURN_NEXT (fctx , result );
526526 }
527527
528- /*
529- * Reading information from the page in rum_item.
530- *
531- * TODO: The fact is that being on the posting tree page, we don't know which
532- * index attribute this posting tree was built for, so we don't know the
533- * attribute number of the additional information. But the rumDataPageLeafRead()
534- * function requires it to read information from the page. Here we use the auxiliary
535- * function find_add_info_atr_num(), which simply iterates through the array with
536- * attributes that are additional information and selects the attribute number for
537- * which the additional information attribute is not NULL. This approach is incorrect
538- * because there may not be additional information for the attribute on the page,
539- * but we hope that in this case add_info_is_null will have the value true and the
540- * additional information will not be read.
541- *
542- * This problem can be solved by asking the user for the attribute number of
543- * additional information, because going through the index from top to bottom,
544- * he saw it next to the link to the posting tree root.
545- */
528+ /* Reading information from the page in rum_item */
546529 inter_call_data -> item_ptr = rumDataPageLeafRead (inter_call_data -> item_ptr ,
547- /* inter_call_data->cur_tuple_key_attnum, */
548530 find_add_info_atrr_num (inter_call_data -> rum_state_ptr ),
549531 rum_item_ptr , false, inter_call_data -> rum_state_ptr );
550532
@@ -554,7 +536,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
554536 values [2 ] = BoolGetDatum (rum_item_ptr -> addInfoIsNull );
555537
556538 /* Returning add info */
557- if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
539+ if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
558540 && inter_call_data -> add_info_oid != BYTEAOID )
559541 {
560542 values [3 ] = get_datum_text_by_oid (rum_item_ptr -> addInfo ,
@@ -563,9 +545,9 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
563545
564546 /*
565547 * In this case, we are dealing with the positions
566- * of tokens and they need to be decoded.
548+ * of lexemes and they need to be decoded.
567549 */
568- else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
550+ else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
569551 && inter_call_data -> add_info_oid == BYTEAOID )
570552 {
571553 values [3 ] = get_positions_to_text_datum (rum_item_ptr -> addInfo );
@@ -729,7 +711,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
729711 */
730712 if (fctx -> call_cntr <= inter_call_data -> maxoff )
731713 {
732- RumItem * high_key_ptr ;
714+ RumItem * high_key_ptr ; /* to read high key from a page */
733715 PostingItem * posting_item_ptr ; /* to read data from a page */
734716 Datum values [5 ]; /* returned values */
735717 bool nulls [5 ]; /* true if the corresponding returned value is NULL */
@@ -754,7 +736,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
754736 values [3 ] = BoolGetDatum (high_key_ptr -> addInfoIsNull );
755737
756738 /* Returning add info */
757- if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
739+ if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
758740 && inter_call_data -> add_info_oid != BYTEAOID )
759741 {
760742 values [4 ] = get_datum_text_by_oid (high_key_ptr -> addInfo ,
@@ -763,12 +745,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
763745
764746 /*
765747 * In this case, we are dealing with the positions
766- * of tokens and they need to be decoded.
748+ * of lexemes and they need to be decoded.
767749 */
768- else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
750+ else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
769751 && inter_call_data -> add_info_oid == BYTEAOID )
770752 {
771- /* values[4] = get_positions_to_text_datum(high_key_ptr->addInfo); */
772753 values [4 ] = CStringGetTextDatum ("high key positions in posting tree is not supported" );
773754 }
774755
@@ -793,7 +774,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
793774 values [3 ] = BoolGetDatum (posting_item_ptr -> item .addInfoIsNull );
794775
795776 /* Returning add info */
796- if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != 0
777+ if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != InvalidOid
797778 && inter_call_data -> add_info_oid != BYTEAOID )
798779 {
799780 values [4 ] = get_datum_text_by_oid (posting_item_ptr -> item .addInfo ,
@@ -802,12 +783,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
802783
803784 /*
804785 * In this case, we are dealing with the positions
805- * of tokens and they need to be decoded.
786+ * of lexemes and they need to be decoded.
806787 */
807- else if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != 0
788+ else if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != InvalidOid
808789 && inter_call_data -> add_info_oid == BYTEAOID )
809790 {
810- /* values[4] = get_positions_to_text_datum(posting_item_ptr->item.addInfo); */
811791 values [4 ] = CStringGetTextDatum ("high key positions in posting tree is not supported" );
812792 }
813793
@@ -1072,17 +1052,17 @@ rum_leaf_entry_page_items(PG_FUNCTION_ARGS)
10721052 values [4 ] = BoolGetDatum (rum_item_ptr -> addInfoIsNull );
10731053
10741054 /* Returning add info */
1075- if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0 &&
1055+ if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid &&
10761056 inter_call_data -> add_info_oid != BYTEAOID )
10771057 {
10781058 values [5 ] = get_datum_text_by_oid (rum_item_ptr -> addInfo , inter_call_data -> add_info_oid );
10791059 }
10801060
10811061 /*
10821062 * In this case, we are dealing with the positions
1083- * of tokens and they need to be decoded.
1063+ * of lexemes and they need to be decoded.
10841064 */
1085- else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
1065+ else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
10861066 && inter_call_data -> add_info_oid == BYTEAOID )
10871067 {
10881068 values [5 ] = get_positions_to_text_datum (rum_item_ptr -> addInfo );
@@ -1427,22 +1407,16 @@ get_page_from_raw(bytea *raw_page)
14271407 * int2, int4, int8, float4, float8, money, oid, timestamp,
14281408 * timestamptz, time, timetz, date, interval, macaddr, inet,
14291409 * cidr, text, varchar, char, bytea, bit, varbit, numeric.
1430- *
1431- * TODO: All types accepted by rum must be checked, but
1432- * perhaps some types are missing or some are superfluous.
14331410 */
14341411static Datum
14351412get_datum_text_by_oid (Datum info , Oid info_oid )
14361413{
14371414 char * str_info = NULL ;
14381415
1439- /* info cannot be NULL */
1440- Assert (DatumGetPointer (info ) != NULL );
1441-
14421416 /*
14431417 * Form a string depending on the type of info.
14441418 *
1445- * FIXME : The macros used below are taken from the
1419+ * TODO : The macros used below are taken from the
14461420 * pg_type_d.h file, and it says not to use them
14471421 * in the new code.
14481422 */
@@ -1528,18 +1502,9 @@ get_datum_text_by_oid(Datum info, Oid info_oid)
15281502 str_info = OidOutputFunctionCall (F_CHAROUT , info );
15291503 break ;
15301504
1531- /*
1532- * TODO: For some reason, the rum index created for a single tsv
1533- * field contains additional information as bytea. In addition,
1534- * if additional information in this format is extracted from
1535- * posting tree pages, it cannot be displayed correctly as text.
1536- * If the additional information was extracted from the entry
1537- * tree pages, then it is displayed correctly.
1538- */
15391505 case BYTEAOID :
1540- /* str_info = OidOutputFunctionCall(F_BYTEAOUT, info); */
1541- /* break; */
1542- return CStringGetTextDatum ("BYTEAOID is not supported" );
1506+ str_info = OidOutputFunctionCall (F_BYTEAOUT , info );
1507+ break ;
15431508
15441509 case BITOID :
15451510 str_info = OidOutputFunctionCall (F_BIT_OUT , info );
@@ -1634,14 +1599,14 @@ get_rel_raw_page(Relation rel, BlockNumber blkno)
16341599 * the Oid of additional information for an attribute for
16351600 * which it is not NULL.
16361601 *
1637- * TODO: The logic of the function assumes that there cannot
1602+ * The logic of the function assumes that there cannot
16381603 * be several types of additional information in the index,
16391604 * otherwise it will not work.
16401605 */
16411606static Oid
16421607find_add_info_oid (RumState * rum_state_ptr )
16431608{
1644- Oid add_info_oid = 0 ;
1609+ Oid add_info_oid = InvalidOid ;
16451610
16461611 /* Number of index attributes */
16471612 int num_attrs = rum_state_ptr -> origTupdesc -> natts ;
@@ -1651,8 +1616,13 @@ find_add_info_oid(RumState *rum_state_ptr)
16511616 * oid of additional information.
16521617 */
16531618 for (int i = 0 ; i < num_attrs ; i ++ )
1619+ {
16541620 if ((rum_state_ptr -> addAttrs )[i ] != NULL )
1621+ {
1622+ Assert (add_info_oid == InvalidOid );
16551623 add_info_oid = ((rum_state_ptr -> addAttrs )[i ])-> atttypid ;
1624+ }
1625+ }
16561626
16571627 return add_info_oid ;
16581628}
@@ -1661,19 +1631,28 @@ find_add_info_oid(RumState *rum_state_ptr)
16611631 * This is an auxiliary function to get the attribute number
16621632 * for additional information. It is used in the rum_leaf_data_page_items()
16631633 * function to call the rumDataPageLeafRead() function.
1634+ *
1635+ * The logic of the function assumes that there cannot
1636+ * be several types of additional information in the index,
1637+ * otherwise it will not work.
16641638 */
16651639static OffsetNumber
16661640find_add_info_atrr_num (RumState * rum_state_ptr )
16671641{
1668- OffsetNumber add_info_attr_num = 0 ;
1642+ OffsetNumber add_info_attr_num = InvalidOffsetNumber ;
16691643
16701644 /* Number of index attributes */
16711645 int num_attrs = rum_state_ptr -> origTupdesc -> natts ;
16721646
16731647 /* Go through the addAttrs array */
1674- for (int i = 0 ; i < num_attrs ; i ++ )
1648+ for (int i = 0 ; i < num_attrs ; i ++ )
1649+ {
16751650 if ((rum_state_ptr -> addAttrs )[i ] != NULL )
1651+ {
1652+ Assert (add_info_attr_num == InvalidOffsetNumber );
16761653 add_info_attr_num = i ;
1654+ }
1655+ }
16771656
16781657 /* Need to add 1 because the attributes are numbered from 1 */
16791658 return add_info_attr_num + 1 ;
@@ -1683,8 +1662,8 @@ find_add_info_atrr_num(RumState *rum_state_ptr)
16831662#define POS_MAX_VAL_LENGHT 6
16841663
16851664/*
1686- * A function for extracting the positions of tokens from additional
1687- * information. Returns a string in which the positions of the tokens
1665+ * A function for extracting the positions of lexemes from additional
1666+ * information. Returns a string in which the positions of the lexemes
16881667 * are recorded. The memory that the string occupies must be cleared later.
16891668 */
16901669static Datum
@@ -1711,14 +1690,17 @@ get_positions_to_text_datum(Datum add_info)
17111690 cur_max_str_lenght = POS_STR_BUF_LENGHT ;
17121691 positions_str_cur_ptr = positions_str ;
17131692
1714- /* Extract the positions of the tokens and put them in the string */
1693+ /* Extract the positions of the lexemes and put them in the string */
17151694 for (int i = 0 ; i < npos ; i ++ )
17161695 {
17171696 /* At each iteration decode the position */
17181697 ptrt = decompress_pos (ptrt , & position );
17191698
1720- /* Write this position in the string */
1721- sprintf (positions_str_cur_ptr , "%d," , position );
1699+ /* Write this position and weight in the string */
1700+ if (pos_get_weight (position ) == 'D' )
1701+ sprintf (positions_str_cur_ptr , "%d," , WEP_GETPOS (position ));
1702+ else
1703+ sprintf (positions_str_cur_ptr , "%d%c," , WEP_GETPOS (position ), pos_get_weight (position ));
17221704
17231705 /* Moving the pointer forward */
17241706 positions_str_cur_ptr += strlen (positions_str_cur_ptr );
@@ -1744,3 +1726,25 @@ get_positions_to_text_datum(Datum add_info)
17441726 pfree (positions_str );
17451727 return res ;
17461728}
1729+
1730+ /*
1731+ * The function extracts the weight and
1732+ * returns the corresponding letter.
1733+ */
1734+ static char
1735+ pos_get_weight (WordEntryPos position )
1736+ {
1737+ char res = 'D' ;
1738+
1739+ switch (WEP_GETWEIGHT (position ))
1740+ {
1741+ case 3 :
1742+ return 'A' ;
1743+ case 2 :
1744+ return 'B' ;
1745+ case 1 :
1746+ return 'C' ;
1747+ }
1748+
1749+ return res ;
1750+ }
0 commit comments