14
14
* 2) I/O functions were not available for all types
15
15
* in the get_datum_text_by_oid() function.
16
16
*
17
- * 3) SIGSEGV in case of bytea output as additional information.
17
+ * 3) The output of lexeme positions in the high keys of the posting
18
+ * tree is not supported.
18
19
*/
19
20
20
21
#include "postgres.h"
22
+ #include "miscadmin.h"
21
23
#include "fmgr.h"
22
24
#include "funcapi.h"
23
25
#include "catalog/namespace.h"
@@ -115,8 +117,8 @@ static Oid get_cur_attr_oid(rum_page_items_state *inter_call_data);
115
117
static Datum category_get_datum_text (RumNullCategory category );
116
118
static Oid find_add_info_oid (RumState * rum_state_ptr );
117
119
static OffsetNumber find_add_info_atrr_num (RumState * rum_state_ptr );
118
-
119
120
static Datum get_positions_to_text_datum (Datum add_info );
121
+ static char pos_get_weight (WordEntryPos position );
120
122
121
123
/*
122
124
* The rum_metapage_info() function is used to retrieve
@@ -472,7 +474,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
472
474
*/
473
475
if (fctx -> call_cntr <= inter_call_data -> maxoff )
474
476
{
475
- RumItem * high_key_ptr ;
477
+ RumItem * high_key_ptr ; /* to read high key from a page */
476
478
RumItem * rum_item_ptr ; /* to read data from a page */
477
479
Datum values [4 ]; /* return values */
478
480
bool nulls [4 ]; /* true if the corresponding value is NULL */
@@ -497,7 +499,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
497
499
values [2 ] = BoolGetDatum (high_key_ptr -> addInfoIsNull );
498
500
499
501
/* Returning add info */
500
- if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
502
+ if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
501
503
&& inter_call_data -> add_info_oid != BYTEAOID )
502
504
{
503
505
values [3 ] = get_datum_text_by_oid (high_key_ptr -> addInfo ,
@@ -506,12 +508,11 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
506
508
507
509
/*
508
510
* In this case, we are dealing with the positions
509
- * of tokens and they need to be decoded.
511
+ * of lexemes and they need to be decoded.
510
512
*/
511
- else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
513
+ else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
512
514
&& inter_call_data -> add_info_oid == BYTEAOID )
513
515
{
514
- /* values[3] = get_positions_to_text_datum(high_key_ptr->addInfo); */
515
516
values [3 ] = CStringGetTextDatum ("high key positions in posting tree is not supported" );
516
517
}
517
518
@@ -525,26 +526,8 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
525
526
SRF_RETURN_NEXT (fctx , result );
526
527
}
527
528
528
- /*
529
- * Reading information from the page in rum_item.
530
- *
531
- * TODO: The fact is that being on the posting tree page, we don't know which
532
- * index attribute this posting tree was built for, so we don't know the
533
- * attribute number of the additional information. But the rumDataPageLeafRead()
534
- * function requires it to read information from the page. Here we use the auxiliary
535
- * function find_add_info_atr_num(), which simply iterates through the array with
536
- * attributes that are additional information and selects the attribute number for
537
- * which the additional information attribute is not NULL. This approach is incorrect
538
- * because there may not be additional information for the attribute on the page,
539
- * but we hope that in this case add_info_is_null will have the value true and the
540
- * additional information will not be read.
541
- *
542
- * This problem can be solved by asking the user for the attribute number of
543
- * additional information, because going through the index from top to bottom,
544
- * he saw it next to the link to the posting tree root.
545
- */
529
+ /* Reading information from the page in rum_item */
546
530
inter_call_data -> item_ptr = rumDataPageLeafRead (inter_call_data -> item_ptr ,
547
- /* inter_call_data->cur_tuple_key_attnum, */
548
531
find_add_info_atrr_num (inter_call_data -> rum_state_ptr ),
549
532
rum_item_ptr , false, inter_call_data -> rum_state_ptr );
550
533
@@ -554,7 +537,7 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
554
537
values [2 ] = BoolGetDatum (rum_item_ptr -> addInfoIsNull );
555
538
556
539
/* Returning add info */
557
- if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
540
+ if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
558
541
&& inter_call_data -> add_info_oid != BYTEAOID )
559
542
{
560
543
values [3 ] = get_datum_text_by_oid (rum_item_ptr -> addInfo ,
@@ -563,9 +546,9 @@ rum_leaf_data_page_items(PG_FUNCTION_ARGS)
563
546
564
547
/*
565
548
* In this case, we are dealing with the positions
566
- * of tokens and they need to be decoded.
549
+ * of lexemes and they need to be decoded.
567
550
*/
568
- else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
551
+ else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
569
552
&& inter_call_data -> add_info_oid == BYTEAOID )
570
553
{
571
554
values [3 ] = get_positions_to_text_datum (rum_item_ptr -> addInfo );
@@ -729,7 +712,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
729
712
*/
730
713
if (fctx -> call_cntr <= inter_call_data -> maxoff )
731
714
{
732
- RumItem * high_key_ptr ;
715
+ RumItem * high_key_ptr ; /* to read high key from a page */
733
716
PostingItem * posting_item_ptr ; /* to read data from a page */
734
717
Datum values [5 ]; /* returned values */
735
718
bool nulls [5 ]; /* true if the corresponding returned value is NULL */
@@ -754,7 +737,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
754
737
values [3 ] = BoolGetDatum (high_key_ptr -> addInfoIsNull );
755
738
756
739
/* Returning add info */
757
- if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
740
+ if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
758
741
&& inter_call_data -> add_info_oid != BYTEAOID )
759
742
{
760
743
values [4 ] = get_datum_text_by_oid (high_key_ptr -> addInfo ,
@@ -763,12 +746,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
763
746
764
747
/*
765
748
* In this case, we are dealing with the positions
766
- * of tokens and they need to be decoded.
749
+ * of lexemes and they need to be decoded.
767
750
*/
768
- else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
751
+ else if (!(high_key_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
769
752
&& inter_call_data -> add_info_oid == BYTEAOID )
770
753
{
771
- /* values[4] = get_positions_to_text_datum(high_key_ptr->addInfo); */
772
754
values [4 ] = CStringGetTextDatum ("high key positions in posting tree is not supported" );
773
755
}
774
756
@@ -793,7 +775,7 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
793
775
values [3 ] = BoolGetDatum (posting_item_ptr -> item .addInfoIsNull );
794
776
795
777
/* Returning add info */
796
- if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != 0
778
+ if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != InvalidOid
797
779
&& inter_call_data -> add_info_oid != BYTEAOID )
798
780
{
799
781
values [4 ] = get_datum_text_by_oid (posting_item_ptr -> item .addInfo ,
@@ -802,12 +784,11 @@ rum_internal_data_page_items(PG_FUNCTION_ARGS)
802
784
803
785
/*
804
786
* In this case, we are dealing with the positions
805
- * of tokens and they need to be decoded.
787
+ * of lexemes and they need to be decoded.
806
788
*/
807
- else if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != 0
789
+ else if (!posting_item_ptr -> item .addInfoIsNull && inter_call_data -> add_info_oid != InvalidOid
808
790
&& inter_call_data -> add_info_oid == BYTEAOID )
809
791
{
810
- /* values[4] = get_positions_to_text_datum(posting_item_ptr->item.addInfo); */
811
792
values [4 ] = CStringGetTextDatum ("high key positions in posting tree is not supported" );
812
793
}
813
794
@@ -1072,17 +1053,17 @@ rum_leaf_entry_page_items(PG_FUNCTION_ARGS)
1072
1053
values [4 ] = BoolGetDatum (rum_item_ptr -> addInfoIsNull );
1073
1054
1074
1055
/* Returning add info */
1075
- if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0 &&
1056
+ if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid &&
1076
1057
inter_call_data -> add_info_oid != BYTEAOID )
1077
1058
{
1078
1059
values [5 ] = get_datum_text_by_oid (rum_item_ptr -> addInfo , inter_call_data -> add_info_oid );
1079
1060
}
1080
1061
1081
1062
/*
1082
1063
* In this case, we are dealing with the positions
1083
- * of tokens and they need to be decoded.
1064
+ * of lexemes and they need to be decoded.
1084
1065
*/
1085
- else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != 0
1066
+ else if (!(rum_item_ptr -> addInfoIsNull ) && inter_call_data -> add_info_oid != InvalidOid
1086
1067
&& inter_call_data -> add_info_oid == BYTEAOID )
1087
1068
{
1088
1069
values [5 ] = get_positions_to_text_datum (rum_item_ptr -> addInfo );
@@ -1427,22 +1408,16 @@ get_page_from_raw(bytea *raw_page)
1427
1408
* int2, int4, int8, float4, float8, money, oid, timestamp,
1428
1409
* timestamptz, time, timetz, date, interval, macaddr, inet,
1429
1410
* cidr, text, varchar, char, bytea, bit, varbit, numeric.
1430
- *
1431
- * TODO: All types accepted by rum must be checked, but
1432
- * perhaps some types are missing or some are superfluous.
1433
1411
*/
1434
1412
static Datum
1435
1413
get_datum_text_by_oid (Datum info , Oid info_oid )
1436
1414
{
1437
1415
char * str_info = NULL ;
1438
1416
1439
- /* info cannot be NULL */
1440
- Assert (DatumGetPointer (info ) != NULL );
1441
-
1442
1417
/*
1443
1418
* Form a string depending on the type of info.
1444
1419
*
1445
- * FIXME : The macros used below are taken from the
1420
+ * TODO : The macros used below are taken from the
1446
1421
* pg_type_d.h file, and it says not to use them
1447
1422
* in the new code.
1448
1423
*/
@@ -1528,18 +1503,9 @@ get_datum_text_by_oid(Datum info, Oid info_oid)
1528
1503
str_info = OidOutputFunctionCall (F_CHAROUT , info );
1529
1504
break ;
1530
1505
1531
- /*
1532
- * TODO: For some reason, the rum index created for a single tsv
1533
- * field contains additional information as bytea. In addition,
1534
- * if additional information in this format is extracted from
1535
- * posting tree pages, it cannot be displayed correctly as text.
1536
- * If the additional information was extracted from the entry
1537
- * tree pages, then it is displayed correctly.
1538
- */
1539
1506
case BYTEAOID :
1540
- /* str_info = OidOutputFunctionCall(F_BYTEAOUT, info); */
1541
- /* break; */
1542
- return CStringGetTextDatum ("BYTEAOID is not supported" );
1507
+ str_info = OidOutputFunctionCall (F_BYTEAOUT , info );
1508
+ break ;
1543
1509
1544
1510
case BITOID :
1545
1511
str_info = OidOutputFunctionCall (F_BIT_OUT , info );
@@ -1634,14 +1600,14 @@ get_rel_raw_page(Relation rel, BlockNumber blkno)
1634
1600
* the Oid of additional information for an attribute for
1635
1601
* which it is not NULL.
1636
1602
*
1637
- * TODO: The logic of the function assumes that there cannot
1603
+ * The logic of the function assumes that there cannot
1638
1604
* be several types of additional information in the index,
1639
1605
* otherwise it will not work.
1640
1606
*/
1641
1607
static Oid
1642
1608
find_add_info_oid (RumState * rum_state_ptr )
1643
1609
{
1644
- Oid add_info_oid = 0 ;
1610
+ Oid add_info_oid = InvalidOid ;
1645
1611
1646
1612
/* Number of index attributes */
1647
1613
int num_attrs = rum_state_ptr -> origTupdesc -> natts ;
@@ -1651,8 +1617,13 @@ find_add_info_oid(RumState *rum_state_ptr)
1651
1617
* oid of additional information.
1652
1618
*/
1653
1619
for (int i = 0 ; i < num_attrs ; i ++ )
1620
+ {
1654
1621
if ((rum_state_ptr -> addAttrs )[i ] != NULL )
1622
+ {
1623
+ Assert (add_info_oid == InvalidOid );
1655
1624
add_info_oid = ((rum_state_ptr -> addAttrs )[i ])-> atttypid ;
1625
+ }
1626
+ }
1656
1627
1657
1628
return add_info_oid ;
1658
1629
}
@@ -1661,19 +1632,28 @@ find_add_info_oid(RumState *rum_state_ptr)
1661
1632
* This is an auxiliary function to get the attribute number
1662
1633
* for additional information. It is used in the rum_leaf_data_page_items()
1663
1634
* function to call the rumDataPageLeafRead() function.
1635
+ *
1636
+ * The logic of the function assumes that there cannot
1637
+ * be several types of additional information in the index,
1638
+ * otherwise it will not work.
1664
1639
*/
1665
1640
static OffsetNumber
1666
1641
find_add_info_atrr_num (RumState * rum_state_ptr )
1667
1642
{
1668
- OffsetNumber add_info_attr_num = 0 ;
1643
+ OffsetNumber add_info_attr_num = InvalidOffsetNumber ;
1669
1644
1670
1645
/* Number of index attributes */
1671
1646
int num_attrs = rum_state_ptr -> origTupdesc -> natts ;
1672
1647
1673
1648
/* Go through the addAttrs array */
1674
- for (int i = 0 ; i < num_attrs ; i ++ )
1649
+ for (int i = 0 ; i < num_attrs ; i ++ )
1650
+ {
1675
1651
if ((rum_state_ptr -> addAttrs )[i ] != NULL )
1652
+ {
1653
+ Assert (add_info_attr_num == InvalidOffsetNumber );
1676
1654
add_info_attr_num = i ;
1655
+ }
1656
+ }
1677
1657
1678
1658
/* Need to add 1 because the attributes are numbered from 1 */
1679
1659
return add_info_attr_num + 1 ;
@@ -1683,8 +1663,8 @@ find_add_info_atrr_num(RumState *rum_state_ptr)
1683
1663
#define POS_MAX_VAL_LENGHT 6
1684
1664
1685
1665
/*
1686
- * A function for extracting the positions of tokens from additional
1687
- * information. Returns a string in which the positions of the tokens
1666
+ * A function for extracting the positions of lexemes from additional
1667
+ * information. Returns a string in which the positions of the lexemes
1688
1668
* are recorded. The memory that the string occupies must be cleared later.
1689
1669
*/
1690
1670
static Datum
@@ -1711,14 +1691,17 @@ get_positions_to_text_datum(Datum add_info)
1711
1691
cur_max_str_lenght = POS_STR_BUF_LENGHT ;
1712
1692
positions_str_cur_ptr = positions_str ;
1713
1693
1714
- /* Extract the positions of the tokens and put them in the string */
1694
+ /* Extract the positions of the lexemes and put them in the string */
1715
1695
for (int i = 0 ; i < npos ; i ++ )
1716
1696
{
1717
1697
/* At each iteration decode the position */
1718
1698
ptrt = decompress_pos (ptrt , & position );
1719
1699
1720
- /* Write this position in the string */
1721
- sprintf (positions_str_cur_ptr , "%d," , position );
1700
+ /* Write this position and weight in the string */
1701
+ if (pos_get_weight (position ) == 'D' )
1702
+ sprintf (positions_str_cur_ptr , "%d," , WEP_GETPOS (position ));
1703
+ else
1704
+ sprintf (positions_str_cur_ptr , "%d%c," , WEP_GETPOS (position ), pos_get_weight (position ));
1722
1705
1723
1706
/* Moving the pointer forward */
1724
1707
positions_str_cur_ptr += strlen (positions_str_cur_ptr );
@@ -1744,3 +1727,25 @@ get_positions_to_text_datum(Datum add_info)
1744
1727
pfree (positions_str );
1745
1728
return res ;
1746
1729
}
1730
+
1731
+ /*
1732
+ * The function extracts the weight and
1733
+ * returns the corresponding letter.
1734
+ */
1735
+ static char
1736
+ pos_get_weight (WordEntryPos position )
1737
+ {
1738
+ char res = 'D' ;
1739
+
1740
+ switch (WEP_GETWEIGHT (position ))
1741
+ {
1742
+ case 3 :
1743
+ return 'A' ;
1744
+ case 2 :
1745
+ return 'B' ;
1746
+ case 1 :
1747
+ return 'C' ;
1748
+ }
1749
+
1750
+ return res ;
1751
+ }
0 commit comments