Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove v1 of hot_score from unified document #2146

Merged
merged 3 commits into from
Mar 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/discussion/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ def update_discussion_count(self):
paper = self.paper
if paper:
new_dis_count = paper.get_discussion_count()
paper.calculate_hot_score()

paper.discussion_count = new_dis_count
paper.save(update_fields=["discussion_count"])
Expand Down
47 changes: 20 additions & 27 deletions src/paper/management/commands/recalc_hot_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,45 +7,38 @@
class Command(BaseCommand):

def add_arguments(self, parser):
parser.add_argument(
'--save',
default=False,
help='Should save the score'
)

parser.add_argument(
'--id',
default=False,
help='Only perform for specific id'
)

parser.add_argument(
'--start_date',
help='Perform for date starting'
)
parser.add_argument("--save", default=False, help="Should save the score")

parser.add_argument("--id", default=False, help="Only perform for specific id")

parser.add_argument("--start_date", help="Perform for date starting")

def handle(self, *args, **options):
docs = ResearchhubUnifiedDocument.objects.filter(is_removed=False).order_by('id')
docs = ResearchhubUnifiedDocument.objects.filter(is_removed=False).order_by(
"id"
)

print('Recalculating hot score')
print("Recalculating hot score")
save = False

if options['save']:
if options["save"]:
save = True
if options['id']:
docs = docs.filter(id=options['id'])
if options['start_date']:
if options["id"]:
docs = docs.filter(id=options["id"])
if options["start_date"]:
print(f"Calculating for docs GTE: {options['start_date']}")
start_date = parser.parse(options['start_date'])
start_date = parser.parse(options["start_date"])
docs = docs.filter(created_date__gte=start_date)

count = docs.count()
for i, doc in enumerate(docs):
try:
if doc.document_type.upper() in ['DISCUSSION', 'PAPER']:
hot_score_tpl = doc.calculate_hot_score_v2(should_save=save)
if doc.document_type.upper() in ["DISCUSSION", "PAPER"]:
hot_score_tpl = doc.calculate_hot_score(should_save=save)

print(f'Doc: {doc.id}, {doc.document_type}, score: {hot_score_tpl[0]} - {i + 1}/{count}')
print(
f"Doc: {doc.id}, {doc.document_type}, score: {hot_score_tpl[0]} - {i + 1}/{count}"
)

except Exception as e:
print(f'Error updating score for paper: {doc.id}', e)
print(f"Error updating score for paper: {doc.id}", e)
112 changes: 0 additions & 112 deletions src/paper/related_models/paper_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,12 +518,6 @@ def hot_score(self):
return self.score
return self.unified_document.hot_score

@property
def hot_score_v2(self):
if self.unified_document is None:
return self.score
return self.unified_document.hot_score_v2

@property
def votes_indexing(self):
all_votes = self.votes.all()
Expand Down Expand Up @@ -552,112 +546,6 @@ def get_accepted_answer(self):
is_accepted_answer=True, discussion_post_type="ANSWER"
).first()

def calculate_hot_score(paper):
    """Compute and persist the (v1) hot score for *paper*.

    Combines log-scaled upvote count, vote recency, discussion count,
    moderator boosts, and upload date into a single ranking score, then
    scales it by the paper's completeness. Writes the result onto
    ``paper.unified_document.hot_score`` and saves the paper.

    Returns:
        The computed hot score; ``0`` if ``paper.score`` is None, or the
        raw ``paper.score`` if the paper has no unified document.
    """
    # Epoch anchor (2019-01-01 UTC) subtracted from vote timestamps so the
    # averaged-vote signal starts near zero.
    ALGO_START_UNIX = 1546329600
    # Divisor applied to epoch-second quantities to shrink them into the
    # same rough magnitude as the log-scaled signals.
    TIME_DIV = 3600000
    # NOTE(review): named HOUR_SECONDS but equals 86400, the number of
    # seconds in a *day* — used below to convert a timedelta to days.
    HOUR_SECONDS = 86400
    # Multiplier applied to the log of the day delta for recent uploads.
    DATE_BOOST = 10

    # Paid, positive-amount boosts placed by moderators count toward the score.
    boosts = paper.purchases.filter(
        paid_status=Purchase.PAID,
        amount__gt=0,
        user__moderator=True,
        boost_time__gte=0,
    )
    # Midnight UTC today; only hour/minute/second are zeroed (microseconds kept).
    today = datetime.datetime.now(tz=pytz.utc).replace(hour=0, minute=0, second=0)
    score = paper.score
    if score is None:
        return 0

    unified_doc = paper.unified_document
    if unified_doc is None:
        # Nothing to persist to — fall back to the raw score.
        return score

    original_uploaded_date = paper.created_date
    uploaded_date = original_uploaded_date
    day_delta = datetime.timedelta(days=2)
    # Papers newer than this 2-day cutoff get special date handling below.
    timeframe = today - day_delta

    if original_uploaded_date > timeframe:
        # Clamp very recent uploads to the cutoff date (keeping the original
        # time-of-day) so the linear date term doesn't dominate the score.
        uploaded_date = timeframe.replace(
            hour=original_uploaded_date.hour,
            minute=original_uploaded_date.minute,
            second=original_uploaded_date.second,
        )

    votes = paper.votes
    if votes.exists():
        # Average vote creation time as epoch seconds; `or 0` guards a
        # NULL aggregate result.
        vote_avg_epoch = (
            paper.votes.aggregate(
                avg=Avg(
                    Extract("created_date", "epoch"),
                    output_field=models.IntegerField(),
                )
            )["avg"]
            or 0
        )
        num_votes = votes.count()
    else:
        num_votes = 0
        # No votes: treat the average vote time as the 2-day cutoff.
        vote_avg_epoch = timeframe.timestamp()

    # Recency of votes, floored at zero so pre-anchor votes don't go negative.
    vote_avg = (max(0, vote_avg_epoch - ALGO_START_UNIX)) / TIME_DIV

    base_score = paper_piecewise_log(score + 1)
    uploaded_date_score = uploaded_date.timestamp() / TIME_DIV
    vote_score = paper_piecewise_log(num_votes + 1)
    discussion_score = paper_piecewise_log(paper.discussion_count + 1)

    # Why we log delta days:
    # Ex: If paper 1 was uploaded 3 days ago with a low score and paper
    # 2 was uploaded 4 days ago with a very high score, paper 2 will
    # appear higher in the feed than paper 1. If we remove the delta
    # days log, paper 1 will appear higher just because time is linear,
    # and it gives a better score.

    if original_uploaded_date > timeframe:
        # Recent upload: add a log-scaled bonus for how far inside the
        # 2-day window the paper is.
        uploaded_date_delta = original_uploaded_date - timeframe
        delta_days = (
            paper_piecewise_log(uploaded_date_delta.total_seconds() / HOUR_SECONDS)
            * DATE_BOOST
        )
        uploaded_date_score += delta_days
    else:
        # Older upload: subtract a log-scaled penalty for its age beyond
        # the window (+1 keeps the log argument positive).
        uploaded_date_delta = timeframe - original_uploaded_date
        delta_days = (
            -paper_piecewise_log(
                (uploaded_date_delta.total_seconds() / HOUR_SECONDS) + 1
            )
            * DATE_BOOST
        )
        uploaded_date_score += delta_days

    boost_score = 0
    if boosts.exists():
        # Sum all moderator boost amounts, then log-scale the total.
        boost_amount = sum(map(int, boosts.values_list("amount", flat=True)))
        boost_score = paper_piecewise_log(boost_amount + 1)

    hot_score = (
        base_score
        + uploaded_date_score
        + vote_avg
        + vote_score
        + discussion_score
        + boost_score
    ) * 1000

    # Penalize incomplete metadata: 5% off for partial, 10% off otherwise.
    completeness = paper.completeness
    if completeness == paper.COMPLETE:
        hot_score *= 1
    elif completeness == paper.PARTIAL:
        hot_score *= 0.95
    else:
        hot_score *= 0.90

    # Persist on the unified document; saving the paper presumably cascades
    # or triggers related updates — TODO confirm why paper.save() (not
    # unified_doc.save()) is called here.
    unified_doc.hot_score = hot_score
    paper.save()
    return hot_score

def get_promoted_score(paper):
purchases = paper.purchases.filter(
paid_status=Purchase.PAID, amount__gt=0, boost_time__gt=0
Expand Down
1 change: 0 additions & 1 deletion src/paper/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def add_unified_doc(created, instance, **kwargs):
try:
unified_doc = ResearchhubUnifiedDocument.objects.create(
document_type=PAPER_DOC_TYPE,
hot_score=instance.calculate_hot_score(),
score=instance.score,
)
unified_doc.hubs.add(*instance.hubs.all())
Expand Down
2 changes: 1 addition & 1 deletion src/paper/tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ def create_paper(
)
unified_doc = ResearchhubUnifiedDocument.objects.create(
document_type=PAPER_DOC_TYPE,
hot_score=paper.calculate_hot_score(),
score=paper.score,
)
paper.unified_document = unified_doc
paper.save()

return paper


Expand Down
5 changes: 0 additions & 5 deletions src/purchase/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,11 @@

@app.task
def update_purchases():
PAPER_CONTENT_TYPE = ContentType.objects.get(app_label="paper", model="paper")
purchases = Purchase.objects.filter(boost_time__gt=0)
for purchase in purchases:
purchase.boost_time = purchase.get_boost_time()
purchase.save()

if purchase.content_type == PAPER_CONTENT_TYPE:
paper = PAPER_CONTENT_TYPE.get_object_for_this_type(id=purchase.object_id)
paper.calculate_hot_score()


@app.task(queue=QUEUE_NOTIFICATION)
def send_support_email(
Expand Down
1 change: 0 additions & 1 deletion src/purchase/views/purchase_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,6 @@ def create(self, request):
if content_type_str == "paper":
paper = Paper.objects.get(id=object_id)
unified_doc = paper.unified_document
paper.calculate_hot_score()
recipient = paper.uploaded_by
cache_key = get_cache_key("paper", object_id)
cache.delete(cache_key)
Expand Down
2 changes: 1 addition & 1 deletion src/reputation/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,4 +434,4 @@
open_bounties = Bounty.objects.filter(status=Bounty.OPEN)

for bounty in open_bounties:
bounty.unified_document.calculate_hot_score_v2(should_save=True)
bounty.unified_document.calculate_hot_score(should_save=True)

Check warning on line 437 in src/reputation/tasks.py

View check run for this annotation

Codecov / codecov/patch

src/reputation/tasks.py#L437

Added line #L437 was not covered by tests
2 changes: 1 addition & 1 deletion src/researchhub_document/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def ordering_filter(self, qs, name, value):
qs = qs.filter(created_date__range=(start_date, end_date))
ordering.append("-created_date")
elif value == HOT:
ordering.append("-hot_score_v2")
ordering.append("-hot_score")
elif value == DISCUSSED:
key = f"document_filter__discussed_{time_scope}"
if time_scope != "all":
Expand Down
8 changes: 3 additions & 5 deletions src/researchhub_document/hot_score_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from django.db.models import Q

from reputation.related_models.bounty import Bounty
from researchhub_document.related_models.constants.document_type import PAPER


class HotScoreMixin:
Expand All @@ -27,7 +26,6 @@

def _get_time_score(self, date):
num_seconds_in_half_day = 43000
num_seconds_in_one_day = 86000

input_date = date.replace(tzinfo=None)
epoch_date = datetime.datetime(2020, 1, 1)
Expand Down Expand Up @@ -58,7 +56,7 @@
total_bounty_score = 0

try:
bounty_promo_period = three_days_in_seconds = 259200
bounty_promo_period = 259200

Check warning on line 59 in src/researchhub_document/hot_score_mixin.py

View check run for this annotation

Codecov / codecov/patch

src/researchhub_document/hot_score_mixin.py#L59

Added line #L59 was not covered by tests
open_bounties = Bounty.objects.filter(
unified_document_id=self.id, status=Bounty.OPEN
)
Expand Down Expand Up @@ -114,7 +112,7 @@
# add them to some time score elapsed since the epoch. The signals should be
# somewhat comparable to the time score. To do that, we pass these signals through
# log functions so that scores don't grow out of control.
def calculate_hot_score_v2(self, should_save=False):
def calculate_hot_score(self, should_save=False):
MIN_REQ_DISCUSSIONS = 1
hot_score = 0
doc = self.get_document()
Expand Down Expand Up @@ -168,7 +166,7 @@
}

if should_save:
self.hot_score_v2 = hot_score
self.hot_score = hot_score

Check warning on line 169 in src/researchhub_document/hot_score_mixin.py

View check run for this annotation

Codecov / codecov/patch

src/researchhub_document/hot_score_mixin.py#L169

Added line #L169 was not covered by tests
self.save()

return (hot_score, debug_obj)
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 5.1.5 on 2025-02-28 13:18

from django.db import migrations


class Migration(migrations.Migration):
    """Drop the legacy v1 ``hot_score`` column from the unified document.

    First step of retiring hot_score v1: the old field is removed here so a
    follow-up migration can rename ``hot_score_v2`` into its place.
    """

    dependencies = [
        (
            "researchhub_document",
            "0064_researchhubunifieddocument_doc_type_hot_score_idx",
        ),
    ]

    operations = [
        # Remove the v1 score column; v2 (``hot_score_v2``) remains the
        # source of truth until the rename in the next migration.
        migrations.RemoveField(
            model_name="researchhubunifieddocument",
            name="hot_score",
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Generated by Django 5.1.5 on 2025-02-28 13:55

from django.db import migrations, models


class Migration(migrations.Migration):
    """Rename ``hot_score_v2`` to ``hot_score`` and rebuild its indexes.

    Second step of retiring hot_score v1 (after 0065 dropped the old
    column): every index that referenced either name is removed first,
    the field is renamed, and the indexes are recreated against the new
    ``hot_score`` name.
    """

    dependencies = [
        ("hub", "0028_hub_hub_hub_name_upper_idx"),
        ("researchhub_document", "0065_remove_researchhubunifieddocument_hot_score"),
        ("tag", "0004_remove_concept_hub"),
        ("topic", "0008_alter_unifieddocumenttopics_unique_together"),
    ]

    operations = [
        # Drop all indexes touching hot_score / hot_score_v2 before the
        # rename; they are recreated below against the renamed column.
        migrations.RemoveIndex(
            model_name="researchhubunifieddocument",
            name="idx_unified_doc_hot_score_v2",
        ),
        migrations.RemoveIndex(
            model_name="researchhubunifieddocument",
            name="idx_document_type_hot_score",
        ),
        migrations.RemoveIndex(
            model_name="researchhubunifieddocument",
            name="idx_paper_filter_sort",
        ),
        migrations.RemoveIndex(
            model_name="researchhubunifieddocument",
            name="doc_type_hot_score_idx",
        ),
        # v2 becomes the one and only hot_score.
        migrations.RenameField(
            model_name="researchhubunifieddocument",
            old_name="hot_score_v2",
            new_name="hot_score",
        ),
        # Descending hot-score ordering within a document type (feed sort).
        migrations.AddIndex(
            model_name="researchhubunifieddocument",
            index=models.Index(
                fields=["document_type", "-hot_score"], name="doc_type_hot_score_idx"
            ),
        ),
        # Partial index covering the common paper-feed filter + sort path.
        migrations.AddIndex(
            model_name="researchhubunifieddocument",
            index=models.Index(
                condition=models.Q(("document_type", "PAPER")),
                fields=["is_removed", "document_type", "hot_score", "document_filter"],
                name="idx_paper_filter_sort",
            ),
        ),
        # Plain hot_score index, replacing idx_unified_doc_hot_score_v2.
        migrations.AddIndex(
            model_name="researchhubunifieddocument",
            index=models.Index(fields=["hot_score"], name="idx_unified_doc_hot_score"),
        ),
        migrations.AddIndex(
            model_name="researchhubunifieddocument",
            index=models.Index(
                fields=["is_removed", "document_type", "hot_score"],
                name="idx_document_type_hot_score",
            ),
        ),
    ]
Loading