Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove v1 of hot_score from unified document #2146

Merged
merged 3 commits into from
Mar 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/discussion/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ def update_discussion_count(self):
paper = self.paper
if paper:
new_dis_count = paper.get_discussion_count()
paper.calculate_hot_score()

paper.discussion_count = new_dis_count
paper.save(update_fields=["discussion_count"])
Expand Down
47 changes: 20 additions & 27 deletions src/paper/management/commands/recalc_hot_score.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,45 +7,38 @@
class Command(BaseCommand):

def add_arguments(self, parser):
parser.add_argument(
'--save',
default=False,
help='Should save the score'
)

parser.add_argument(
'--id',
default=False,
help='Only perform for specific id'
)

parser.add_argument(
'--start_date',
help='Perform for date starting'
)
parser.add_argument("--save", default=False, help="Should save the score")

parser.add_argument("--id", default=False, help="Only perform for specific id")

parser.add_argument("--start_date", help="Perform for date starting")

def handle(self, *args, **options):
docs = ResearchhubUnifiedDocument.objects.filter(is_removed=False).order_by('id')
docs = ResearchhubUnifiedDocument.objects.filter(is_removed=False).order_by(
"id"
)

print('Recalculating hot score')
print("Recalculating hot score")
save = False

if options['save']:
if options["save"]:
save = True
if options['id']:
docs = docs.filter(id=options['id'])
if options['start_date']:
if options["id"]:
docs = docs.filter(id=options["id"])
if options["start_date"]:
print(f"Calculating for docs GTE: {options['start_date']}")
start_date = parser.parse(options['start_date'])
start_date = parser.parse(options["start_date"])
docs = docs.filter(created_date__gte=start_date)

count = docs.count()
for i, doc in enumerate(docs):
try:
if doc.document_type.upper() in ['DISCUSSION', 'PAPER']:
hot_score_tpl = doc.calculate_hot_score_v2(should_save=save)
if doc.document_type.upper() in ["DISCUSSION", "PAPER"]:
hot_score_tpl = doc.calculate_hot_score(should_save=save)

print(f'Doc: {doc.id}, {doc.document_type}, score: {hot_score_tpl[0]} - {i + 1}/{count}')
print(
f"Doc: {doc.id}, {doc.document_type}, score: {hot_score_tpl[0]} - {i + 1}/{count}"
)

except Exception as e:
print(f'Error updating score for paper: {doc.id}', e)
print(f"Error updating score for paper: {doc.id}", e)
112 changes: 0 additions & 112 deletions src/paper/related_models/paper_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,12 +518,6 @@ def hot_score(self):
return self.score
return self.unified_document.hot_score

@property
def hot_score_v2(self):
if self.unified_document is None:
return self.score
return self.unified_document.hot_score_v2

@property
def votes_indexing(self):
all_votes = self.votes.all()
Expand Down Expand Up @@ -552,112 +546,6 @@ def get_accepted_answer(self):
is_accepted_answer=True, discussion_post_type="ANSWER"
).first()

def calculate_hot_score(paper):
    """Compute and persist the (v1) hot score for *paper*.

    Combines log-scaled upvote count, vote recency, discussion count,
    moderator boosts, and upload date into a single ranking score, then
    scales it by the paper's completeness. Writes the result onto
    ``paper.unified_document.hot_score`` and saves the paper.

    Returns:
        The computed hot score; ``0`` if ``paper.score`` is None, or the
        raw ``paper.score`` if the paper has no unified document.
    """
    # Epoch anchor (2019-01-01 UTC) subtracted from vote timestamps so the
    # averaged-vote signal starts near zero.
    ALGO_START_UNIX = 1546329600
    # Divisor applied to epoch-second quantities to shrink them into the
    # same rough magnitude as the log-scaled signals.
    TIME_DIV = 3600000
    # NOTE(review): named HOUR_SECONDS but equals 86400, the number of
    # seconds in a *day* — used below to convert a timedelta to days.
    HOUR_SECONDS = 86400
    # Multiplier applied to the log of the day delta for recent uploads.
    DATE_BOOST = 10

    # Paid, positive-amount boosts placed by moderators count toward the score.
    boosts = paper.purchases.filter(
        paid_status=Purchase.PAID,
        amount__gt=0,
        user__moderator=True,
        boost_time__gte=0,
    )
    # Midnight UTC today; only hour/minute/second are zeroed (microseconds kept).
    today = datetime.datetime.now(tz=pytz.utc).replace(hour=0, minute=0, second=0)
    score = paper.score
    if score is None:
        return 0

    unified_doc = paper.unified_document
    if unified_doc is None:
        # Nothing to persist to — fall back to the raw score.
        return score

    original_uploaded_date = paper.created_date
    uploaded_date = original_uploaded_date
    day_delta = datetime.timedelta(days=2)
    # Papers newer than this 2-day cutoff get special date handling below.
    timeframe = today - day_delta

    if original_uploaded_date > timeframe:
        # Clamp very recent uploads to the cutoff date (keeping the original
        # time-of-day) so the linear date term doesn't dominate the score.
        uploaded_date = timeframe.replace(
            hour=original_uploaded_date.hour,
            minute=original_uploaded_date.minute,
            second=original_uploaded_date.second,
        )

    votes = paper.votes
    if votes.exists():
        # Average vote creation time as epoch seconds; `or 0` guards a
        # NULL aggregate result.
        vote_avg_epoch = (
            paper.votes.aggregate(
                avg=Avg(
                    Extract("created_date", "epoch"),
                    output_field=models.IntegerField(),
                )
            )["avg"]
            or 0
        )
        num_votes = votes.count()
    else:
        num_votes = 0
        # No votes: treat the average vote time as the 2-day cutoff.
        vote_avg_epoch = timeframe.timestamp()

    # Recency of votes, floored at zero so pre-anchor votes don't go negative.
    vote_avg = (max(0, vote_avg_epoch - ALGO_START_UNIX)) / TIME_DIV

    base_score = paper_piecewise_log(score + 1)
    uploaded_date_score = uploaded_date.timestamp() / TIME_DIV
    vote_score = paper_piecewise_log(num_votes + 1)
    discussion_score = paper_piecewise_log(paper.discussion_count + 1)

    # Why we log delta days:
    # Ex: If paper 1 was uploaded 3 days ago with a low score and paper
    # 2 was uploaded 4 days ago with a very high score, paper 2 will
    # appear higher in the feed than paper 1. If we remove the delta
    # days log, paper 1 will appear higher just because time is linear,
    # and it gives a better score.

    if original_uploaded_date > timeframe:
        # Recent upload: add a log-scaled bonus for how far inside the
        # 2-day window the paper is.
        uploaded_date_delta = original_uploaded_date - timeframe
        delta_days = (
            paper_piecewise_log(uploaded_date_delta.total_seconds() / HOUR_SECONDS)
            * DATE_BOOST
        )
        uploaded_date_score += delta_days
    else:
        # Older upload: subtract a log-scaled penalty for its age beyond
        # the window (+1 keeps the log argument positive).
        uploaded_date_delta = timeframe - original_uploaded_date
        delta_days = (
            -paper_piecewise_log(
                (uploaded_date_delta.total_seconds() / HOUR_SECONDS) + 1
            )
            * DATE_BOOST
        )
        uploaded_date_score += delta_days

    boost_score = 0
    if boosts.exists():
        # Sum all moderator boost amounts, then log-scale the total.
        boost_amount = sum(map(int, boosts.values_list("amount", flat=True)))
        boost_score = paper_piecewise_log(boost_amount + 1)

    hot_score = (
        base_score
        + uploaded_date_score
        + vote_avg
        + vote_score
        + discussion_score
        + boost_score
    ) * 1000

    # Penalize incomplete metadata: 5% off for partial, 10% off otherwise.
    completeness = paper.completeness
    if completeness == paper.COMPLETE:
        hot_score *= 1
    elif completeness == paper.PARTIAL:
        hot_score *= 0.95
    else:
        hot_score *= 0.90

    # Persist on the unified document; saving the paper presumably cascades
    # or triggers related updates — TODO confirm why paper.save() (not
    # unified_doc.save()) is called here.
    unified_doc.hot_score = hot_score
    paper.save()
    return hot_score

def get_promoted_score(paper):
purchases = paper.purchases.filter(
paid_status=Purchase.PAID, amount__gt=0, boost_time__gt=0
Expand Down
1 change: 0 additions & 1 deletion src/paper/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def add_unified_doc(created, instance, **kwargs):
try:
unified_doc = ResearchhubUnifiedDocument.objects.create(
document_type=PAPER_DOC_TYPE,
hot_score=instance.calculate_hot_score(),
score=instance.score,
)
unified_doc.hubs.add(*instance.hubs.all())
Expand Down
2 changes: 1 addition & 1 deletion src/paper/tests/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ def create_paper(
)
unified_doc = ResearchhubUnifiedDocument.objects.create(
document_type=PAPER_DOC_TYPE,
hot_score=paper.calculate_hot_score(),
score=paper.score,
)
paper.unified_document = unified_doc
paper.save()

return paper


Expand Down
5 changes: 0 additions & 5 deletions src/purchase/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,11 @@

@app.task
def update_purchases():
PAPER_CONTENT_TYPE = ContentType.objects.get(app_label="paper", model="paper")
purchases = Purchase.objects.filter(boost_time__gt=0)
for purchase in purchases:
purchase.boost_time = purchase.get_boost_time()
purchase.save()

if purchase.content_type == PAPER_CONTENT_TYPE:
paper = PAPER_CONTENT_TYPE.get_object_for_this_type(id=purchase.object_id)
paper.calculate_hot_score()


@app.task(queue=QUEUE_NOTIFICATION)
def send_support_email(
Expand Down
1 change: 0 additions & 1 deletion src/purchase/views/purchase_view.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,6 @@ def create(self, request):
if content_type_str == "paper":
paper = Paper.objects.get(id=object_id)
unified_doc = paper.unified_document
paper.calculate_hot_score()
recipient = paper.uploaded_by
cache_key = get_cache_key("paper", object_id)
cache.delete(cache_key)
Expand Down
2 changes: 1 addition & 1 deletion src/reputation/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,4 +434,4 @@
open_bounties = Bounty.objects.filter(status=Bounty.OPEN)

for bounty in open_bounties:
bounty.unified_document.calculate_hot_score_v2(should_save=True)
bounty.unified_document.calculate_hot_score(should_save=True)

Check warning on line 437 in src/reputation/tasks.py

View check run for this annotation

Codecov / codecov/patch

src/reputation/tasks.py#L437

Added line #L437 was not covered by tests
2 changes: 1 addition & 1 deletion src/researchhub_document/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def ordering_filter(self, qs, name, value):
qs = qs.filter(created_date__range=(start_date, end_date))
ordering.append("-created_date")
elif value == HOT:
ordering.append("-hot_score_v2")
ordering.append("-hot_score")
elif value == DISCUSSED:
key = f"document_filter__discussed_{time_scope}"
if time_scope != "all":
Expand Down
8 changes: 3 additions & 5 deletions src/researchhub_document/hot_score_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from django.db.models import Q

from reputation.related_models.bounty import Bounty
from researchhub_document.related_models.constants.document_type import PAPER


class HotScoreMixin:
Expand All @@ -27,7 +26,6 @@

def _get_time_score(self, date):
num_seconds_in_half_day = 43000
num_seconds_in_one_day = 86000

input_date = date.replace(tzinfo=None)
epoch_date = datetime.datetime(2020, 1, 1)
Expand Down Expand Up @@ -58,7 +56,7 @@
total_bounty_score = 0

try:
bounty_promo_period = three_days_in_seconds = 259200
bounty_promo_period = 259200

Check warning on line 59 in src/researchhub_document/hot_score_mixin.py

View check run for this annotation

Codecov / codecov/patch

src/researchhub_document/hot_score_mixin.py#L59

Added line #L59 was not covered by tests
open_bounties = Bounty.objects.filter(
unified_document_id=self.id, status=Bounty.OPEN
)
Expand Down Expand Up @@ -114,7 +112,7 @@
# add them to some time score elapsed since the epoch. The signals should be
# somewhat comparable to the time score. To do that, we pass these signals through
# log functions so that scores don't grow out of control.
def calculate_hot_score_v2(self, should_save=False):
def calculate_hot_score(self, should_save=False):
MIN_REQ_DISCUSSIONS = 1
hot_score = 0
doc = self.get_document()
Expand Down Expand Up @@ -168,7 +166,7 @@
}

if should_save:
self.hot_score_v2 = hot_score
self.hot_score = hot_score

Check warning on line 169 in src/researchhub_document/hot_score_mixin.py

View check run for this annotation

Codecov / codecov/patch

src/researchhub_document/hot_score_mixin.py#L169

Added line #L169 was not covered by tests
self.save()

return (hot_score, debug_obj)
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 5.1.5 on 2025-02-28 13:18

from django.db import migrations


class Migration(migrations.Migration):
    """Drop the legacy v1 ``hot_score`` column from the unified document.

    First step of retiring hot_score v1: the old field is removed here so a
    follow-up migration can rename ``hot_score_v2`` into its place.
    """

    dependencies = [
        (
            "researchhub_document",
            "0064_researchhubunifieddocument_doc_type_hot_score_idx",
        ),
    ]

    operations = [
        # Remove the v1 score column; v2 (``hot_score_v2``) remains the
        # source of truth until the rename in the next migration.
        migrations.RemoveField(
            model_name="researchhubunifieddocument",
            name="hot_score",
        ),
    ]
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Generated by Django 5.1.5 on 2025-02-28 13:55

from django.db import migrations, models


class Migration(migrations.Migration):
    """Rename ``hot_score_v2`` to ``hot_score`` and rebuild its indexes.

    Second step of retiring hot_score v1 (after 0065 dropped the old
    column): every index that referenced either name is removed first,
    the field is renamed, and the indexes are recreated against the new
    ``hot_score`` name.
    """

    dependencies = [
        ("hub", "0028_hub_hub_hub_name_upper_idx"),
        ("researchhub_document", "0065_remove_researchhubunifieddocument_hot_score"),
        ("tag", "0004_remove_concept_hub"),
        ("topic", "0008_alter_unifieddocumenttopics_unique_together"),
    ]

    operations = [
        # Drop all indexes touching hot_score / hot_score_v2 before the
        # rename; they are recreated below against the renamed column.
        migrations.RemoveIndex(
            model_name="researchhubunifieddocument",
            name="idx_unified_doc_hot_score_v2",
        ),
        migrations.RemoveIndex(
            model_name="researchhubunifieddocument",
            name="idx_document_type_hot_score",
        ),
        migrations.RemoveIndex(
            model_name="researchhubunifieddocument",
            name="idx_paper_filter_sort",
        ),
        migrations.RemoveIndex(
            model_name="researchhubunifieddocument",
            name="doc_type_hot_score_idx",
        ),
        # v2 becomes the one and only hot_score.
        migrations.RenameField(
            model_name="researchhubunifieddocument",
            old_name="hot_score_v2",
            new_name="hot_score",
        ),
        # Descending hot-score ordering within a document type (feed sort).
        migrations.AddIndex(
            model_name="researchhubunifieddocument",
            index=models.Index(
                fields=["document_type", "-hot_score"], name="doc_type_hot_score_idx"
            ),
        ),
        # Partial index covering the common paper-feed filter + sort path.
        migrations.AddIndex(
            model_name="researchhubunifieddocument",
            index=models.Index(
                condition=models.Q(("document_type", "PAPER")),
                fields=["is_removed", "document_type", "hot_score", "document_filter"],
                name="idx_paper_filter_sort",
            ),
        ),
        # Plain hot_score index, replacing idx_unified_doc_hot_score_v2.
        migrations.AddIndex(
            model_name="researchhubunifieddocument",
            index=models.Index(fields=["hot_score"], name="idx_unified_doc_hot_score"),
        ),
        migrations.AddIndex(
            model_name="researchhubunifieddocument",
            index=models.Index(
                fields=["is_removed", "document_type", "hot_score"],
                name="idx_document_type_hot_score",
            ),
        ),
    ]
Loading