diff --git a/forum/__init__.py b/forum/__init__.py index 3a5afaa3..44c360c3 100644 --- a/forum/__init__.py +++ b/forum/__init__.py @@ -2,4 +2,4 @@ Openedx forum app. """ -__version__ = "0.3.8" +__version__ = "0.3.9" diff --git a/forum/admin.py b/forum/admin.py index 3d624a8e..c1ff0239 100644 --- a/forum/admin.py +++ b/forum/admin.py @@ -14,6 +14,7 @@ UserVote, Subscription, MongoContent, + ModerationAuditLog, ) @@ -55,11 +56,12 @@ class CommentThreadAdmin(admin.ModelAdmin): # type: ignore "context", "closed", "pinned", + "is_spam", "created_at", "updated_at", ) search_fields = ("title", "body", "author__username", "course_id") - list_filter = ("thread_type", "context", "closed", "pinned") + list_filter = ("thread_type", "context", "closed", "pinned", "is_spam") @admin.register(Comment) @@ -74,9 +76,10 @@ class CommentAdmin(admin.ModelAdmin): # type: ignore "updated_at", "endorsed", "anonymous", + "is_spam", ) search_fields = ("body", "author__username", "comment_thread__title") - list_filter = ("endorsed", "anonymous") + list_filter = ("endorsed", "anonymous", "is_spam") @admin.register(EditHistory) @@ -152,3 +155,100 @@ class MongoContentAdmin(admin.ModelAdmin): # type: ignore list_display = ("mongo_id", "content_object_id", "content_type") search_fields = ("mongo_id",) + + +@admin.register(ModerationAuditLog) +class ModerationAuditLogAdmin(admin.ModelAdmin): # type: ignore + """Admin interface for ModerationAuditLog model.""" + + list_display = ( + "timestamp", + "classification", + "actions_taken", + "body_preview", + "original_author", + "moderator_override", + "confidence_score", + ) + list_filter = ( + "classification", + "moderator_override", + "timestamp", + ) + search_fields = ( + "original_author__username", + "moderator__username", + "reasoning", + "override_reason", + "body", + ) + readonly_fields = ( + "timestamp", + "body", + "classifier_output", + "reasoning", + "classification", + "actions_taken", + "confidence_score", + "original_author", + ) + 
fieldsets = ( + ( + "Moderation Decision", + { + "fields": ( + "timestamp", + "classification", + "actions_taken", + "confidence_score", + "reasoning", + ) + }, + ), + ("Content Information", {"fields": ("body",)}), + ("Author Information", {"fields": ("original_author",)}), + ( + "Human Override", + { + "fields": ( + "moderator_override", + "moderator", + "override_reason", + ) + }, + ), + ( + "Technical Details", + { + "fields": ("classifier_output",), + "classes": ("collapse",), + }, + ), + ) + + def body_preview(self, obj): # type: ignore + """Return a truncated preview of the body for list display.""" + if obj.body: + return obj.body[:100] + "..." if len(obj.body) > 100 else obj.body + return "-" + + body_preview.short_description = "Body Preview" # type: ignore + + # pylint: disable=unused-argument + def has_add_permission(self, request): # type: ignore[no-untyped-def] + """Disable adding audit logs manually.""" + return False + + # pylint: disable=unused-argument + def has_delete_permission(self, request, obj=None): # type: ignore[no-untyped-def] + """Disable deleting audit logs to maintain integrity.""" + return False + + def get_queryset(self, request): # type: ignore + """Optimize queryset with related objects.""" + return ( + super() + .get_queryset(request) + .select_related("original_author", "moderator") + .order_by("-timestamp") + ) diff --git a/forum/ai_moderation.py b/forum/ai_moderation.py new file mode 100644 index 00000000..5e9233e8 --- /dev/null +++ b/forum/ai_moderation.py @@ -0,0 +1,319 @@ +""" +AI Moderation utilities for forum content. 
+""" + +import json +import logging +from typing import Dict, Optional, Any + +import requests +from django.conf import settings +from django.contrib.auth import get_user_model +from django.utils import timezone +from opaque_keys.edx.keys import CourseKey + +from forum.backends.mysql.models import ModerationAuditLog + +User = get_user_model() +log = logging.getLogger(__name__) + + +def _get_author_from_content(content_instance: Any) -> Any: + """ + Get author from content instance. + + Args: + content_instance: Dict containing all content related data + Returns: + Author object or user ID + """ + author_id = content_instance.get("author_id") + if author_id: + try: + return User.objects.get(pk=author_id) + except (User.DoesNotExist, ValueError, TypeError): + # If we can't get the User object, return the ID as fallback + return author_id + return None + + +def create_moderation_audit_log( + content_instance: Any, + moderation_result: Dict[str, Any], + actions_taken: list[str], + original_author: Any, +) -> None: + """ + Create an audit log entry for AI moderation decisions. + + Only creates audit logs for spam content to reduce database load. + + Args: + content_instance: The content object (Thread or Comment, dict or model) + moderation_result: Full result from AI moderation + actions_taken: List of actions taken (e.g., ['flagged'], ['flagged', 'soft_deleted']) + original_author: User who created the content + """ + if original_author is None: + original_author = _get_author_from_content(content_instance) + + content_id = str(content_instance.get("_id")) + content_body = content_instance.get("body", "") + + enhanced_moderation_result = moderation_result.copy() + enhanced_moderation_result.update( + { + "content_id": content_id, + "metadata": { + "_id": content_id, + "title": content_instance.get("title", ""), + "body": ( + content_instance.get("body", "")[:200] + "..." 
+ if len(content_instance.get("body", "")) > 200 + else content_instance.get("body", "") + ), + "course_id": content_instance.get("course_id", ""), + "created_at": str(content_instance.get("created_at", "")), + }, + } + ) + + try: + audit_log = ModerationAuditLog( + timestamp=timezone.now(), + body=content_body, # Store full body content + classifier_output=enhanced_moderation_result, + reasoning=moderation_result.get("reasoning", "No reasoning provided"), + classification=moderation_result.get("classification", "spam"), + actions_taken=actions_taken, + confidence_score=moderation_result.get("confidence_score"), + original_author=original_author, + ) + audit_log.save() + except (ValueError, TypeError, AttributeError) as db_error: + log.error(f"Failed to create database audit log: {db_error}") + + +class AIModerationService: + """ + Service for AI-based content moderation. + + Waffle Flag "discussions.enable_ai_moderation" controls whether AI moderation is active. + + XPERT AI Moderation API is used to classify content as spam or not spam. + """ + + def __init__(self): # type: ignore[no-untyped-def] + """Initialize the AI moderation service.""" + self.api_url = getattr(settings, "AI_MODERATION_API_URL", None) + self.client_id = getattr(settings, "AI_MODERATION_CLIENT_ID", None) + self.system_message = getattr(settings, "AI_MODERATION_SYSTEM_MESSAGE", None) + self.connection_timeout = getattr( + settings, "AI_MODERATION_CONNECTION_TIMEOUT", 30 + ) # seconds + self.read_timeout = getattr( + settings, "AI_MODERATION_READ_TIMEOUT", 30 + ) # seconds + self.ai_moderation_user_id = getattr(settings, "AI_MODERATION_USER_ID", None) + + def _make_api_request(self, content: str) -> Optional[Dict[str, Any]]: + """ + Make API request to XPert Service. 
+ + Args: + content: The text content to moderate + + Returns: + Dictionary with 'reasoning' and 'classification' keys, or None if failed + """ + if not self.api_url: + log.error("AI_MODERATION_API_URL setting is not configured") + return None + + headers = { + "accept": "*/*", + "accept-language": "en-US,en;q=0.9", + "content-type": "application/json", + "user-agent": "Mozilla/5.0 (compatible; edX-Forum-AI-Moderation/1.0)", + } + + payload = { + "messages": [{"role": "user", "content": content}], + "client_id": self.client_id, + "system_message": self.system_message, + } + + try: + response = requests.post( + self.api_url, + headers=headers, + json=payload, + timeout=(self.connection_timeout, self.read_timeout), + ) + response.raise_for_status() + + response_data = response.json() + # Validate response data structure + if not isinstance(response_data, list): + log.error( + f"Expected list response from XPert API, got {type(response_data)}" + ) + return None + + if len(response_data) == 0: + log.error("Empty response list from XPert API") + return None + + if not isinstance(response_data[0], dict): + log.error( + f"Expected dict in response list, got {type(response_data[0])}" + ) + return None + + assistant_content = response_data[0].get("content", "") + # Parse the JSON content from the assistant response + try: + moderation_result = json.loads(assistant_content) + # full API response for audit purposes + moderation_result["full_api_response"] = response_data + return moderation_result + except json.JSONDecodeError as e: + log.error(f"Failed to parse AI moderation response JSON: {e}") + return None + except ( + requests.RequestException, + requests.Timeout, + requests.ConnectionError, + ) as e: + log.error(f"AI moderation API request failed: {e}") + return None + + def moderate_and_flag_content( + self, + content: str, + content_instance: Any, + course_id: Optional[str] = None, + backend: Optional[Any] = None, + ) -> Dict[str, Any]: + """ + Moderate content and 
flag as spam and flag abuse if detected. + + Args: + content: The text content to check + content_instance: The content model instance (Thread or Comment) + course_id: Optional course ID for waffle flag checking + backend: Backend instance for database operations + + Returns: + Dictionary with moderation results and actions taken + """ + result = { + "is_spam": False, + "reasoning": "AI moderation disabled or unavailable", + "classification": "not_spam", + "actions_taken": ["no_action"], + "flagged": False, + } + # Check if AI moderation is enabled + # pylint: disable=import-outside-toplevel + from forum.toggles import ( + is_ai_moderation_enabled, + ) + + course_key = CourseKey.from_string(course_id) if course_id else None + if not is_ai_moderation_enabled(course_key): # type: ignore[no-untyped-call] + return result + + # Make API request + moderation_result = self._make_api_request(content) + + if moderation_result is None: + result["reasoning"] = "AI moderation API failed" + log.warning("AI moderation API failed") + return result + + classification = moderation_result.get("classification", "not_spam") + reasoning = moderation_result.get("reasoning", "No reasoning provided") + is_spam = classification in ["spam", "spam_or_scam"] + + result.update( + { + "is_spam": is_spam, + "reasoning": reasoning, + "classification": classification, + "moderation_result": moderation_result, + } + ) + + if is_spam: + try: + content_instance["is_spam"] = True + + self._mark_as_spam_and_flag_abuse(content_instance, backend) + + result["actions_taken"] = ["flagged"] + result["flagged"] = True + except (AttributeError, ValueError, TypeError) as e: + log.error(f"Failed to flag content as spam: {e}") + result["actions_taken"] = ["no_action"] + else: + result["actions_taken"] = ["no_action"] + + # Only create audit log for spam content (or API failures, handled above) + if is_spam: + create_moderation_audit_log( + content_instance, + moderation_result, + result["actions_taken"], # type: 
ignore[arg-type] + _get_author_from_content(content_instance), + ) + return result + + def _mark_as_spam_and_flag_abuse(self, content_instance: Any, backend: Any) -> None: + """Flag content as abuse using backend methods.""" + content_id = str(content_instance.get("_id")) + content_type = str(content_instance.get("_type")) + extra_data = { + "entity_type": ( + "CommentThread" if content_type == "CommentThread" else "Comment" + ) + } + try: + if not self.ai_moderation_user_id: + raise ValueError("AI_MODERATION_USER_ID setting is not configured.") + backend.flag_content_as_spam(content_type, content_id) + backend.flag_as_abuse( + str(self.ai_moderation_user_id), content_id, **extra_data + ) + except (AttributeError, ValueError, TypeError, ImportError) as e: + log.error(f"Failed to flag content via backend: {e}") + + +# Global instance +ai_moderation_service = AIModerationService() # type: ignore[no-untyped-call] + + +def moderate_and_flag_spam( + content: str, + content_instance: Any, + course_id: Optional[str] = None, + backend: Optional[Any] = None, +) -> Dict[str, Any]: + """ + Moderate content and flag as spam if detected. 
+ + Args: + content: The text content to moderate + content_instance: The content model instance + course_id: Optional course ID for waffle flag checking + backend: Backend instance for database operations + + Returns: + Dictionary with moderation results and actions taken + + TODO:- + - Add content check for images + """ + return ai_moderation_service.moderate_and_flag_content( + content, content_instance, course_id, backend + ) diff --git a/forum/api/comments.py b/forum/api/comments.py index edc14a1c..38a9a8bd 100644 --- a/forum/api/comments.py +++ b/forum/api/comments.py @@ -9,6 +9,7 @@ from django.core.exceptions import ObjectDoesNotExist from rest_framework.serializers import ValidationError +from forum.ai_moderation import moderate_and_flag_spam from forum.backend import get_backend from forum.serializers.comment import CommentSerializer from forum.utils import ForumV2RequestError @@ -129,13 +130,21 @@ def create_child_comment( log.error("Forumv2RequestError for create child comment request.") raise ForumV2RequestError("comment is not created") + # AI Moderation: Check for spam after successful creation + try: + moderate_and_flag_spam(body, comment, course_id, backend) + # Get the updated comment after AI moderation + comment = backend.get_comment(comment_id) + except Exception as e: # pylint: disable=broad-except + log.error(f"AI moderation failed for child comment {comment_id}: {e}") + user = backend.get_user(user_id) thread = backend.get_thread(parent_comment["comment_thread_id"]) if user and thread and comment: backend.mark_as_read(user_id, parent_comment["comment_thread_id"]) try: comment_data = prepare_comment_api_response( - comment, + comment, # type: ignore[arg-type] backend, exclude_fields=["endorsement", "sk"], ) @@ -291,6 +300,13 @@ def create_parent_comment( log.error("Forumv2RequestError for create parent comment request.") raise ForumV2RequestError("comment is not created") comment = backend.get_comment(comment_id) or {} + try: + 
moderate_and_flag_spam(body, comment, course_id, backend) + # Get the updated comment after AI moderation + comment = backend.get_comment(comment_id) # type: ignore[assignment] + except Exception as e: # pylint: disable=broad-except + log.error(f"AI moderation failed for parent comment {comment_id}: {e}") + user = backend.get_user(user_id) if user and comment: backend.mark_as_read(user_id, thread_id) diff --git a/forum/api/threads.py b/forum/api/threads.py index b5b036c5..5eb60768 100644 --- a/forum/api/threads.py +++ b/forum/api/threads.py @@ -8,6 +8,7 @@ from django.core.exceptions import ObjectDoesNotExist from rest_framework.serializers import ValidationError +from forum.ai_moderation import moderate_and_flag_spam from forum.api.users import mark_thread_as_read from forum.backend import get_backend from forum.serializers.thread import ThreadSerializer @@ -327,15 +328,22 @@ def create_thread( thread = backend.get_thread(thread_id) if not thread: raise ForumV2RequestError(f"Failed to create thread with data: {data}") + try: + combined_content = f"{title}\n\n{body}" + moderate_and_flag_spam(combined_content, thread, course_id, backend) + # Get the updated thread after AI moderation + thread = backend.get_thread(thread_id) + except Exception as e: # pylint: disable=broad-except + log.error(f"AI moderation failed for thread {thread_id}: {e}") if not (anonymous or anonymous_to_peers): backend.update_stats_for_course( - thread["author_id"], thread["course_id"], threads=1 + thread["author_id"], thread["course_id"], threads=1 # type: ignore[index] ) try: return prepare_thread_api_response( - thread, + thread, # type: ignore[arg-type] backend, True, data, diff --git a/forum/backends/mongodb/api.py b/forum/backends/mongodb/api.py index b279ac8e..cddc412d 100644 --- a/forum/backends/mongodb/api.py +++ b/forum/backends/mongodb/api.py @@ -1044,6 +1044,7 @@ def create_comment(cls, data: dict[str, Any]) -> str: depth=data.get("depth", 0), 
comment_thread_id=data["comment_thread_id"], + parent_id=data.get("parent_id"), + is_spam=data.get("is_spam", False), + ) if data.get("parent_id"): @@ -1586,6 +1587,7 @@ def create_thread(data: dict[str, Any]) -> str: abuse_flaggers=data.get("abuse_flaggers"), historical_abuse_flaggers=data.get("historical_abuse_flaggers"), group_id=data.get("group_id"), + is_spam=data.get("is_spam", False), + ) return new_thread_id @@ -1763,3 +1765,46 @@ def get_user_contents_by_username(username: str) -> list[dict[str, Any]]: CommentThread().find({"author_username": username}) ) return contents + + # AI Moderation Methods for MongoDB + @staticmethod + def flag_content_as_spam(content_type: str, content_id: str) -> int: + """ + Flag content as spam by setting the is_spam field on the content. + + Note: adding the AI system user to abuse flaggers is handled separately + by the caller (see AIModerationService._mark_as_spam_and_flag_abuse). + + Args: + content_type: Type of content ('CommentThread' or 'Comment') + content_id: ID of the content to flag + + Returns: + Number of documents modified + """ + model = CommentThread() if content_type == "CommentThread" else Comment() + + # Verify the content exists before attempting the update + content = model.get(content_id) + if not content: + return 0 + + return model.update(content_id, is_spam=True) + + @staticmethod + def unflag_content_as_spam(content_type: str, content_id: str) -> int: + """ + Remove spam flag from content. 
+ + Args: + content_type: Type of content ('CommentThread' or 'Comment') + content_id: ID of the content to unflag + + Returns: + Number of documents modified + """ + model = CommentThread() if content_type == "CommentThread" else Comment() + + # Get current content to update flaggers + content = model.get(content_id) + if not content: + return 0 + + return model.update(content_id, is_spam=False) diff --git a/forum/backends/mongodb/comments.py b/forum/backends/mongodb/comments.py index a50563c2..7f9af685 100644 --- a/forum/backends/mongodb/comments.py +++ b/forum/backends/mongodb/comments.py @@ -78,6 +78,7 @@ def insert( abuse_flaggers: Optional[list[str]] = None, historical_abuse_flaggers: Optional[list[str]] = None, visible: bool = True, + is_spam: bool = False, ) -> str: """ Inserts a new comment document into the database. @@ -94,7 +95,7 @@ def insert( abuse_flaggers (Optional[list[str]], optional): Users who flagged the comment. Defaults to None. historical_abuse_flaggers (Optional[list[str]], optional): Users historically flagged the comment. visible (bool, optional): Whether the comment is visible. Defaults to True. - + is_spam (bool, optional): Whether the comment has been flagged as spam by AI moderation. Defaults to False. Returns: str: The ID of the inserted document. """ @@ -104,6 +105,7 @@ def insert( "visible": visible, "abuse_flaggers": abuse_flaggers or [], "historical_abuse_flaggers": historical_abuse_flaggers or [], + "is_spam": is_spam, "parent_ids": [ObjectId(parent_id)] if parent_id else [], "at_position_list": [], "body": body, @@ -163,6 +165,7 @@ def update( edit_reason_code: Optional[str] = None, endorsement_user_id: Optional[str] = None, sk: Optional[str] = None, + is_spam: Optional[bool] = None, ) -> int: """ Updates a comment document in the database. 
@@ -206,6 +209,7 @@ def update( ("depth", depth), ("closed", closed), ("sk", sk), + ("is_spam", is_spam), ] update_data: dict[str, Any] = { field: value for field, value in fields if value is not None diff --git a/forum/backends/mongodb/threads.py b/forum/backends/mongodb/threads.py index be8e9638..61126624 100644 --- a/forum/backends/mongodb/threads.py +++ b/forum/backends/mongodb/threads.py @@ -100,6 +100,7 @@ def insert( abuse_flaggers: Optional[list[str]] = None, historical_abuse_flaggers: Optional[list[str]] = None, group_id: Optional[int] = None, + is_spam: bool = False, ) -> str: """ Inserts a new thread document into the database. @@ -119,7 +120,7 @@ def insert( visible (bool): Whether the thread is visible. Defaults to True. abuse_flaggers: A list of users who flagged the thread for abuse. historical_abuse_flaggers: A list of users who historically flagged the thread for abuse. - + is_spam: Whether the thread was flagged as spam by AI moderation. Defaults to False. Raises: ValueError: If `thread_type` is not 'question' or 'discussion'. ValueError: If `context` is not 'course' or 'standalone'. @@ -162,6 +163,7 @@ def insert( "visible": visible, "abuse_flaggers": abuse_flaggers, "historical_abuse_flaggers": historical_abuse_flaggers, + "is_spam": is_spam, } if group_id: thread_data["group_id"] = group_id @@ -205,6 +207,7 @@ def update( closed_by_id: Optional[str] = None, group_id: Optional[int] = None, skip_timestamp_update: bool = False, + is_spam: Optional[bool] = None, ) -> int: """ Updates a thread document in the database. 
@@ -258,6 +261,7 @@ ("close_reason_code", close_reason_code), ("closed_by_id", closed_by_id), ("group_id", group_id), + ("is_spam", is_spam), ] update_data: dict[str, Any] = { field: value for field, value in fields if value is not None diff --git a/forum/backends/mysql/api.py b/forum/backends/mysql/api.py index 93591ac8..c8633476 100644 --- a/forum/backends/mysql/api.py +++ b/forum/backends/mysql/api.py @@ -1686,6 +1686,9 @@ def update_comment(comment_id: str, **kwargs: Any) -> int: vote=-1, ) + if "is_spam" in kwargs: + comment.is_spam = kwargs["is_spam"] + comment.updated_at = timezone.now() comment.save() return 1 @@ -1898,6 +1901,9 @@ def update_thread( vote=-1, ) + if "is_spam" in kwargs: + thread.is_spam = kwargs["is_spam"] + thread.updated_at = timezone.now() thread.save() return 1 @@ -2208,3 +2214,44 @@ def get_user_contents_by_username(username: str) -> list[dict[str, Any]]: for thread in CommentThread.objects.filter(author__username=username) ] return contents + + # AI Moderation Methods for MySQL + @classmethod + def flag_content_as_spam(cls, content_type: str, content_id: str) -> int: + """ + Flag content as spam by setting the is_spam field on the content. + + Note: adding the AI system user to abuse flaggers is handled separately + by the caller (see AIModerationService._mark_as_spam_and_flag_abuse). + + Args: + content_type: Type of content ('CommentThread' or 'Comment') + content_id: ID of the content to flag + + Returns: + Number of documents modified + """ + + # Use existing update methods to set the spam flag + update_data = {"is_spam": True} + if content_type == "CommentThread": + return cls.update_thread(content_id, **update_data) + else: + return cls.update_comment(content_id, **update_data) + + @classmethod + def unflag_content_as_spam(cls, content_type: str, content_id: str) -> int: + """ + Remove spam flag from content. 
+ + Args: + content_type: Type of content ('CommentThread' or 'Comment') + content_id: ID of the content to unflag + + Returns: + Number of documents modified + """ + # Just update the spam flag to False + update_data = {"is_spam": False} + + if content_type == "CommentThread": + return cls.update_thread(content_id, **update_data) + else: + return cls.update_comment(content_id, **update_data) diff --git a/forum/backends/mysql/models.py b/forum/backends/mysql/models.py index 3924a0d6..e149daa6 100644 --- a/forum/backends/mysql/models.py +++ b/forum/backends/mysql/models.py @@ -125,6 +125,10 @@ class Content(models.Model): updated_at: models.DateTimeField[datetime, datetime] = models.DateTimeField( auto_now=True ) + is_spam: models.BooleanField[bool, bool] = models.BooleanField( + default=False, + help_text="Whether this content has been identified as spam by AI moderation", + ) uservote = GenericRelation( "UserVote", object_id_field="content_object_id", @@ -318,6 +322,7 @@ def to_dict(self) -> dict[str, Any]: "last_activity_at": self.last_activity_at, "edit_history": edit_history, "group_id": self.group_id, + "is_spam": self.is_spam, } def doc_to_hash(self) -> dict[str, Any]: @@ -353,6 +358,9 @@ class Meta: models.Index( fields=["author", "course_id", "anonymous", "anonymous_to_peers"] ), + models.Index(fields=["is_spam"]), + models.Index(fields=["course_id", "is_spam"]), + models.Index(fields=["author", "course_id", "is_spam"]), ] @@ -500,6 +508,7 @@ def to_dict(self) -> dict[str, Any]: "updated_at": self.updated_at, "created_at": self.created_at, "endorsement": endorsement if self.endorsement else None, + "is_spam": self.is_spam, } if edit_history: data["edit_history"] = edit_history @@ -538,6 +547,9 @@ class Meta: models.Index( fields=["author", "course_id", "anonymous", "anonymous_to_peers"] ), + models.Index(fields=["is_spam"]), + models.Index(fields=["course_id", "is_spam"]), + models.Index(fields=["author", "course_id", "is_spam"]), ] @@ -774,3 +786,96 @@ 
class MongoContent(models.Model): class Meta: app_label = "forum" + + +class ModerationAuditLog(models.Model): + """Audit log for AI moderation decisions on spam content.""" + + # Available actions that can be taken on spam content + ACTION_CHOICES = [ + ("flagged", "Content Flagged"), + ("soft_deleted", "Content Soft Deleted"), + ("no_action", "No Action Taken"), + ] + + # Only spam classifications since we don't store non-spam entries + CLASSIFICATION_CHOICES = [ + ("spam", "Spam"), + ("spam_or_scam", "Spam or Scam"), + ] + + timestamp: models.DateTimeField[datetime, datetime] = models.DateTimeField( + default=timezone.now, help_text="When the moderation decision was made" + ) + body: models.TextField[str, str] = models.TextField( + help_text="The content body that was moderated" + ) + classifier_output: models.JSONField[dict[str, Any], dict[str, Any]] = ( + models.JSONField(help_text="Full output from the AI classifier") + ) + reasoning: models.TextField[str, str] = models.TextField( + help_text="AI reasoning for the decision" + ) + classification: models.CharField[str, str] = models.CharField( + max_length=20, + choices=CLASSIFICATION_CHOICES, + help_text="AI classification result", + ) + actions_taken: models.JSONField[list[str], list[str]] = models.JSONField( + default=list, + help_text="List of actions taken based on moderation (e.g., ['flagged', 'soft_deleted'])", + ) + confidence_score: models.FloatField[Optional[float], float] = models.FloatField( + null=True, blank=True, help_text="AI confidence score if available" + ) + moderator_override: models.BooleanField[bool, bool] = models.BooleanField( + default=False, help_text="Whether a human moderator overrode the AI decision" + ) + override_reason: models.TextField[Optional[str], str] = models.TextField( + blank=True, null=True, help_text="Reason for moderator override" + ) + moderator: models.ForeignKey[User, User] = models.ForeignKey( + User, + null=True, + blank=True, + on_delete=models.SET_NULL, + 
related_name="moderation_actions", + help_text="Human moderator who made override", + ) + original_author: models.ForeignKey[User, User] = models.ForeignKey( + User, + on_delete=models.CASCADE, + related_name="moderated_content", + help_text="Original author of the moderated content", + ) + + def to_dict(self) -> dict[str, Any]: + """Return a dictionary representation of the model.""" + return { + "_id": str(self.pk), + "timestamp": self.timestamp.isoformat(), + "body": self.body, + "classifier_output": self.classifier_output, + "reasoning": self.reasoning, + "classification": self.classification, + "actions_taken": self.actions_taken, + "confidence_score": self.confidence_score, + "moderator_override": self.moderator_override, + "override_reason": self.override_reason, + "moderator_id": str(self.moderator.pk) if self.moderator else None, + "moderator_username": self.moderator.username if self.moderator else None, + "original_author_id": str(self.original_author.pk), + "original_author_username": self.original_author.username, + } + + class Meta: + app_label = "forum" + verbose_name = "Moderation Audit Log" + verbose_name_plural = "Moderation Audit Logs" + ordering = ["-timestamp"] + indexes = [ + models.Index(fields=["timestamp"]), + models.Index(fields=["classification"]), + models.Index(fields=["original_author"]), + models.Index(fields=["moderator"]), + ] diff --git a/forum/migrations/0005_moderationauditlog_comment_is_spam_and_more.py b/forum/migrations/0005_moderationauditlog_comment_is_spam_and_more.py new file mode 100644 index 00000000..3627a9fa --- /dev/null +++ b/forum/migrations/0005_moderationauditlog_comment_is_spam_and_more.py @@ -0,0 +1,191 @@ +# Generated by Django 5.2.7 on 2025-11-05 08:11 + +import django.db.models.deletion +import django.utils.timezone +from django.conf import settings +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("forum", "0004_add_author_username_fields"), + 
migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ] + + operations = [ + migrations.CreateModel( + name="ModerationAuditLog", + fields=[ + ( + "id", + models.AutoField( + auto_created=True, + primary_key=True, + serialize=False, + verbose_name="ID", + ), + ), + ( + "timestamp", + models.DateTimeField( + default=django.utils.timezone.now, + help_text="When the moderation decision was made", + ), + ), + ( + "body", + models.TextField(help_text="The content body that was moderated"), + ), + ( + "classifier_output", + models.JSONField(help_text="Full output from the AI classifier"), + ), + ( + "reasoning", + models.TextField(help_text="AI reasoning for the decision"), + ), + ( + "classification", + models.CharField( + choices=[("spam", "Spam"), ("spam_or_scam", "Spam or Scam")], + help_text="AI classification result", + max_length=20, + ), + ), + ( + "actions_taken", + models.JSONField( + default=list, + help_text="List of actions taken based on moderation (e.g., ['flagged', 'soft_deleted'])", + ), + ), + ( + "confidence_score", + models.FloatField( + blank=True, + help_text="AI confidence score if available", + null=True, + ), + ), + ( + "moderator_override", + models.BooleanField( + default=False, + help_text="Whether a human moderator overrode the AI decision", + ), + ), + ( + "override_reason", + models.TextField( + blank=True, help_text="Reason for moderator override", null=True + ), + ), + ], + options={ + "verbose_name": "Moderation Audit Log", + "verbose_name_plural": "Moderation Audit Logs", + "ordering": ["-timestamp"], + }, + ), + migrations.AddField( + model_name="comment", + name="is_spam", + field=models.BooleanField( + default=False, + help_text="Whether this content has been identified as spam by AI moderation", + ), + ), + migrations.AddField( + model_name="commentthread", + name="is_spam", + field=models.BooleanField( + default=False, + help_text="Whether this content has been identified as spam by AI moderation", + ), + ), + 
migrations.AddIndex( + model_name="comment", + index=models.Index( + fields=["is_spam"], name="forum_comme_is_spam_46c762_idx" + ), + ), + migrations.AddIndex( + model_name="comment", + index=models.Index( + fields=["course_id", "is_spam"], name="forum_comme_course__4a265f_idx" + ), + ), + migrations.AddIndex( + model_name="comment", + index=models.Index( + fields=["author", "course_id", "is_spam"], + name="forum_comme_author__dde6dd_idx", + ), + ), + migrations.AddIndex( + model_name="commentthread", + index=models.Index( + fields=["is_spam"], name="forum_comme_is_spam_0e7304_idx" + ), + ), + migrations.AddIndex( + model_name="commentthread", + index=models.Index( + fields=["course_id", "is_spam"], name="forum_comme_course__2c84e0_idx" + ), + ), + migrations.AddIndex( + model_name="commentthread", + index=models.Index( + fields=["author", "course_id", "is_spam"], + name="forum_comme_author__96f3e5_idx", + ), + ), + migrations.AddField( + model_name="moderationauditlog", + name="moderator", + field=models.ForeignKey( + blank=True, + help_text="Human moderator who made override", + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="moderation_actions", + to=settings.AUTH_USER_MODEL, + ), + ), + migrations.AddField( + model_name="moderationauditlog", + name="original_author", + field=models.ForeignKey( + help_text="Original author of the moderated content", + on_delete=django.db.models.deletion.CASCADE, + related_name="moderated_content", + to=settings.AUTH_USER_MODEL, + ), + ), + migrations.AddIndex( + model_name="moderationauditlog", + index=models.Index( + fields=["timestamp"], name="forum_moder_timesta_0d4616_idx" + ), + ), + migrations.AddIndex( + model_name="moderationauditlog", + index=models.Index( + fields=["classification"], name="forum_moder_classif_f477d2_idx" + ), + ), + migrations.AddIndex( + model_name="moderationauditlog", + index=models.Index( + fields=["original_author"], name="forum_moder_origina_c51089_idx" + ), + ), + 
migrations.AddIndex( + model_name="moderationauditlog", + index=models.Index( + fields=["moderator"], name="forum_moder_moderat_c62a1c_idx" + ), + ), + ] diff --git a/forum/serializers/contents.py b/forum/serializers/contents.py index d01f69eb..6fd174b7 100644 --- a/forum/serializers/contents.py +++ b/forum/serializers/contents.py @@ -55,6 +55,7 @@ class ContentSerializer(serializers.Serializer[dict[str, Any]]): edit_history (list): A list of previous versions of the content. closed (bool): Whether the content is closed for further interactions. type (str): The type of content (e.g., "post", "comment"). + is_spam (bool): Whether the content was flagged as spam by AI moderation. """ id = serializers.CharField(source="_id") @@ -76,6 +77,7 @@ class ContentSerializer(serializers.Serializer[dict[str, Any]]): edit_history = EditHistorySerializer(default=[], many=True) closed = serializers.BooleanField(default=False) type = serializers.CharField() + is_spam = serializers.BooleanField(default=False) def create(self, validated_data: dict[str, Any]) -> Any: """Raise NotImplementedError""" diff --git a/forum/toggles.py b/forum/toggles.py index 62616cc4..013810f6 100644 --- a/forum/toggles.py +++ b/forum/toggles.py @@ -5,6 +5,7 @@ FORUM_V2_WAFFLE_FLAG_NAMESPACE = "forum_v2" +DISCUSSION_WAFFLE_FLAG_NAMESPACE = "discussions" # .. toggle_name: forum_v2.enable_mysql_backend # .. toggle_implementation: CourseWaffleFlag @@ -16,3 +17,21 @@ ENABLE_MYSQL_BACKEND = CourseWaffleFlag( f"{FORUM_V2_WAFFLE_FLAG_NAMESPACE}.enable_mysql_backend", __name__ ) + +# .. toggle_name: discussions.enable_ai_moderation +# .. toggle_implementation: CourseWaffleFlag +# .. toggle_default: False +# .. toggle_description: Waffle flag to enable AI moderation for discussions. +# .. toggle_use_cases: temporary, open_edx +# .. toggle_creation_date: 2025-10-29 +# .. 
toggle_target_removal_date: 2026-06-29 +ENABLE_AI_MODERATION = CourseWaffleFlag( + f"{DISCUSSION_WAFFLE_FLAG_NAMESPACE}.enable_ai_moderation", __name__ +) + + +def is_ai_moderation_enabled(course_key): # type: ignore[no-untyped-def] + """ + Check if AI moderation is enabled for the given course. + """ + return ENABLE_AI_MODERATION.is_enabled(course_key)