diff --git a/backend/app/api/endpoints/base/dataset.py b/backend/app/api/endpoints/base/dataset.py index 7c2cb0ff..5ddb9524 100644 --- a/backend/app/api/endpoints/base/dataset.py +++ b/backend/app/api/endpoints/base/dataset.py @@ -2,8 +2,10 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. -from fastapi import APIRouter, File, UploadFile +from fastapi import APIRouter, Depends, File, Request, UploadFile +from app.api.middleware.authentication import validate_access_token +from app.domain.schemas.base.dataset import UpdateDatasetInfo from app.domain.services.base.dataset import DatasetService @@ -24,3 +26,35 @@ async def create_dataset_in_db( access_type: str, ): return DatasetService().create_dataset_in_db(task_id, dataset_name, access_type) + + +@router.get("/task/{task_id}") +async def get_datasets_by_task_id(task_id: int): + return DatasetService().get_datasets_by_task_id(task_id) + + +@router.get("/get_access_types") +async def get_dataset_info_by_name(): + return DatasetService().get_dataset_info_by_name() + + +@router.get("/get_log_access_types") +async def get_log_access_types(): + return DatasetService().get_log_access_types() + + +@router.put("/update/{dataset_id}") +async def update_dataset( + dataset_id: int, + model: UpdateDatasetInfo, + request: Request, + token_payload=Depends(validate_access_token), +): + return DatasetService().update_dataset_access_type(dataset_id, request, model) + + +@router.delete("/delete/{dataset_id}") +async def delete_dataset( + dataset_id: int, request: Request, token_payload=Depends(validate_access_token) +): + return DatasetService().delete_dataset(dataset_id, request) diff --git a/backend/app/api/endpoints/base/task.py b/backend/app/api/endpoints/base/task.py index 925faa85..a70af0ff 100644 --- a/backend/app/api/endpoints/base/task.py +++ b/backend/app/api/endpoints/base/task.py @@ -3,7 +3,7 @@ # LICENSE file in the root directory of this source 
tree. import os -from fastapi import APIRouter, Body, Depends, Request +from fastapi import APIRouter, Body, Depends, Query, Request from fastapi.responses import FileResponse from app.api.middleware.authentication import validate_access_token @@ -13,6 +13,7 @@ GetDynaboardInfoByTaskIdRequest, PreliminaryQuestionsRequest, SignInConsentRequest, + UpdateModelsInTheLoopRequest, UpdateTaskInstructions, UpdateYamlConfiguration, ) @@ -227,3 +228,36 @@ async def get_model_identifiers( if not LoginService().is_admin_or_owner(task_id, request): raise PermissionError("Unauthorized access to get model identifiers.") return TaskService().get_model_identifiers(task_id) + + +@router.put("/update_models_in_the_loop/{task_id}", response_model={}) +async def update_models_in_the_loop( + task_id: int, + request: Request, + model: UpdateModelsInTheLoopRequest, + token_payload=Depends(validate_access_token), +): + if not LoginService().is_admin_or_owner(task_id, request): + raise PermissionError("Unauthorized access to update models in the loop.") + return TaskService().update_models_in_the_loop(task_id, model.model_ids) + + +@router.get("/{task_id}/users", response_model={}) +async def get_user_leaderboard( + task_id: int, + limit: int = Query(5, alias="limit"), + offset: int = Query(0, alias="offset"), +): + return TaskService().get_user_leaderboard(task_id, limit, offset) + + +@router.get("/{task_id}/rounds/{round_id}/users", response_model={}) +async def get_leaderboard_by_task_and_round( + task_id: int, + round_id: int, + limit: int = Query(5, alias="limit"), + offset: int = Query(0, alias="offset"), +): + return TaskService().get_leaderboard_by_task_and_round( + task_id, round_id, limit, offset + ) diff --git a/backend/app/domain/schemas/base/dataset.py b/backend/app/domain/schemas/base/dataset.py index a66ef985..a972f05d 100644 --- a/backend/app/domain/schemas/base/dataset.py +++ b/backend/app/domain/schemas/base/dataset.py @@ -1,3 +1,14 @@ # Copyright (c) MLCommons and its 
affiliates. # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +from typing import Optional + +from pydantic import BaseModel + + +class UpdateDatasetInfo(BaseModel): + access_type: str + log_access_type: Optional[str] = None + longdesc: Optional[str] = None + rid: Optional[int] = 0 + source_url: Optional[str] = None diff --git a/backend/app/domain/schemas/base/task.py b/backend/app/domain/schemas/base/task.py index d91d9d01..f13d1dfa 100644 --- a/backend/app/domain/schemas/base/task.py +++ b/backend/app/domain/schemas/base/task.py @@ -50,3 +50,7 @@ class CheckSignConsentRequest(BaseModel): class UpdateYamlConfiguration(BaseModel): task_id: int config_yaml: str + + +class UpdateModelsInTheLoopRequest(BaseModel): + model_ids: Optional[List[int]] = [] diff --git a/backend/app/domain/services/base/dataset.py b/backend/app/domain/services/base/dataset.py index 6d9362f4..9732bd89 100644 --- a/backend/app/domain/services/base/dataset.py +++ b/backend/app/domain/services/base/dataset.py @@ -11,13 +11,20 @@ import jsonlines from fastapi import File +from app.domain.auth.authentication import LoginService from app.domain.helpers.s3_helpers import S3Helpers +from app.domain.schemas.base.dataset import UpdateDatasetInfo +from app.infrastructure.models.models import AccessTypeEnum, LogAccessTypeEnum from app.infrastructure.repositories.dataset import DatasetRepository +from app.infrastructure.repositories.score import ScoreRepository +from app.infrastructure.repositories.task import TaskRepository class DatasetService: def __init__(self): self.dataset_repository = DatasetRepository() + self.score_repository = ScoreRepository() + self.task_repository = TaskRepository() self.s3_helpers = S3Helpers() def get_dataset_name_by_id(self, dataset_id: int): @@ -58,3 +65,44 @@ def upload_dataset( jsonl_contents.encode("utf-8"), f"datasets/{task_code}/{dataset_name}.jsonl" ) return "Dataset uploaded successfully" + + 
def get_datasets_by_task_id(self, task_id: int):
+        datasets_list = []
+        datasets = self.dataset_repository.get_datasets_by_task_id(task_id)
+        if datasets:
+            for dataset in datasets:
+                datasets_list.append(dataset.__dict__)
+        return datasets_list
+
+    def get_dataset_info_by_name(self):
+        return [enum.name for enum in AccessTypeEnum]
+
+    def get_log_access_types(self):
+        return [enum.name for enum in LogAccessTypeEnum]
+
+    def update_dataset_access_type(
+        self, dataset_id: int, request, model: UpdateDatasetInfo
+    ):
+        dataset = self.dataset_repository.get_dataset_info_by_id(dataset_id)
+        if not LoginService().is_admin_or_owner(dataset["tid"], request):
+            raise PermissionError("Unauthorized access to update dataset.")
+        data = model.dict(exclude_unset=True)  # only the fields the client sent
+        for field in data.keys():
+            if field not in (
+                "longdesc",
+                "rid",
+                "source_url",
+                "access_type",
+                "log_access_type",
+            ):
+                raise ValueError(f"Invalid field: {field}")
+        self.dataset_repository.update_dataset_info(dataset_id, data)
+        return {"success": "ok"}
+
+    def delete_dataset(self, dataset_id: int, request):
+        dataset = self.dataset_repository.get_dataset_info_by_id(dataset_id)
+        if not LoginService().is_admin_or_owner(dataset["tid"], request):
+            raise PermissionError("Unauthorized access to delete dataset.")
+
+        self.dataset_repository.hide_dataset(dataset_id)
+        return {"success": "ok"}
diff --git a/backend/app/domain/services/base/task.py b/backend/app/domain/services/base/task.py
index e60d30ba..ea02501a 100644
--- a/backend/app/domain/services/base/task.py
+++ b/backend/app/domain/services/base/task.py
@@ -711,3 +711,89 @@ def get_model_identifiers(self, task_id):
                 }
             )
         return model_identifiers
+
+    def update_models_in_the_loop(self, task_id, model_ids=None):
+        """
+        Reset the in-the-loop flag for a task and set it on the given models
+        :param task_id: Task id, model_ids: ids to flag (None or empty clears all)
+        :return: Json Object
+        """
+        self.model_repository.clean_models_in_the_loop(task_id)
+        if model_ids:
+            for model_id in model_ids:
+                self.model_repository.update_model_in_the_loop(model_id)
+        return {"success": "ok"}
+
+    def get_user_leaderboard(self, task_id: int, limit: int, offset: int):
+        """
+        Return users and MER based on their examples score based on tasks
+        :param task_id: Task id, limit: page size, offset: page index
+        :return: Json Object
+        """
+        try:
+            task_r_realids = []
+            rounds = self.round_repository.get_rounds_by_task_id(task_id)
+            for round_instance in rounds:
+                round_dict = round_instance.__dict__
+                # Use the row's "id", as get_leaderboard_by_task_and_round does;
+                # "rid" presumably is the per-task round number (verify schema).
+                task_r_realids.append(round_dict["id"])
+            (
+                query_result,
+                total_count,
+            ) = self.example_repository.getUserLeaderByRoundRealids(
+                task_r_realids, limit, offset
+            )
+            return self.__construct_user_board_response_json(query_result, total_count)
+
+        except Exception as e:
+            print(e)
+            return {"count": 0, "data": []}
+
+    def get_leaderboard_by_task_and_round(self, task_id, round_id, limit, offset):
+        """
+        Get top leaders based on their examples score for specific task and round
+        :param task_id: Task id, round_id: round id, limit: page size, offset: page index
+        :return: Json Object
+        """
+        try:
+            round_instance = self.round_repository.get_round_info_by_round_and_task(
+                task_id, round_id
+            ).__dict__
+            (
+                query_result,
+                total_count,
+            ) = self.example_repository.getUserLeaderByRoundRealids(
+                [round_instance["id"]], limit, offset
+            )
+            return self.__construct_user_board_response_json(query_result, total_count)
+
+        except Exception as e:
+            print(e)
+            return {"count": 0, "data": []}
+
+    def __construct_user_board_response_json(self, query_result, total_count=0):
+        """
+        Build the leaderboard payload from the rows of getUserLeaderByRoundRealids
+        :param query_result: rows of (uid, username, avatar_url, verified fooled
+            count, fooling rate, examples submitted)
+        :param total_count: count reported when at least one row is present
+        :return: Json Object with "count" and "data"
+        """
+        list_objs = []
+        for result in query_result:
+            obj = {}
+            obj["uid"] = result[0]
+            obj["username"] = result[1]
+            obj["avatar_url"] = result[2] if result[2] is not None else ""
+            obj["count"] = int(result[3])
+            # The rate is a SQL division and may come back as None (e.g. NULL on
+            # a zero divisor); report 0 instead of failing the whole page.
+            obj["MER"] = str(round((result[4] or 0) * 100, 2))
+            obj["created"] = result[5]
+            obj["total"] = str(result[3]) + "/" + str(result[5])
+            list_objs.append(obj)
+        if list_objs:
+            return {"count": total_count, "data": list_objs}
+        else:
+            return {"count": 0, "data": []}
diff --git a/backend/app/infrastructure/models/models.py b/backend/app/infrastructure/models/models.py
index 5b036d4e..2ecc09f8 100644 --- a/backend/app/infrastructure/models/models.py +++ b/backend/app/infrastructure/models/models.py @@ -6,6 +6,8 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. +import enum + # coding: utf-8 from sqlalchemy import ( JSON, @@ -191,6 +193,17 @@ class Badge(Base): user = relationship("User") +class AccessTypeEnum(enum.Enum): + scoring = "scoring" + standard = "standard" + hidden = "hidden" + + +class LogAccessTypeEnum(enum.Enum): + owner = "owner" + user = "user" + + class Dataset(Base): __tablename__ = "datasets" @@ -201,8 +214,8 @@ class Dataset(Base): desc = Column(String(255)) longdesc = Column(Text) source_url = Column(Text) - access_type = Column(Enum("scoring", "standard", "hidden")) - log_access_type = Column(Enum("owner", "user")) + access_type = Column(Enum(AccessTypeEnum)) + log_access_type = Column(Enum(LogAccessTypeEnum)) tags = Column(Integer) has_downstream = Column(TINYINT(1)) weight = Column(Float) diff --git a/backend/app/infrastructure/repositories/dataset.py b/backend/app/infrastructure/repositories/dataset.py index e8449a33..314be5d7 100644 --- a/backend/app/infrastructure/repositories/dataset.py +++ b/backend/app/infrastructure/repositories/dataset.py @@ -6,6 +6,7 @@ # This source code is licensed under the MIT license found in the # LICENSE file in the root directory of this source tree. 
+from app.domain.schemas.base.dataset import UpdateDatasetInfo from app.infrastructure.models.models import Dataset from app.infrastructure.repositories.abstract import AbstractRepository @@ -131,3 +132,20 @@ def get_dataset_weight(self, dataset_id: int) -> dict: .filter(self.model.id == dataset_id) .one() ) + + def get_datasets_by_task_id(self, task_id: int): + return self.session.query(self.model).filter(self.model.tid == task_id).all() + + def update_dataset_info(self, dataset_id: int, update_data: UpdateDatasetInfo): + self.session.query(self.model).filter(self.model.id == dataset_id).update( + update_data + ) + with self.session as session: + session.commit() + + def hide_dataset(self, dataset_id: int): + self.session.query(self.model).filter(self.model.id == dataset_id).update( + {"tid": 0} + ) + with self.session as session: + session.commit() diff --git a/backend/app/infrastructure/repositories/example.py b/backend/app/infrastructure/repositories/example.py index 7d2117f9..fe30ddc8 100644 --- a/backend/app/infrastructure/repositories/example.py +++ b/backend/app/infrastructure/repositories/example.py @@ -7,9 +7,16 @@ # LICENSE file in the root directory of this source tree. 
from pydantic import Json -from sqlalchemy import func +from sqlalchemy import desc, func -from app.infrastructure.models.models import Context, Example, Round, Validation +from app.infrastructure.models.models import ( + Context, + Example, + Round, + RoundUserExampleInfo, + User, + Validation, +) from app.infrastructure.repositories.abstract import AbstractRepository @@ -223,3 +230,44 @@ def get_used_models_by_user_id_and_task_id(self, user_id: int, task_id: int): .distinct() .all() ) + + def getUserLeaderByRoundRealids( + self, task_r_realids: list, limit: int, offset: int + ): + total_fooled_cnt = func.sum(RoundUserExampleInfo.total_fooled).label( + "total_fooled_cnt" + ) + total_verified_not_correct_fooled_cnt = func.sum( + RoundUserExampleInfo.total_verified_not_correct_fooled + ).label("total_verified_not_correct_fooled_cnt") + examples_submitted_cnt = func.sum( + RoundUserExampleInfo.examples_submitted + ).label("examples_submitted_cnt") + + verified_fooled = ( + total_fooled_cnt - total_verified_not_correct_fooled_cnt + ).label("verified_fooled") + fooling_rate = ( + (total_fooled_cnt - total_verified_not_correct_fooled_cnt) + / examples_submitted_cnt + ).label("fooling_rate") + + query_res = ( + self.session.query( + User.id, + User.username, + User.avatar_url, + verified_fooled, + fooling_rate, + examples_submitted_cnt, + ) + .join(RoundUserExampleInfo, RoundUserExampleInfo.uid == User.id) + .filter(RoundUserExampleInfo.r_realid.in_(task_r_realids)) + .group_by(RoundUserExampleInfo.uid) + .order_by(desc(examples_submitted_cnt)) + ) + results = query_res.limit(limit).offset(offset * limit).all() + + total_count = query_res.count() + + return results, total_count diff --git a/backend/app/infrastructure/repositories/model.py b/backend/app/infrastructure/repositories/model.py index 03398a11..80271539 100644 --- a/backend/app/infrastructure/repositories/model.py +++ b/backend/app/infrastructure/repositories/model.py @@ -310,3 +310,23 @@ def 
get_amount_of_models_uploaded_in_hr_diff(
 
     def get_models_by_task_id(self, task_id: int):
         return self.session.query(self.model).filter(self.model.tid == task_id).all()
+
+    def clean_models_in_the_loop(self, task_id: int):
+        all_models_for_task = (
+            self.session.query(self.model)
+            .filter(self.model.tid == task_id, self.model.is_in_the_loop == 1)
+            .all()
+        )
+        for model in all_models_for_task:
+            model.is_in_the_loop = False
+        self.session.flush()
+        self.session.commit()
+
+    def update_model_in_the_loop(self, model_id: int):
+        # Bulk UPDATE instead of load-then-assign: an unknown model_id
+        # becomes a no-op rather than an AttributeError on None.
+        with self.session as session:
+            session.query(self.model).filter(self.model.id == model_id).update(
+                {"is_in_the_loop": True}
+            )
+            session.commit()
diff --git a/backend/app/infrastructure/repositories/score.py b/backend/app/infrastructure/repositories/score.py
index dbc6e123..73716818 100644
--- a/backend/app/infrastructure/repositories/score.py
+++ b/backend/app/infrastructure/repositories/score.py
@@ -118,3 +118,6 @@ def fix_f1_score(self, model_id: int):
             session.execute(sql, {"model_id": model_id})
             session.flush()
             session.commit()
+
+    def get_scores_for_dataset(self, dataset_id: int):
+        return self.session.query(Score).filter(Score.did == dataset_id).all()
diff --git a/frontends/web/src/common/ApiService.js b/frontends/web/src/common/ApiService.js
index 3389d526..19082b34 100644
--- a/frontends/web/src/common/ApiService.js
+++ b/frontends/web/src/common/ApiService.js
@@ -280,7 +280,7 @@ export default class ApiService {
       round === "overall"
         ?
`/users?limit=${limit || 10}&offset=${offset || 0}` : `/rounds/${round}/users?limit=${limit || 10}&offset=${offset || 0}`; - return this.fetch(`${this.domain}/tasks/${taskId}${url}`, { + return this.fetch(`${this.alternateDomain}/task/${taskId}${url}`, { method: "GET", }); } @@ -667,7 +667,7 @@ export default class ApiService { updateModelsInTheLoop(tid, rid, data) { return this.fetch( - `${this.domain}/tasks/update_models_in_the_loop/${tid}/${rid}`, + `${this.alternateDomain}/task/update_models_in_the_loop/${tid}`, { method: "PUT", body: JSON.stringify(data), @@ -700,25 +700,25 @@ export default class ApiService { } getAvailableDatasetAccessTypes() { - return this.fetch(`${this.domain}/datasets/get_access_types`, { + return this.fetch(`${this.alternateDomain}/dataset/get_access_types`, { method: "GET", }); } getAvailableDatasetLogAccessTypes() { - return this.fetch(`${this.domain}/datasets/get_log_access_types`, { + return this.fetch(`${this.alternateDomain}/dataset/get_log_access_types`, { method: "GET", }); } getDatasets(tid) { - return this.fetch(`${this.domain}/tasks/datasets/${tid}`, { + return this.fetch(`${this.alternateDomain}/dataset/task/${tid}`, { method: "GET", }); } updateDataset(did, data) { - return this.fetch(`${this.domain}/datasets/update/${did}`, { + return this.fetch(`${this.alternateDomain}/dataset/update/${did}`, { method: "PUT", body: JSON.stringify(data), }); @@ -740,7 +740,7 @@ export default class ApiService { } deleteDataset(did) { - return this.fetch(`${this.domain}/datasets/delete/${did}`, { + return this.fetch(`${this.alternateDomain}/dataset/delete/${did}`, { method: "DELETE", }); } diff --git a/frontends/web/src/components/TaskOwnerPageComponents/Datasets.js b/frontends/web/src/components/TaskOwnerPageComponents/Datasets.js index d0848541..458f7795 100644 --- a/frontends/web/src/components/TaskOwnerPageComponents/Datasets.js +++ b/frontends/web/src/components/TaskOwnerPageComponents/Datasets.js @@ -67,6 +67,8 @@ const Datasets = 
(props) => { const config = yaml.load(props.task.config_yaml); const delta_metric_configs = config.delta_metrics ? config.delta_metrics : []; const delta_files = {}; + const token = localStorage.getItem("id_token") || ""; + axios.defaults.headers.common["Authorization"] = `Bearer ${token}`; for (const config of delta_metric_configs) { delta_files[config.type] = null; @@ -86,6 +88,7 @@ const Datasets = (props) => { dataset_name: values.name, task_code: props.task.task_code, }, + withCredentials: true, }) .then((response) => { if (response.status === 200) { diff --git a/frontends/web/src/new_front/pages/Submissions/SubmitPrediction.tsx b/frontends/web/src/new_front/pages/Submissions/SubmitPrediction.tsx index 8b2787ab..7a32186d 100644 --- a/frontends/web/src/new_front/pages/Submissions/SubmitPrediction.tsx +++ b/frontends/web/src/new_front/pages/Submissions/SubmitPrediction.tsx @@ -23,6 +23,8 @@ const SubmitPrediction = () => { reValidateMode: "onSubmit", defaultValues: initState, }); + const token = localStorage.getItem("id_token") || ""; + axios.defaults.headers.common["Authorization"] = `Bearer ${token}`; const isLogin = async () => { if (!user.id) { @@ -72,6 +74,7 @@ const SubmitPrediction = () => { task_code: taskCode, model_name: modelData.modelName.replace(/\s/g, "_"), }, + withCredentials: true, }) .then(() => { Swal.fire({