Backend/app.py at main · AILiteracyTest/Backend · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
import os
import sys
import time
import uuid
import asyncio
import random
import aiohttp
import sqlite3 #백분위 계산

from io import BytesIO
from typing import Optional, List, Dict, Any
from fastapi import FastAPI, Query, HTTPException
from fastapi.responses import JSONResponse
from fastapi.concurrency import run_in_threadpool
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from starlette.middleware.cors import CORSMiddleware
from openai import AsyncOpenAI
from pathlib import Path

from unet_autoencoder.ae_explain import analyze_and_explain

# Filesystem layout: everything is resolved relative to the directory
# that contains this module.
BASE_DIR = Path(__file__).resolve().parent
# Scratch directory for downloaded images; created at import time if missing.
TMP_DIR = BASE_DIR / "tmp_images"
TMP_DIR.mkdir(exist_ok=True)
SCORE_DB_PATH=BASE_DIR/'scores.db' # percentile calculation - path of the SQLite score DB file

# Root of the pre-built sample dataset (served as static files under /db_folder).
DATA_DIR = BASE_DIR / "db_folder"

# External API credentials are read from the environment.
OPENAI_API_KEY      = os.getenv("OPENAI_API_KEY")
UNSPLASH_ACCESS_KEY = os.getenv("UNSPLASH_ACCESS_KEY")

# Fail fast at import time when a credential is missing or empty.
if not OPENAI_API_KEY:
    raise RuntimeError("OPENAI_API_KEY is not set")
if not UNSPLASH_ACCESS_KEY:
    raise RuntimeError("UNSPLASH_ACCESS_KEY is not set")

# ========= FastAPI app =========
app = FastAPI(title="Image Analysis (FastAPI+async)")

# Expose the dataset directory as static files under the /db_folder URL prefix.
app.mount("/db_folder", StaticFiles(directory=str(DATA_DIR)), name="db_folder")

# CORS: only the two listed origins (local dev server and the Netlify site)
# are allowed; any method and any header is accepted from them, with
# credentials enabled.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:5173",
        "https://ai-literacy-test.netlify.app"
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# ========= Global session / client =========
# Both handles start as None and are assigned by the startup hook.
http_session: aiohttp.ClientSession | None = None
oai_client: AsyncOpenAI | None = None

@app.on_event("startup")
async def _startup():
    """Create the shared HTTP session and OpenAI client, then set up the score DB."""
    global http_session, oai_client
    # One shared aiohttp session with a 60-second total timeout per request;
    # trust_env=True lets aiohttp read proxy settings from the environment.
    http_session = aiohttp.ClientSession(
        timeout=aiohttp.ClientTimeout(total=60),
        trust_env=True,
    )
    oai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)
    # Percentile calculation: sqlite3 is blocking, so the table creation
    # runs in the thread pool.
    await run_in_threadpool(_init_score_db)

@app.on_event("shutdown")
async def _shutdown():
    """Close the shared aiohttp session if it exists and is still open."""
    global http_session
    # Nothing to do when startup never created a session or it is already closed.
    if not http_session or http_session.closed:
        return
    await http_session.close()

# ========= (cache / utilities, unchanged) =========
# Lifetime of a cached run in seconds (15 minutes).
RUN_TTL_SEC = 15 * 60
# In-process cache keyed by run_id; each record carries a "created_at"
# timestamp that the TTL check compares against.
run_cache: Dict[str, Dict[str, Any]] = {}

def _gc_runs():
    """Drop every run_cache record older than RUN_TTL_SEC seconds."""
    current = time.time()
    expired = []
    # Collect the keys first so the dict is not mutated while iterating it.
    for key, entry in run_cache.items():
        # A record without "created_at" is treated as created at epoch 0.
        age = current - entry.get("created_at", 0)
        if age > RUN_TTL_SEC:
            expired.append(key)
    for key in expired:
        run_cache.pop(key, None)

# def build_random_query() -> str:
#     ages = ["teenage", "young", "middle-aged", "elderly"]
#     races = ["white", "black", "asian"]
#     age = random.choice(ages)
#     gender_candidates = ["boy", "girl"] if age in ["teenage", "young"] else ["male", "female"]
#     race = random.choice(races)
#     gender = random.choice(gender_candidates)
#     return f"{age} {race} {gender}"

def build_random_query() -> str:
    """
    Build a random Unsplash search query.

    One category is picked at random from person / dog / cat / landscape,
    and the query string is assembled from randomly chosen attributes of
    that category.
    """
    kind = random.choice(["human", "dog", "cat", "landscape"])

    # ---------------- person ----------------
    if kind == "human":
        age = random.choice(["teenage", "young", "middle-aged", "elderly"])
        race = random.choice(["white", "black", "asian"])
        # Younger age groups use "boy"/"girl"; older ones use "male"/"female".
        if age in ["teenage", "young"]:
            gender_pool = ["boy", "girl"]
        else:
            gender_pool = ["male", "female"]
        gender = random.choice(gender_pool)
        return " ".join((age, race, gender))

    # ---------------- dog / cat ----------------
    # Both animals have the same query shape: "<age> <color> <breed> <animal>".
    animal_traits = {
        "dog": (
            ["puppy", "young", "adult", "senior"],
            ["brown", "white", "black", "golden"],
            ["labrador", "poodle", "mixed-breed"],
        ),
        "cat": (
            ["kitten", "young", "adult", "senior"],
            ["white", "black", "gray", "orange"],
            ["persian", "siamese", "tabby", "mixed-breed"],
        ),
    }
    if kind in animal_traits:
        age_pool, color_pool, breed_pool = animal_traits[kind]
        age = random.choice(age_pool)
        color = random.choice(color_pool)
        breed = random.choice(breed_pool)
        return f"{age} {color} {breed} {kind}"

    # ---------------- landscape ----------------
    if kind == "landscape":
        return random.choice(
            [
                "mountain landscape",
                "ocean sea view landscape",
                "desert landscape",
                "forest landscape",
            ]
        )

def _init_score_db() -> None:  # percentile calculation - creates the score table
    """Create the `scores` table in the SQLite file at SCORE_DB_PATH if it is missing."""
    create_table_sql = """
            CREATE TABLE IF NOT EXISTS scores (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                score INTEGER NOT NULL,
                created_at REAL NOT NULL
            )
        """
    db = sqlite3.connect(SCORE_DB_PATH)
    try:
        db.execute(create_table_sql)
        db.commit()
    finally:
        # Always release the connection, even if table creation fails.
        db.close()

def _insert_and_calc(score: int) -> dict:  # percentile - store in DB + compute percentile
    """
    Store `score` and return its standing among all stored scores.

    Args:
        score: The score to record.

    Returns:
        A dict with:
          - "rank": 1 + number of stored scores strictly greater than `score`
          - "total": number of stored scores, including the one just inserted
          - "percentile": rounded percentage of stored scores that are
            strictly greater than `score` (0 when the table is empty)
    """
    conn = sqlite3.connect(SCORE_DB_PATH)
    try:
        # 1) Store the new score.
        conn.execute(
            "INSERT INTO scores (score, created_at) VALUES (?, ?)",
            (score, time.time())
        )
        conn.commit()

        # 2) Let SQLite do the counting instead of pulling every row into
        #    Python. A single statement also gives one consistent snapshot
        #    for both numbers. COALESCE covers SUM() returning NULL on an
        #    empty table.
        total, higher = conn.execute(
            "SELECT COUNT(*), "
            "COALESCE(SUM(CASE WHEN score > ? THEN 1 ELSE 0 END), 0) "
            "FROM scores",
            (score,),
        ).fetchone()

        rank = higher + 1
        percentile = round((higher / total) * 100) if total else 0

        return {"rank": rank, "total": total, "percentile": percentile}

    finally:
        conn.close()

# Dataset sub-directories under DATA_DIR. The /load_images handler looks up
# "<id>.<ext>" in real/ and fake/, and "<id>.txt" in text/.
REAL_DIR = DATA_DIR / "real"
FAKE_DIR = DATA_DIR / "fake"
TEXT_DIR = DATA_DIR / "text"

def get_all_ids() -> list[int]:
    """Return the sorted integer ids of all `<id>.txt` files in TEXT_DIR."""
    # Ids are collected from the text directory (considered the safest source).
    found: list[int] = []
    for txt_file in TEXT_DIR.glob("*.txt"):
        try:
            sample_id = int(txt_file.stem)
        except ValueError:
            # File names that are not integers are not samples; skip them.
            continue
        found.append(sample_id)
    found.sort()
    return found

async def fetch_unsplash_image(query: str) -> List[str]:
    """
    Ask Unsplash for one random photo matching `query`.

    Returns a list with a single URL (the photo's raw URL with
    "&fm=jpg&w=1080" appended), or an empty list when the response
    contains no photo.

    Raises:
        HTTPException: 502 when Unsplash answers with a non-200 status.
    """
    assert http_session is not None
    endpoint = "https://api.unsplash.com/photos/random"
    request_params = {"client_id": UNSPLASH_ACCESS_KEY, "query": query}
    async with http_session.get(endpoint, params=request_params) as resp:
        if resp.status != 200:
            body = await resp.text()
            # Only the first 200 characters of the upstream body are forwarded.
            raise HTTPException(status_code=502, detail=f"Unsplash error: {resp.status}, {body[:200]}")
        payload = await resp.json()

    # The response is handled both as a single object and as a list of objects.
    if isinstance(payload, dict):
        photos = [payload]
    else:
        photos = payload or []

    if not photos:
        return []
    raw_url = photos[0]['urls']['raw']
    return [f"{raw_url}&fm=jpg&w=1080"]

async def download_image(url: str) -> str:
    """
    Download the image at `url` (the DALL·E-generated image URL) into TMP_DIR.

    The file is stored under a random `<uuid4-hex>.png` name.

    Args:
        url: URL of the image to download.

    Returns:
        Path of the saved file, as a string.

    Raises:
        HTTPException: 502 when the download answers with a non-200 status.
    """
    assert http_session is not None

    filename = TMP_DIR / f"{uuid.uuid4().hex}.png"
    async with http_session.get(url) as resp:
        if resp.status != 200:
            txt = await resp.text()
            raise HTTPException(
                status_code=502,
                detail=f"Download error: {resp.status}, {txt[:200]}"
            )
        content = await resp.read()

    # Disk writes are blocking; run them in the thread pool so the event
    # loop keeps serving other requests while the file is written.
    await run_in_threadpool(filename.write_bytes, content)

    return str(filename)

async def generate_dalle_image(prompt: str) -> str:
    """Generate one 1024x1792 image with the "dall-e-3" model and return its URL."""
    assert oai_client is not None
    generation_args = {
        "model": "dall-e-3",
        "prompt": prompt,
        "size": "1024x1792",
        "n": 1,
    }
    result = await oai_client.images.generate(**generation_args)
    # Exactly one image is requested (n=1), so take the first entry.
    first_image = result.data[0]
    return first_image.url

# ========= Response models (simplified) =========
# Generated-image part of the /image_analysis response.
class SyntheticOut(BaseModel):
    # URL of the generated image; /image_analysis fills it with the DALL·E URL.
    generated_image_url: Optional[str] = None
    # Value of the "explanation" key of the analysis result, when there is one.
    explanation: Optional[str] = None

# Response body of GET /image_analysis.
class ImageAnalysisOut(BaseModel):
    # Echo of the `mode` query parameter.
    mode: str
    # Key of the cached run; pass it back to get the same images again.
    run_id: str
    # Search query that was used for this run.
    query: str
    # The handler returns {"images": [<url>, ...]} here.
    unsplash: Dict[str, List[str]]
    # Generated image and its explanation.
    synthetic: SyntheticOut

class ScoreIn(BaseModel): # percentile calculation - request model for a score
    # Submitted score; POST /score rejects negative values with HTTP 400.
    score: int

# Response body of GET /load_images.
class LoadImagesOut(BaseModel):
    # Integer id of the randomly selected dataset sample.
    sample_id: int
    # Server-relative URL of the real image ("/db_folder/real/<file>").
    real_url: str
    # Server-relative URL of the fake image ("/db_folder/fake/<file>").
    fake_url: str
    # Content of the sample's "<id>.txt" file.
    explanation: str

# ========= Image generation and loading =========
@app.get("/image_analysis", response_model=ImageAnalysisOut)
async def image_analysis(
    mode: str = Query("default", pattern="^.*$"),
    run_id: Optional[str] = Query(None),
):
    """
    Return one Unsplash photo and one DALL·E 3 image for the same query.

    If `run_id` refers to a cached run that has not expired, the cached
    query, image URLs and analysis are reused; otherwise a new run is
    generated and cached under a fresh run id.

    Args:
        mode: Echoed back unchanged in the response.
        run_id: Id of a previous run to reuse (optional).

    Returns:
        ImageAnalysisOut with the run id, the query, the Unsplash URLs and
        the generated image URL plus its explanation (if any).
    """
    _gc_runs()

    # Look up the cached run; treat a missing or expired record as a miss.
    rec = run_cache.get(run_id) if run_id else None
    if not rec or time.time() - rec["created_at"] > RUN_TTL_SEC:
        rec = None

    if rec is not None:
        query = rec["query"]
        real_urls = rec["unsplash"]
        gen_url = rec["gen_url"]
        # Read the cached analysis. Without this assignment `ae_result`
        # would be unbound on every cache hit and the request would fail.
        ae_result = rec.get("ae_result")
        if ae_result is None:
            # No analysis stored for this run yet: redo it and cache it.
            local_fake_path = await download_image(gen_url)
            ae_result = await run_in_threadpool(analyze_and_explain, local_fake_path)
            rec["ae_result"] = ae_result
    else:
        # 1) Build the query and the prompt.
        query = build_random_query()
        if "landscape" in query:
            prompt = f"A high-resolution landscape photo of {query}, natural lighting, clear atmosphere."
        else:
            prompt = (
                f"A natural candid photo of a {query} outdoors, "
                "standing on a city street or in a park during daytime. "
                "Unposed moment, looking slightly away from the camera, "
                "natural skin texture, realistic lighting, "
                "shot with a handheld camera."
            )

        # 2) Fetch the real photo and generate the synthetic one concurrently.
        real_urls, gen_url = await asyncio.gather(
            fetch_unsplash_image(query),
            generate_dalle_image(prompt),
        )

        # 3) Download the DALL·E image to local disk.
        # NOTE(review): the downloaded file is never removed afterwards —
        # confirm whether analyze_and_explain still needs it before adding
        # cleanup.
        local_fake_path = await download_image(gen_url)

        # 4) Run the U-Net AE + VLM explanation (blocking, so in the thread pool).
        ae_result = await run_in_threadpool(analyze_and_explain, local_fake_path)

        run_id = uuid.uuid4().hex
        run_cache[run_id] = {
            "created_at": time.time(),
            "query": query,
            "unsplash": real_urls,
            "gen_url": gen_url,
            "ae_result": ae_result,
        }

    explanation_text = ae_result.get("explanation") if ae_result else None

    return ImageAnalysisOut(
        mode=mode,
        run_id=run_id,
        query=query,
        unsplash={"images": real_urls},
        synthetic=SyntheticOut(generated_image_url=gen_url, explanation=explanation_text,),
    )

# ========= Percentile calculation =========
# POST /score: store the submitted score and return the dict produced by
# _insert_and_calc. Negative scores are rejected with HTTP 400.
@app.post("/score")
async def save_score(payload: ScoreIn):
    submitted = payload.score
    if submitted < 0:
        raise HTTPException(status_code=400, detail='score must be >= 0')

    # sqlite3 is blocking, so the DB work runs in the thread pool.
    return await run_in_threadpool(_insert_and_calc, submitted)

# ========= Load data from the DB folder =========
# GET /load_images: pick a random sample id and return the URLs of its
# real/fake images together with the content of its text file.
# Responds 404 when there are no samples and 500 when any of the three
# files of the chosen sample is missing.
@app.get("/load_images", response_model=LoadImagesOut)
async def load_images():
    available_ids = await run_in_threadpool(get_all_ids)
    if not available_ids:
        raise HTTPException(status_code=404, detail="No samples found")

    chosen_id = random.choice(available_ids)

    # The image may use any of the supported extensions; the first existing
    # candidate wins.
    def locate(directory, stem):
        candidates = (
            directory / f"{stem}{suffix}"
            for suffix in [".jpg", ".jpeg", ".png", ".webp"]
        )
        return next((c for c in candidates if c.exists()), None)

    real_img = locate(REAL_DIR, chosen_id)
    fake_img = locate(FAKE_DIR, chosen_id)
    caption_file = TEXT_DIR / f"{chosen_id}.txt"

    if not (real_img and fake_img and caption_file.exists()):
        raise HTTPException(status_code=500, detail="Dataset mismatch")

    # File reads are blocking, so they run in the thread pool.
    caption = await run_in_threadpool(caption_file.read_text, encoding="utf-8")

    return LoadImagesOut(
        sample_id=chosen_id,
        real_url=f"/db_folder/real/{real_img.name}",
        fake_url=f"/db_folder/fake/{fake_img.name}",
        explanation=caption,
    )