Skip to content

Commit

Permalink
Merge pull request #27 from 2jun0/GDET-35
Browse files Browse the repository at this point in the history
GDET-35: 데일리 퀴즈 생성 람다 구현
  • Loading branch information
2jun0 authored Feb 7, 2024
2 parents d018512 + 3ef4ef0 commit 36e5b04
Show file tree
Hide file tree
Showing 104 changed files with 1,131 additions and 431 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Test scraper
name: Test aws lambda

on:
pull_request:
Expand All @@ -10,7 +10,7 @@ jobs:
test:
runs-on: ubuntu-latest
env:
working-directory: ./data_scrapers
working-directory: ./aws_lambdas

steps:
- uses: actions/checkout@v3
Expand Down
File renamed without changes.
4 changes: 3 additions & 1 deletion data_scrapers/.gitignore → aws_lambdas/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,6 @@ requirements.txt
# vscode
.vscode
# data
*.csv
*.csv
# jupyter notebook
*.ipynb
Empty file added aws_lambdas/README.md
Empty file.
8 changes: 8 additions & 0 deletions aws_lambdas/daily_quiz/aws_lambda/event.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from typing import Any, Literal, TypedDict

# Closed set of event names that may appear in `Event.name`.
EventName = Literal["save_screenshots", "save_quizzes", "get_all_games"]


class Event(TypedDict):
    """Request envelope: an event name together with its event-specific payload."""

    # Which operation is being requested; one of the `EventName` literals.
    name: EventName
    # Data that accompanies the event; its shape depends on `name`.
    payload: Any
38 changes: 38 additions & 0 deletions aws_lambdas/daily_quiz/aws_lambda/lambda_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import json
from typing import Any, Iterable

import boto3

from daily_quiz.aws_lambda.model import Game

from .. import protocols
from ..config import setting
from .event import Event
from .exception import AWSLambdaException
from .model import SaveGameScreenshot, SaveQuiz


class LambdaAPI(protocols.LambdaAPI):
    """Client that talks to the database lambda through boto3's Lambda `invoke` call."""

    def __init__(self) -> None:
        self.client = boto3.client("lambda")

    def invoke_lambda(self, event: Event) -> Any:
        """Invoke the lambda named by `setting.DATABASE_LAMBDA_NAME` and return its decoded JSON result.

        Args:
            event: The event to send; it is serialized with `json.dumps`.

        Returns:
            The response payload, read as UTF-8 text and parsed with `json.loads`.

        Raises:
            AWSLambdaException: If the response contains a `FunctionError` entry.
        """
        response = self.client.invoke(FunctionName=setting.DATABASE_LAMBDA_NAME, Payload=json.dumps(event))

        if "FunctionError" in response:
            raise AWSLambdaException(response["FunctionError"])

        payload: Any = response["Payload"].read().decode("utf-8")
        return json.loads(payload)

    def save_screenshots(self, screenshots: Iterable[SaveGameScreenshot]):
        """Send a `save_screenshots` event carrying the dumped screenshots."""
        event = Event(name="save_screenshots", payload=[s.model_dump() for s in screenshots])
        self.invoke_lambda(event)

    def save_quizzes(self, quizzes: Iterable[SaveQuiz]):
        """Send a `save_quizzes` event carrying the dumped quizzes."""
        event = Event(name="save_quizzes", payload=[q.model_dump() for q in quizzes])
        self.invoke_lambda(event)

    def get_all_games(self) -> list[Game]:
        """Send a `get_all_games` event and return the result as `Game` models."""
        event = Event(name="get_all_games", payload=None)
        raw_games = self.invoke_lambda(event)

        # `invoke_lambda` returns whatever `json.loads` produced (plain dicts/lists), while the
        # declared return type and the callers (attribute access, hashing into sets) need real
        # `Game` instances, so each item is validated into the model here.
        return [Game.model_validate(raw_game) for raw_game in raw_games]
28 changes: 28 additions & 0 deletions aws_lambdas/daily_quiz/aws_lambda/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
from datetime import datetime
from typing import Optional, Sequence

from pydantic import BaseModel


class Game(BaseModel):
    """A game record; instances hash by `id` so they can be stored in sets."""

    id: int
    steam_id: int
    name: str
    # Optional Korean name (presumably; inferred from the `kr_` prefix). No default is declared.
    kr_name: Optional[str]
    released_at: datetime
    genres: Sequence[str]
    updated_at: datetime
    created_at: datetime

    def __hash__(self) -> int:
        # Only `id` takes part in the hash.
        return hash(self.id)


class SaveGameScreenshot(BaseModel):
    """One screenshot to save, tied to the game it belongs to."""

    steam_file_id: int
    url: str
    # `id` of the `Game` this screenshot was scraped for.
    game_id: int


class SaveQuiz(BaseModel):
    """One quiz to save, described by the screenshots it is built from."""

    screenshots: Sequence[SaveGameScreenshot]
26 changes: 26 additions & 0 deletions aws_lambdas/daily_quiz/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from collections.abc import Sequence

from pydantic_settings import BaseSettings, SettingsConfigDict


class Config(BaseSettings):
    """Settings for the daily quiz lambda; the env file is `.daily_quiz.env` (UTF-8)."""

    # Function name used when invoking the database lambda.
    DATABASE_LAMBDA_NAME: str = "database"
    # Number of genres picked for each run.
    DAILY_QUIZ_CNT: int = 5
    # Pool of genres that a run samples from.
    GAME_GENERES: Sequence[str] = [
        "Action",
        "Adventure",
        "Massively Multiplayer",
        "Strategy",
        "RPG",
        "Indie",
        "Simulation",
        "Casual",
        "Racing",
        "Sports",
    ]
    # Number of genre groups limited to older games.
    # Newer game count will be `DAILY_QUIZ_CNT` - `OLDER_GAME_COUNT`.
    OLDER_GAME_COUNT: int = 2

    model_config = SettingsConfigDict(env_file=".daily_quiz.env", env_file_encoding="utf-8")


# Module-level settings instance, created once when this module is imported.
setting = Config()  # type: ignore
2 changes: 2 additions & 0 deletions aws_lambdas/daily_quiz/daily_quiz/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class NotEnoughGamesError(Exception):
    """Raised when there are too few games for the game-picking algorithm to work."""
90 changes: 90 additions & 0 deletions aws_lambdas/daily_quiz/daily_quiz/game_picker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import random
from collections import defaultdict
from collections.abc import Collection, Iterable, Sequence
from datetime import datetime

from ..aws_lambda.model import Game
from ..config import setting
from .exception import NotEnoughGamesError
from .utils import divide_randomly

# A list of games treated as one group (the games that share a genre).
GameGroup = list[Game]


def _categorize_games_by_genre(games: Iterable[Game], genres: Iterable[str]) -> list[GameGroup]:
    """Group games by genre, keeping only the requested genres.

    A game that has several requested genres is placed in every matching group.
    A requested genre that no game has produces no group at all.

    Args:
        games: Games to group; each one is read through its `genres` attribute.
        genres: Genres to keep. Any iterable works, including a one-shot iterator.

    Returns:
        One list of games per matched genre, ordered by when each genre was first seen.
    """
    # Materialize once: a membership test on a one-shot iterator would consume it and give
    # wrong answers for later games, and a set also makes every test O(1).
    wanted = frozenset(genres)
    categorized: dict[str, GameGroup] = defaultdict(list)

    for game in games:
        for genre in game.genres:
            if genre in wanted:
                categorized[genre].append(game)

    return list(categorized.values())


def _filter_older_games(games: Iterable[Game], threshold_released_at: datetime) -> list[Game]:
    """Return, in input order, the games released at or before `threshold_released_at`."""
    older: list[Game] = []
    for candidate in games:
        if candidate.released_at <= threshold_released_at:
            older.append(candidate)
    return older


def _filter_newer_games(games: Iterable[Game], threshold_released_at: datetime) -> list[Game]:
    """Return, in input order, the games released strictly after `threshold_released_at`."""
    newer: list[Game] = []
    for candidate in games:
        if candidate.released_at > threshold_released_at:
            newer.append(candidate)
    return newer


def _get_median_released_at(games: Iterable[Game]) -> datetime:
    """Return the release date at the middle position of the sorted release dates.

    For an even number of games the upper of the two middle dates is returned.
    An empty input raises `IndexError`.
    """
    ordered = sorted(game.released_at for game in games)
    middle = len(ordered) // 2
    return ordered[middle]


def _pick_older_newer_games(
    categorized_games: Sequence[GameGroup], median_released_at: datetime
) -> tuple[list[GameGroup], list[GameGroup]]:
    """Split the groups at random and keep only older or only newer games in each part.

    `setting.OLDER_GAME_COUNT` randomly chosen groups are reduced to the games released at or
    before `median_released_at`; every other group is reduced to the games released after it.

    Returns:
        The older-only groups and the newer-only groups, in that order.
    """
    groups_for_older, groups_for_newer = divide_randomly(categorized_games, setting.OLDER_GAME_COUNT)

    older_groups = [_filter_older_games(group, median_released_at) for group in groups_for_older]
    newer_groups = [_filter_newer_games(group, median_released_at) for group in groups_for_newer]
    return older_groups, newer_groups


def _pick_unique_per_category(categorized_games: Iterable[GameGroup]) -> set[Game]:
    """Pick one game at random from every group so that no game is picked twice.

    Groups are visited from smallest to largest (presumably so that groups with few
    candidates choose before larger groups can use those candidates up).

    Args:
        categorized_games: The groups to pick from; the games must be hashable.

    Returns:
        The picked games, one per group.

    Raises:
        NotEnoughGamesError: If every game of some group was already picked for another group.
    """
    # Materialize: the groups are walked by `sorted` and again when building the error
    # message, which would find nothing left if a one-shot iterator had been passed in.
    groups = list(categorized_games)
    unique_games: set[Game] = set()

    for games in sorted(groups, key=len):
        candidates = list(set(games) - unique_games)

        if not candidates:
            # Flatten every group. The outer `for` must come first in the comprehension;
            # the reversed order only repeated the current group.
            all_games = {game for group in groups for game in group}
            raise NotEnoughGamesError(
                f"게임의 수가 너무 적어 게임 선택 알고리즘을 작동할 수 없습니다. 지금까지 선발된 게임: {all_games}"
            )

        unique_games.add(random.choice(candidates))

    return unique_games


def _validate_final_games(games: Collection, genres: Collection):
    """Raise `NotEnoughGamesError` unless there are exactly as many games as genres."""
    if len(games) == len(genres):
        return

    raise NotEnoughGamesError(
        f"게임의 수가 너무 적어 게임 선택 알고리즘을 작동할 수 없습니다. 최종 선발된 게임: {games}"
    )


def pick_games(
    games: Iterable[Game],
    genres: Sequence[str],
) -> set[Game]:
    """Pick one distinct game for each of the given genres.

    The games are grouped by genre, the groups are split at random into an "older games" part
    and a "newer games" part around the median release date, and one game is then chosen from
    each group.

    Args:
        games: All candidate games. Any iterable works, including a one-shot iterator.
        genres: The genres to pick a game for.

    Returns:
        The picked games; there are as many as there are genres.

    Raises:
        NotEnoughGamesError: If a distinct game cannot be picked for every genre.
    """
    # `games` is walked twice below (grouping, then the median); a one-shot iterator would
    # already be exhausted on the second pass, so take a snapshot first.
    games = list(games)

    categorized_games = _categorize_games_by_genre(games, genres)

    # Split into older games and newer games.
    median_released_at = _get_median_released_at(games)
    olders, newers = _pick_older_newer_games(categorized_games, median_released_at)

    # Choose the final games.
    final_games = _pick_unique_per_category(olders + newers)
    _validate_final_games(final_games, genres)
    return final_games
7 changes: 7 additions & 0 deletions aws_lambdas/daily_quiz/daily_quiz/genre_picker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
import random

from ..config import setting


def pick_genres(k: int) -> list[str]:
    """Return `k` genres sampled without replacement from `setting.GAME_GENERES`."""
    genre_pool = setting.GAME_GENERES
    return random.sample(genre_pool, k=k)
8 changes: 8 additions & 0 deletions aws_lambdas/daily_quiz/daily_quiz/screenshot_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from ..aws_lambda.model import Game, SaveGameScreenshot
from ..protocols import SteamAPI


def scrap_screenshots(steam_api: SteamAPI, game: Game):
    """Fetch the game's screenshots through `steam_api` and turn them into save models.

    The lookup uses `game.steam_id`; every result is tagged with `game.id` as its `game_id`.
    """
    to_save = []
    for shot in steam_api.get_game_screenshots(game.steam_id):
        to_save.append(SaveGameScreenshot(steam_file_id=shot.file_id, url=shot.full_image_url, game_id=game.id))
    return to_save
35 changes: 35 additions & 0 deletions aws_lambdas/daily_quiz/daily_quiz/serivce.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from typing import Iterable

from ..aws_lambda.model import Game, SaveQuiz
from ..config import setting
from ..protocols import LambdaAPI, SteamAPI
from .game_picker import pick_games
from .genre_picker import pick_genres
from .screenshot_scraper import scrap_screenshots


def create_quizzes(steam_api: SteamAPI, games: Iterable[Game]) -> list[SaveQuiz]:
    """Build one quiz per game from that game's scraped screenshots, in input order."""
    # Scraping the screenshots happens inside `scrap_screenshots`, once per game.
    return [SaveQuiz(screenshots=scrap_screenshots(steam_api, game)) for game in games]


def new_daily_quizzes(lambda_api: LambdaAPI, steam_api: SteamAPI):
    """Create a new set of daily quizzes and save it through `lambda_api`.

    Steps: fetch all games, pick `setting.DAILY_QUIZ_CNT` genres, pick the games for those
    genres, build the quizzes (this scrapes screenshots through `steam_api`), then save them.
    """
    # Fetch every game; the picks below are made from this pool.
    candidate_games = lambda_api.get_all_games()

    # Pick the genres, then the games for those genres.
    genres = pick_genres(setting.DAILY_QUIZ_CNT)
    chosen_games = pick_games(candidate_games, genres)

    # Build the quizzes (scrapes screenshots) and save them.
    lambda_api.save_quizzes(create_quizzes(steam_api, chosen_games))
16 changes: 16 additions & 0 deletions aws_lambdas/daily_quiz/daily_quiz/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import random
from typing import Sequence, TypeVar

# Element type of the sequence split by `divide_randomly`.
T = TypeVar("T")


def divide_randomly(x: Sequence[T], k: int) -> tuple[list[T], list[T]]:
    """Split `x` into `k` randomly chosen elements and all the remaining elements.

    Positions, not values, are sampled, so equal elements are handled independently.

    Returns:
        A pair `(chosen, rest)`: `chosen` holds the `k` sampled elements and `rest` holds
        every element that was not sampled.
    """
    positions = range(len(x))

    chosen_positions = random.sample(positions, k=k)
    rest_positions = list(set(positions) - set(chosen_positions))

    chosen = [x[position] for position in chosen_positions]
    rest = [x[position] for position in rest_positions]
    return chosen, rest
22 changes: 22 additions & 0 deletions aws_lambdas/daily_quiz/lambda_func.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from typing import Any

from . import protocols
from .aws_lambda.lambda_api import LambdaAPI
from .daily_quiz.serivce import new_daily_quizzes as new_daily_quizzes_
from .logger import logger
from .steam.steam_api import SteamAPI


def new_daily_quizzes(lambda_api: protocols.LambdaAPI, steam_api: protocols.SteamAPI):
    """Run the daily quiz job, logging a line before it starts and after it finishes.

    The closing line is logged only when the job returns normally.
    """
    logger.info("-- new daily quizzes job start --")

    # The service-layer function of the same name is imported with a trailing underscore.
    new_daily_quizzes_(lambda_api, steam_api)

    logger.info("-- new daily quizzes job end --")


def lambda_handler(event: Any, context: Any):
    """Lambda entry point: build the two API clients and run the daily quiz job.

    `event` and `context` are accepted but not used.
    """
    new_daily_quizzes(LambdaAPI(), SteamAPI())
File renamed without changes.
20 changes: 20 additions & 0 deletions aws_lambdas/daily_quiz/protocols.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from typing import Protocol, Sequence

from .aws_lambda.model import Game, SaveGameScreenshot, SaveQuiz
from .steam.model import SteamGameScreenshotResponse


class SteamAPI(Protocol):
    """Structural interface of a client that looks up a game's screenshots."""

    def get_game_screenshots(self, app_id: int, page: int = 1) -> list[SteamGameScreenshotResponse]:
        """Return the screenshots of the app `app_id` for the given `page`."""
        ...


class LambdaAPI(Protocol):
    """Structural interface of a client that saves screenshots and quizzes and lists games."""

    def save_screenshots(self, screenshots: Sequence[SaveGameScreenshot]):
        """Save the given screenshots."""
        ...

    def save_quizzes(self, quizzes: Sequence[SaveQuiz]):
        """Save the given quizzes."""
        ...

    def get_all_games(self) -> list[Game]:
        """Return all games."""
        ...
6 changes: 6 additions & 0 deletions aws_lambdas/daily_quiz/steam/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from pydantic import BaseModel


class SteamGameScreenshotResponse(BaseModel):
    """One screenshot as returned by the Steam client: its file id and full-size image URL."""

    file_id: int
    full_image_url: str
15 changes: 15 additions & 0 deletions aws_lambdas/daily_quiz/steam/steam_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from .. import protocols
from . import steampowered_api
from .model import SteamGameScreenshotResponse


class SteamAPI(protocols.SteamAPI):
    """Steam client backed by `steampowered_api.get_community_screenshots`."""

    def get_game_screenshots(self, app_id: int, page: int = 1) -> list[SteamGameScreenshotResponse]:
        """Fetch one page of community screenshots for `app_id` and wrap each as a response model.

        Each raw item is read through its `"published_file_id"` key (converted with `int`)
        and its `"full_image_url"` key.
        """
        responses = []
        for raw in steampowered_api.get_community_screenshots(app_id, page):
            file_id = int(raw["published_file_id"])
            responses.append(SteamGameScreenshotResponse(file_id=file_id, full_image_url=raw["full_image_url"]))
        return responses
Loading

0 comments on commit 36e5b04

Please sign in to comment.