diff --git a/.dockerignore b/.dockerignore index dccc6dc..dc007a5 100644 --- a/.dockerignore +++ b/.dockerignore @@ -120,7 +120,7 @@ celerybeat.pid *.sage.py # Environments -.env +# .env .venv env/ venv/ diff --git a/.env.example b/.env.example index 319088f..7026673 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,2 @@ # required environment variables -MONGO_CONNECTION_URI=mongodb://localhost:27017 -MONGO_DB_NAME=council \ No newline at end of file +MONGO_CONNECTION_URI=mongodb://localhost:27017 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index c21ce42..e8d5de7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10 +FROM python:3.11 WORKDIR /src diff --git a/main.py b/main.py index b348080..d7a97d1 100644 --- a/main.py +++ b/main.py @@ -1,17 +1,19 @@ from fastapi import FastAPI, Request from dotenv import load_dotenv -from routers import scrapResult +from routers import scrapResult, commonInfo from contextlib import asynccontextmanager from typing import Dict from model import MongoDB from model.ResponseType import ChartResponse, SexInfo, PartyInfo, AgeInfo + @asynccontextmanager async def initMongo(app: FastAPI): - MongoDB.MongoDB().connect() + MongoDB.client.connect() yield - MongoDB.MongoDB().close() + MongoDB.client.close() + new = ChartResponse[SexInfo] @@ -19,3 +21,4 @@ async def initMongo(app: FastAPI): app.include_router(scrapResult.router) +app.include_router(commonInfo.router) diff --git a/model/BasicResponse.py b/model/BasicResponse.py new file mode 100644 index 0000000..f6523bb --- /dev/null +++ b/model/BasicResponse.py @@ -0,0 +1,16 @@ +from pydantic import BaseModel + + +SUCCESS = 200 +REGION_CODE_ERR = 400 + + +class MessageResponse(BaseModel): + message: str + code: int = SUCCESS + + +class ErrorResponse(BaseModel): + error: str + code: int + message: str diff --git a/model/CommonInfo.py b/model/CommonInfo.py new file mode 100644 index 0000000..b974a6d --- /dev/null +++ b/model/CommonInfo.py @@ -0,0 +1,17 @@ +from pydantic import BaseModel + + +class LocalInfo(BaseModel): + name: str + id: int + + +class RegionInfo(BaseModel): + name: str + id: int + local: list[LocalInfo] + + +class PartyInfo(BaseModel): + name: str + color: str diff --git a/model/MongoDB.py b/model/MongoDB.py index 7e87306..7228259 100644 --- a/model/MongoDB.py +++ b/model/MongoDB.py @@ -4,16 +4,20 @@ load_dotenv() + class MongoDB: def __init__(self): self.client = None - self.db = None - + self.council_db = None + self.district_db = None + def connect(self): self.client = AsyncIOMotorClient(os.getenv("MONGO_CONNECTION_URI")) - self.db = AsyncIOMotorDatabase(self.client, os.getenv("MONGO_DATABASE")) + self.council_db = AsyncIOMotorDatabase(self.client, "council") + self.district_db = AsyncIOMotorDatabase(self.client, "district") def close(self): self.client.close() + client = MongoDB() diff --git a/model/ScrapResult.py b/model/ScrapResult.py index 42ae96d..4f3f422 100644 --- a/model/ScrapResult.py +++ b/model/ScrapResult.py @@ -1,18 +1,79 @@ -from pydantic import BaseModel, Field +from pydantic import BaseModel from enum import StrEnum +from typing import TypeVar, Generic + +class SexType(StrEnum): + male = "남" + female = "여" + + +class FactorType(StrEnum): + sex = "sex" + age = "age" + party = "party" + + +# ============================================== +# = Template Data Types = +# ============================================== +class SexTemplateData(BaseModel): + sexDiversityIndex: float + + +class AgeTemplateData(BaseModel): + ageDiversityIndex: float + + +class PartyTemplateData(BaseModel): + partyDiversityIndex: float + + +# ============================================== +# = Chart Data Types = +# ============================================== +class SexChartDataPoint(BaseModel): + sex: SexType + count: int + + + + +class AgeChartDataPoint(BaseModel): + minAge: int # 닫힌 구간 + maxAge: int # 닫힌 구간 + count: int + + + +class PartyChartDataPoint(BaseModel): + party: str + count: int + + +T = TypeVar("T", SexChartDataPoint, AgeChartDataPoint, PartyChartDataPoint) + +class ChartData(BaseModel, Generic[T]): + data: list[T] + + +# ============================================== +# = Scrap Result Data Types = +# ============================================== class CouncilType(StrEnum): local_council = "local_council" national_council = "national_council" metropolitan_council = "metropolitan_council" - local_leader= "local_leader" + local_leader = "local_leader" metro_leader = "metro_leader" + class CouncilInfo(BaseModel): - name : str + name: str party: str + class ScrapResult(BaseModel): - council_id : str - council_type : CouncilType - councilers : list[CouncilInfo] \ No newline at end of file + council_id: str + council_type: CouncilType + councilers: list[CouncilInfo] diff --git a/routers/commonInfo.py b/routers/commonInfo.py new file mode 100644 index 0000000..1df08f5 --- /dev/null +++ b/routers/commonInfo.py @@ -0,0 +1,39 @@ +from fastapi import APIRouter +from model import MongoDB, CommonInfo + +router = APIRouter(prefix="/localCouncil", tags=["localCouncil"]) + + +@router.get("/regionInfo") +async def getRegionInfo() -> list[CommonInfo.RegionInfo]: + regions = [] + async for metro in MongoDB.client.district_db.get_collection( + "metro_district" + ).find(): + local_districts = [] + async for local in MongoDB.client.district_db.get_collection( + "local_district" + ).find({"metro_id": metro["metro_id"]}): + local_districts.append({"name": local["name_ko"], "id": local["local_id"]}) + regions.append( + CommonInfo.RegionInfo.model_validate( + { + "name": metro["name_ko"], + "id": metro["metro_id"], + "local": local_districts, + } + ) + ) + return regions + + +@router.get("/partyInfo") +async def getPartyInfo() -> list[CommonInfo.PartyInfo]: + parties = [] + async for party in MongoDB.client.district_db.get_collection("party").find(): + parties.append( + CommonInfo.PartyInfo.model_validate( + {"name": party["name"], "color": party["color"]} + ) + ) + return parties diff --git a/routers/scrapResult.py b/routers/scrapResult.py index 2d12937..6687684 100644 --- a/routers/scrapResult.py +++ b/routers/scrapResult.py @@ -1,37 +1,115 @@ from fastapi import APIRouter +from model import BasicResponse, MongoDB, ScrapResult +from utils import diversity +from typing import TypeVar -from model.ResponseType import * - -router = APIRouter(prefix="/localCouncil", tags=["localCouncil"]) - -@router.get("/regionInfo", response_model=RegionInfo) -async def getRegionInfo(): - try: - return [] - except Exception as e: - print(e) - return [] - -@router.get("/partyInfo", response_model=PartyInfo) -async def getPartyInfo(): - try: - return [] - except Exception as e: - print(e) - return [] - -@router.get("/template-data/{metroId}/{localId}/{factor}", response_model=Diversity) -async def getTemplateData(metroId: int, localId: int, factor: str): - try: - return [] - except Exception as e: - print(e) - return [] - -@router.get("/chart-data/{metroId}/{localId}/{factor}", response_model=ChartResponse) -async def getTemplateData(metroId: int, localId: int, factor: str): - try: - return [] - except Exception as e: - print(e) - return [] \ No newline at end of file + +router = APIRouter("/localCouncil", tags=["localCouncil"]) + +AGE_STAIR = 10 + + +@router.get("/template-data/{metroId}/{localId}") +async def getLocalTemplateData( + metroId: int, localId: int, factor: ScrapResult.FactorType +) -> BasicResponse.ErrorResponse | ScrapResult.SexTemplateData | ScrapResult.AgeTemplateData | ScrapResult.PartyTemplateData: + if ( + await MongoDB.client.district_db["local_district"].find_one( + {"local_id": localId, "metro_id": metroId} + ) + is None + ): + return BasicResponse.ErrorResponse.model_validate( + { + "error": "RegionCodeError", + "code": BasicResponse.REGION_CODE_ERR, + "message": f"No local district with metroId {metroId} and localId {localId}.", + } + ) + + councilors = MongoDB.client.council_db["local_councilor"].find( + {"local_id": localId} + ) + + match factor: + case ScrapResult.FactorType.sex: + sex_list = [councilor["sex"] async for councilor in councilors] + sex_diversity_index = diversity.gini_simpson(sex_list) + return ScrapResult.SexTemplateData.model_validate( + {"sexDiversityIndex": sex_diversity_index} + ) + + case ScrapResult.FactorType.age: + age_list = [councilor["age"] async for councilor in councilors] + age_diversity_index = diversity.gini_simpson(age_list, stair=AGE_STAIR) + return ScrapResult.AgeTemplateData.model_validate( + {"ageDiversityIndex": age_diversity_index} + ) + + case ScrapResult.FactorType.party: + party_list = [councilor["party"] async for councilor in councilors] + party_diversity_index = diversity.gini_simpson(party_list) + return ScrapResult.PartyTemplateData.model_validate( + {"partyDiversityIndex": party_diversity_index} + ) + + +T = TypeVar("T", ScrapResult.SexChartData, ScrapResult.AgeChartData, ScrapResult.PartyChartData) + +@router.get("/chart-data/{metroId}/{localId}") +async def getLocalChartData( + metroId: int, localId: int, factor: ScrapResult.FactorType +) -> BasicResponse.ErrorResponse | ScrapResult.ChartData[T]: + if ( + await MongoDB.client.district_db["local_district"].find_one( + {"local_id": localId, "metro_id": metroId} + ) + is None + ): + return BasicResponse.ErrorResponse.model_validate( + { + "error": "RegionCodeError", + "code": BasicResponse.REGION_CODE_ERR, + "message": f"No local district with metroId {metroId} and localId {localId}.", + } + ) + + councilors = MongoDB.client.council_db["local_councilor"].find( + {"local_id": localId} + ) + + match factor: + case ScrapResult.FactorType.sex: + sex_list = [councilor["sex"] async for councilor in councilors] + sex_count = diversity.count(sex_list) + return ScrapResult.ChartData[ScrapResult.SexChartDataPoint].model_validate( + {"data": [{"sex": sex, "count": sex_count[sex]} for sex in sex_count]} + ) + + case ScrapResult.FactorType.age: + age_list = [councilor["age"] async for councilor in councilors] + age_count = diversity.count(age_list, stair=AGE_STAIR) + return ScrapResult.ChartData[ScrapResult.AgeChartDataPoint].model_validate( + { + "data": [ + { + "minAge": age, + "maxAge": age + AGE_STAIR - 1, + "count": age_count[age], + } + for age in age_count + ] + } + ) + + case ScrapResult.FactorType.party: + party_list = [councilor["party"] async for councilor in councilors] + party_count = diversity.count(party_list) + return ScrapResult.ChartData[ScrapResult.PartyChartDataPoint].model_validate( + { + "data": [ + {"party": party, "count": party_count[party]} + for party in party_count + ] + } + ) \ No newline at end of file diff --git a/utils/diversity.py b/utils/diversity.py index f118107..95536e0 100644 --- a/utils/diversity.py +++ b/utils/diversity.py @@ -1,27 +1,45 @@ from collections import Counter import math -def count(data, stair = 0): + +def count(data, stair=0): """ - Count the number of occurrences of each value in a dataset + Returns a counter object of the data, while stairing them to appropriate bins if stair > 0 """ - counts = Counter() - for row in data: - counts[row[stair]] += 1 - return counts + if stair > 0: + if isinstance(data[0], str): + raise TypeError("stair is not defined for string data") + data = [math.floor(d / stair) * stair for d in data] + return Counter(data) + -def gini_simpson(data, stair, opts): +def gini_simpson(data, stair=0, opts=True): """ Gini-Simpson diversity index """ counts = count(data, stair) total = sum(counts.values()) - return 1 - sum((n / total) ** 2 for n in counts.values()) + gs_idx = 1 - sum((n / total) ** 2 for n in counts.values()) + + if opts: + num_cats = len([c for c in counts.values() if c > 0]) + max_gs_idx = (num_cats - 1) / num_cats * total / (total - 1) + gs_idx /= max_gs_idx + + return gs_idx -def shannon(data, stair, opts): + +def shannon(data, stair=0, opts=True): """ Shannon diversity index """ counts = count(data, stair) total = sum(counts.values()) - return -sum((n / total) * math.log(n / total) for n in counts.values()) \ No newline at end of file + sh_idx = -sum((n / total) * math.log(n / total) for n in counts.values()) + + if opts: + num_cats = len([c for c in counts.values() if c > 0]) + max_sh_idx = math.log(num_cats) + sh_idx /= max_sh_idx + + return sh_idx