From a8debc74e09e6db72c449f68e0726b8fca6b08b2 Mon Sep 17 00:00:00 2001 From: pingpingy1 Date: Wed, 15 Nov 2023 20:53:40 +0900 Subject: [PATCH 1/7] [feat] modify diversity index functions --- utils/diversity.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/utils/diversity.py b/utils/diversity.py index 3449830..c2be88c 100644 --- a/utils/diversity.py +++ b/utils/diversity.py @@ -1,21 +1,45 @@ from collections import Counter import math -def count(data, stair = 0): + +def count(data, stair=0): + """ + Returns a counter object of the data, while stairing them to appropriate bins if stair > 0 + """ + if stair > 0: + if isinstance(data[0], str): + raise TypeError("stair is not defined for string data") + data = [math.floor(d / stair) * stair for d in data] return Counter(data) -def gini_simpson(data, stair, opts): + +def gini_simpson(data, stair=0, opts=False): """ Gini-Simpson diversity index """ counts = count(data, stair) total = sum(counts.values()) - return 1 - sum((n / total) ** 2 for n in counts.values()) + gs_idx = 1 - sum((n / total) ** 2 for n in counts.values()) + + if opts: + num_cats = len([c for c in counts.values() if c > 0]) + max_gs_idx = (num_cats - 1) / num_cats * total / (total - 1) + gs_idx /= max_gs_idx -def shannon(data, stair, opts): + return gs_idx + + +def shannon(data, stair=0, opts=False): """ Shannon diversity index """ counts = count(data, stair) total = sum(counts.values()) - return -sum((n / total) * math.log(n / total) for n in counts.values()) \ No newline at end of file + sh_idx = -sum((n / total) * math.log(n / total) for n in counts.values()) + + if opts: + num_cats = len([c for c in counts.values() if c > 0]) + max_sh_idx = math.log(num_cats) + sh_idx /= max_sh_idx + + return sh_idx From 25982ed19786afb916ff185789fd4b0a52087e42 Mon Sep 17 00:00:00 2001 From: pingpingy1 Date: Fri, 17 Nov 2023 02:00:58 +0900 Subject: [PATCH 2/7] [feat] modify Docker environment --- .dockerignore | 2 +- .env.example | 3 +-- Dockerfile | 2 +- model/MongoDB.py | 10 +++++++--- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/.dockerignore b/.dockerignore index dccc6dc..dc007a5 100644 --- a/.dockerignore +++ b/.dockerignore @@ -120,7 +120,7 @@ celerybeat.pid *.sage.py # Environments -.env +# .env .venv env/ venv/ diff --git a/.env.example b/.env.example index 319088f..7026673 100644 --- a/.env.example +++ b/.env.example @@ -1,3 +1,2 @@ # required environment variables -MONGO_CONNECTION_URI=mongodb://localhost:27017 -MONGO_DB_NAME=council \ No newline at end of file +MONGO_CONNECTION_URI=mongodb://localhost:27017 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index c21ce42..e8d5de7 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10 +FROM python:3.11 WORKDIR /src diff --git a/model/MongoDB.py b/model/MongoDB.py index 7e87306..7228259 100644 --- a/model/MongoDB.py +++ b/model/MongoDB.py @@ -4,16 +4,20 @@ load_dotenv() + class MongoDB: def __init__(self): self.client = None - self.db = None - + self.council_db = None + self.district_db = None + def connect(self): self.client = AsyncIOMotorClient(os.getenv("MONGO_CONNECTION_URI")) - self.db = AsyncIOMotorDatabase(self.client, os.getenv("MONGO_DATABASE")) + self.council_db = AsyncIOMotorDatabase(self.client, "council") + self.district_db = AsyncIOMotorDatabase(self.client, "district") def close(self): self.client.close() + client = MongoDB() From 2ffd8726ce6db4b63f98c9600f5331d3e3263204 Mon Sep 17 00:00:00 2001 From: pingpingy1 Date: Fri, 17 Nov 2023 02:07:29 +0900 Subject: [PATCH 3/7] =?UTF-8?q?[feat]=20=EC=A7=80=EC=97=AD,=20=EC=A0=95?= =?UTF-8?q?=EB=8B=B9=20=EC=A0=95=EB=B3=B4=20=EC=A0=9C=EA=B3=B5=20API=20?= =?UTF-8?q?=EC=9E=91=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- main.py | 9 ++++++--- model/CommonInfo.py | 17 +++++++++++++++++ routers/commonInfo.py | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 3 deletions(-) create mode 100644 model/CommonInfo.py create mode 100644 routers/commonInfo.py diff --git a/main.py b/main.py index e80f472..14ccd62 100644 --- a/main.py +++ b/main.py @@ -1,16 +1,19 @@ from fastapi import FastAPI, Request from dotenv import load_dotenv -from routers import scrapResult +from routers import scrapResult, commonInfo from contextlib import asynccontextmanager from typing import Dict from model import MongoDB + @asynccontextmanager async def initMongo(app: FastAPI): - MongoDB.MongoDB().connect() + MongoDB.client.connect() yield - MongoDB.MongoDB().close() + MongoDB.client.close() + app = FastAPI(lifespan=initMongo, responses={404: {"description": "Not found"}}) app.include_router(scrapResult.router) +app.include_router(commonInfo.router) diff --git a/model/CommonInfo.py b/model/CommonInfo.py new file mode 100644 index 0000000..b974a6d --- /dev/null +++ b/model/CommonInfo.py @@ -0,0 +1,17 @@ +from pydantic import BaseModel + + +class LocalInfo(BaseModel): + name: str + id: int + + +class RegionInfo(BaseModel): + name: str + id: int + local: list[LocalInfo] + + +class PartyInfo(BaseModel): + name: str + color: str diff --git a/routers/commonInfo.py b/routers/commonInfo.py new file mode 100644 index 0000000..cb41cbf --- /dev/null +++ b/routers/commonInfo.py @@ -0,0 +1,39 @@ +from fastapi import APIRouter +from model import MongoDB, CommonInfo + +router = APIRouter() + + +@router.get("/localCouncil/regionInfo") +async def getRegionInfo() -> list[CommonInfo.RegionInfo]: + regions = [] + async for metro in MongoDB.client.district_db.get_collection( + "metro_district" + ).find(): + local_districts = [] + async for local in MongoDB.client.district_db.get_collection( + "local_district" + ).find({"metro_id": metro["metro_id"]}): + local_districts.append({"name": local["name_ko"], "id": local["local_id"]}) + regions.append( + CommonInfo.RegionInfo.model_validate( + { + "name": metro["name_ko"], + "id": metro["metro_id"], + "local": local_districts, + } + ) + ) + return regions + + +@router.get("/localCouncil/partyInfo") +async def getPartyInfo() -> list[CommonInfo.PartyInfo]: + parties = [] + async for party in MongoDB.client.district_db.get_collection("party").find(): + parties.append( + CommonInfo.PartyInfo.model_validate( + {"name": party["name"], "color": party["color"]} + ) + ) + return parties From 240b39306486b20ddbd4f519e5dedc2075a3ed56 Mon Sep 17 00:00:00 2001 From: pingpingy1 Date: Fri, 17 Nov 2023 02:08:52 +0900 Subject: [PATCH 4/7] =?UTF-8?q?[feat]=20=ED=85=9C=ED=94=8C=EB=A6=BF,=20?= =?UTF-8?q?=EC=B0=A8=ED=8A=B8=20=EB=8D=B0=EC=9D=B4=ED=84=B0=20=EC=A0=9C?= =?UTF-8?q?=EA=B3=B5=20API=20=EC=9E=91=EC=84=B1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- model/BasicResponse.py | 16 ++++++ model/ScrapResult.py | 77 +++++++++++++++++++++++++--- routers/scrapResult.py | 114 ++++++++++++++++++++++++++++++++++++++--- utils/diversity.py | 4 +- 4 files changed, 195 insertions(+), 16 deletions(-) create mode 100644 model/BasicResponse.py diff --git a/model/BasicResponse.py b/model/BasicResponse.py new file mode 100644 index 0000000..f6523bb --- /dev/null +++ b/model/BasicResponse.py @@ -0,0 +1,16 @@ +from pydantic import BaseModel + + +SUCCESS = 200 +REGION_CODE_ERR = 400 + + +class MessageResponse(BaseModel): + message: str + code: int = SUCCESS + + +class ErrorResponse(BaseModel): + error: str + code: int + message: str diff --git a/model/ScrapResult.py b/model/ScrapResult.py index 25a1220..6f270ff 100644 --- a/model/ScrapResult.py +++ b/model/ScrapResult.py @@ -1,18 +1,81 @@ -from pydantic import BaseModel, Field -from enum import Enum, StrEnum +from pydantic import BaseModel +from enum import StrEnum + +class SexType(StrEnum): + male = "남" + female = "여" + + +class FactorType(StrEnum): + sex = "sex" + age = "age" + party = "party" + + +# ============================================== +# = Template Data Types = +# ============================================== +class SexTemplateData(BaseModel): + sexDiversityIndex: float + + +class AgeTemplateData(BaseModel): + ageDiversityIndex: float + + +class PartyTemplateData(BaseModel): + partyDiversityIndex: float + + +# ============================================== +# = Chart Data Types = +# ============================================== +class SexChartDataPoint(BaseModel): + sex: SexType + count: int + + +class SexChartData(BaseModel): + data: list[SexChartDataPoint] + + +class AgeChartDataPoint(BaseModel): + minAge: int # 닫힌 구간 + maxAge: int # 닫힌 구간 + count: int + + +class AgeChartData(BaseModel): + data: list[AgeChartDataPoint] + + +class PartyChartDataPoint(BaseModel): + party: str + count: int + + +class PartyChartData(BaseModel): + data: list[PartyChartDataPoint] + + +# ============================================== +# = Scrap Result Data Types = +# ============================================== class CouncilType(StrEnum): local_council = "local_council" national_council = "national_council" metropolitan_council = "metropolitan_council" - local_leader= "local_leader" + local_leader = "local_leader" metro_leader = "metro_leader" + class CouncilInfo(BaseModel): - name : str + name: str party: str + class ScrapResult(BaseModel): - council_id : str - council_type : CouncilType - councilers : list[CouncilInfo] \ No newline at end of file + council_id: str + council_type: CouncilType + councilers: list[CouncilInfo] diff --git a/routers/scrapResult.py b/routers/scrapResult.py index 4d5f4a2..a07d55c 100644 --- a/routers/scrapResult.py +++ b/routers/scrapResult.py @@ -1,11 +1,111 @@ from fastapi import APIRouter +from model import BasicResponse, MongoDB, ScrapResult +from utils import diversity router = APIRouter() -@router.get("/") -async def getScrapResult(): - try: - return {"message": "No World"} - except Exception as e: - print(e) - return {"message": "Error"} \ No newline at end of file +AGE_STAIR = 10 + + +@router.get("/localCouncil/template-data/{metroId}/{localId}") +async def getLocalTemplateData( + metroId: int, localId: int, factor: ScrapResult.FactorType +) -> BasicResponse.ErrorResponse | ScrapResult.SexTemplateData | ScrapResult.AgeTemplateData | ScrapResult.PartyTemplateData: + if ( + await MongoDB.client.district_db["local_district"].find_one( + {"local_id": localId, "metro_id": metroId} + ) + is None + ): + return BasicResponse.ErrorResponse.model_validate( + { + "error": "RegionCodeError", + "code": BasicResponse.REGION_CODE_ERR, + "message": f"No local district with metroId {metroId} and localId {localId}.", + } + ) + + councilors = MongoDB.client.council_db["local_councilor"].find( + {"local_id": localId} + ) + + match factor: + case ScrapResult.FactorType.sex: + sex_list = [councilor["sex"] async for councilor in councilors] + sex_diversity_index = diversity.gini_simpson(sex_list) + return ScrapResult.SexTemplateData.model_validate( + {"sexDiversityIndex": sex_diversity_index} + ) + + case ScrapResult.FactorType.age: + age_list = [councilor["age"] async for councilor in councilors] + age_diversity_index = diversity.gini_simpson(age_list, stair=AGE_STAIR) + return ScrapResult.AgeTemplateData.model_validate( + {"ageDiversityIndex": age_diversity_index} + ) + + case ScrapResult.FactorType.party: + party_list = [councilor["party"] async for councilor in councilors] + party_diversity_index = diversity.gini_simpson(party_list) + return ScrapResult.PartyTemplateData.model_validate( + {"partyDiversityIndex": party_diversity_index} + ) + + +@router.get("/localCouncil/chart-data/{metroId}/{localId}") +async def getLocalChartData( + metroId: int, localId: int, factor: ScrapResult.FactorType +) -> BasicResponse.ErrorResponse | ScrapResult.SexChartData | ScrapResult.AgeChartData | ScrapResult.PartyChartData: + if ( + await MongoDB.client.district_db["local_district"].find_one( + {"local_id": localId, "metro_id": metroId} + ) + is None + ): + return BasicResponse.ErrorResponse.model_validate( + { + "error": "RegionCodeError", + "code": BasicResponse.REGION_CODE_ERR, + "message": f"No local district with metroId {metroId} and localId {localId}.", + } + ) + + councilors = MongoDB.client.council_db["local_councilor"].find( + {"local_id": localId} + ) + + match factor: + case ScrapResult.FactorType.sex: + sex_list = [councilor["sex"] async for councilor in councilors] + sex_count = diversity.count(sex_list) + return ScrapResult.SexChartData.model_validate( + {"data": [{"sex": sex, "count": sex_count[sex]} for sex in sex_count]} + ) + + case ScrapResult.FactorType.age: + age_list = [councilor["age"] async for councilor in councilors] + age_count = diversity.count(age_list, stair=AGE_STAIR) + return ScrapResult.AgeChartData.model_validate( + { + "data": [ + { + "minAge": age, + "maxAge": age + AGE_STAIR - 1, + "count": age_count[age], + } + for age in age_count + ] + } + ) + + case ScrapResult.FactorType.party: + party_list = [councilor["party"] async for councilor in councilors] + party_count = diversity.count(party_list) + return ScrapResult.PartyChartData.model_validate( + { + "data": [ + {"party": party, "count": party_count[party]} + for party in party_count + ] + } + ) diff --git a/utils/diversity.py b/utils/diversity.py index c2be88c..95536e0 100644 --- a/utils/diversity.py +++ b/utils/diversity.py @@ -13,7 +13,7 @@ def count(data, stair=0): return Counter(data) -def gini_simpson(data, stair=0, opts=False): +def gini_simpson(data, stair=0, opts=True): """ Gini-Simpson diversity index """ @@ -29,7 +29,7 @@ def gini_simpson(data, stair=0, opts=False): return gs_idx -def shannon(data, stair=0, opts=False): +def shannon(data, stair=0, opts=True): """ Shannon diversity index """ From 54c7603c181c92b7efb4b0ac5e256a0744d44f05 Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Fri, 17 Nov 2023 05:26:57 +0900 Subject: [PATCH 5/7] =?UTF-8?q?Generic=20=EB=8F=84=EC=9E=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- model/ScrapResult.py | 12 +++++------- routers/scrapResult.py | 5 ++++- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/model/ScrapResult.py b/model/ScrapResult.py index 6f270ff..4f3f422 100644 --- a/model/ScrapResult.py +++ b/model/ScrapResult.py @@ -1,5 +1,6 @@ from pydantic import BaseModel from enum import StrEnum +from typing import TypeVar, Generic class SexType(StrEnum): @@ -36,8 +37,6 @@ class SexChartDataPoint(BaseModel): count: int -class SexChartData(BaseModel): - data: list[SexChartDataPoint] class AgeChartDataPoint(BaseModel): @@ -46,17 +45,16 @@ class AgeChartDataPoint(BaseModel): count: int -class AgeChartData(BaseModel): - data: list[AgeChartDataPoint] - class PartyChartDataPoint(BaseModel): party: str count: int -class PartyChartData(BaseModel): - data: list[PartyChartDataPoint] +T = TypeVar("T", SexChartDataPoint, AgeChartDataPoint, PartyChartDataPoint) + +class ChartData(BaseModel, Generic[T]): + data: list[T] # ============================================== diff --git a/routers/scrapResult.py b/routers/scrapResult.py index a07d55c..4309471 100644 --- a/routers/scrapResult.py +++ b/routers/scrapResult.py @@ -1,6 +1,7 @@ from fastapi import APIRouter from model import BasicResponse, MongoDB, ScrapResult from utils import diversity +from typing import TypeVar router = APIRouter() @@ -52,10 +53,12 @@ async def getLocalTemplateData( ) +T = TypeVar("T", ScrapResult.SexChartData, ScrapResult.AgeChartData, ScrapResult.PartyChartData) + @router.get("/localCouncil/chart-data/{metroId}/{localId}") async def getLocalChartData( metroId: int, localId: int, factor: ScrapResult.FactorType -) -> BasicResponse.ErrorResponse | ScrapResult.SexChartData | ScrapResult.AgeChartData | ScrapResult.PartyChartData: +) -> BasicResponse.ErrorResponse | ScrapResult.ChartData[T]: if ( await MongoDB.client.district_db["local_district"].find_one( {"local_id": localId, "metro_id": metroId} From 0623512ea67638ea6913c17a14a03518ec955204 Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Fri, 17 Nov 2023 05:28:35 +0900 Subject: [PATCH 6/7] Add: Prefix --- routers/commonInfo.py | 6 +++--- routers/scrapResult.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/routers/commonInfo.py b/routers/commonInfo.py index cb41cbf..1df08f5 100644 --- a/routers/commonInfo.py +++ b/routers/commonInfo.py @@ -1,10 +1,10 @@ from fastapi import APIRouter from model import MongoDB, CommonInfo -router = APIRouter() +router = APIRouter(prefix="/localCouncil", tags=["localCouncil"]) -@router.get("/localCouncil/regionInfo") +@router.get("/regionInfo") async def getRegionInfo() -> list[CommonInfo.RegionInfo]: regions = [] async for metro in MongoDB.client.district_db.get_collection( @@ -27,7 +27,7 @@ async def getRegionInfo() -> list[CommonInfo.RegionInfo]: return regions -@router.get("/localCouncil/partyInfo") +@router.get("/partyInfo") async def getPartyInfo() -> list[CommonInfo.PartyInfo]: parties = [] async for party in MongoDB.client.district_db.get_collection("party").find(): diff --git a/routers/scrapResult.py b/routers/scrapResult.py index 4309471..cdcfc4a 100644 --- a/routers/scrapResult.py +++ b/routers/scrapResult.py @@ -3,12 +3,12 @@ from utils import diversity from typing import TypeVar -router = APIRouter() +router = APIRouter("/localCouncil", tags=["localCouncil"]) AGE_STAIR = 10 -@router.get("/localCouncil/template-data/{metroId}/{localId}") +@router.get("/template-data/{metroId}/{localId}") async def getLocalTemplateData( metroId: int, localId: int, factor: ScrapResult.FactorType ) -> BasicResponse.ErrorResponse | ScrapResult.SexTemplateData | ScrapResult.AgeTemplateData | ScrapResult.PartyTemplateData: @@ -55,7 +55,7 @@ async def getLocalTemplateData( T = TypeVar("T", ScrapResult.SexChartData, ScrapResult.AgeChartData, ScrapResult.PartyChartData) -@router.get("/localCouncil/chart-data/{metroId}/{localId}") +@router.get("/chart-data/{metroId}/{localId}") async def getLocalChartData( metroId: int, localId: int, factor: ScrapResult.FactorType ) -> BasicResponse.ErrorResponse | ScrapResult.ChartData[T]: From e68e603b07d63e04548c576f1dbdbbc0caebad08 Mon Sep 17 00:00:00 2001 From: happycastle <41810556+happycastle114@users.noreply.github.com> Date: Fri, 17 Nov 2023 05:58:50 +0900 Subject: [PATCH 7/7] Remove Undeclared Types --- routers/scrapResult.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/routers/scrapResult.py b/routers/scrapResult.py index cdcfc4a..3cc33b1 100644 --- a/routers/scrapResult.py +++ b/routers/scrapResult.py @@ -81,14 +81,14 @@ async def getLocalChartData( case ScrapResult.FactorType.sex: sex_list = [councilor["sex"] async for councilor in councilors] sex_count = diversity.count(sex_list) - return ScrapResult.SexChartData.model_validate( + return ScrapResult.ChartData[ScrapResult.SexChartDataPoint].model_validate( {"data": [{"sex": sex, "count": sex_count[sex]} for sex in sex_count]} ) case ScrapResult.FactorType.age: age_list = [councilor["age"] async for councilor in councilors] age_count = diversity.count(age_list, stair=AGE_STAIR) - return ScrapResult.AgeChartData.model_validate( + return ScrapResult.ChartData[ScrapResult.AgeChartDataPoint].model_validate( { "data": [ { @@ -104,7 +104,7 @@ async def getLocalChartData( case ScrapResult.FactorType.party: party_list = [councilor["party"] async for councilor in councilors] party_count = diversity.count(party_list) - return ScrapResult.PartyChartData.model_validate( + return ScrapResult.ChartData[ScrapResult.PartyChartDataPoint].model_validate( { "data": [ {"party": party, "count": party_count[party]}