From 6efeee3ba84f729b3b428d6592a1f498aa5fcfed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Thu, 23 Feb 2023 11:29:15 -0500 Subject: [PATCH 01/17] chore(docs): Updated docs generation --- .github/workflows/docs.yml | 23 + {html => docs}/emse-mms/api.html | 0 {html => docs}/emse-mms/app.html | 16 +- {html => docs}/emse-mms/db/config.html | 0 {html => docs}/emse-mms/db/index.html | 0 {html => docs}/emse-mms/index.html | 4 +- .../emse-mms}/recommend.html | 102 +++- {html => docs}/emse-mms/utils/db.html | 0 docs/emse-mms/utils/fetch.html | 449 ++++++++++++++++++ {html => docs}/emse-mms/utils/helper.html | 0 {html => docs}/emse-mms/utils/index.html | 5 + {html => docs}/emse-mms/utils/recs.html | 0 {html => docs}/emse-mms/utils/response.html | 0 {html => docs}/emse-mms/utils/sdl.html | 0 {html => docs}/emse-mms/utils/seed.html | 125 +++-- {html => docs}/emse-mms/utils/similarity.html | 0 html/emse-mms/endpoints/index.html | 65 --- 17 files changed, 637 insertions(+), 152 deletions(-) create mode 100644 .github/workflows/docs.yml rename {html => docs}/emse-mms/api.html (100%) rename {html => docs}/emse-mms/app.html (97%) rename {html => docs}/emse-mms/db/config.html (100%) rename {html => docs}/emse-mms/db/index.html (100%) rename {html => docs}/emse-mms/index.html (96%) rename {html/emse-mms/endpoints => docs/emse-mms}/recommend.html (67%) rename {html => docs}/emse-mms/utils/db.html (100%) create mode 100644 docs/emse-mms/utils/fetch.html rename {html => docs}/emse-mms/utils/helper.html (100%) rename {html => docs}/emse-mms/utils/index.html (97%) rename {html => docs}/emse-mms/utils/recs.html (100%) rename {html => docs}/emse-mms/utils/response.html (100%) rename {html => docs}/emse-mms/utils/sdl.html (100%) rename {html => docs}/emse-mms/utils/seed.html (97%) rename {html => docs}/emse-mms/utils/similarity.html (100%) delete mode 100644 html/emse-mms/endpoints/index.html diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..2addde6 --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,23 @@ +name: Build and Deploy Docs + +on: + push: + branches: [ 'dev', 'master', 'ALMP-**' ] + +jobs: + docs: + runs-on: ubuntu-latest + name: Create Docs + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.9.13' + cache: 'pip' # caching pip dependencies + - run: pip install -r requirements.txt + - name: Generate docs + run: pdoc --html --output-dir docs --force . + - name: Deploy docs + uses: JamesIves/github-pages-deploy-action@v4 + with: + folder: docs \ No newline at end of file diff --git a/html/emse-mms/api.html b/docs/emse-mms/api.html similarity index 100% rename from html/emse-mms/api.html rename to docs/emse-mms/api.html diff --git a/html/emse-mms/app.html b/docs/emse-mms/app.html similarity index 97% rename from html/emse-mms/app.html rename to docs/emse-mms/app.html index bf88cb8..504dc1c 100644 --- a/html/emse-mms/app.html +++ b/docs/emse-mms/app.html @@ -46,16 +46,17 @@

Module emse-mms.app

db = Seeder( skip=[Skipper.all], - cleanup=[Skipper.all] + cleanup=[Skipper.all], + iterations=100 ) await db.seedAll() await db.cleanupAll() + await getUserProfile(ID="63da9e40020a625cc55f64c5") exit(0) # seedModuleModel() # seedDbFeedback() - # getReviews(userID="63da9e40020a625cc55f64c5") def getReviews(userID): @@ -92,7 +93,7 @@

Module emse-mms.app

'id': ID } ) - return account.dict() + print(account) if __name__ == '__main__': @@ -158,7 +159,7 @@

Functions

'id': ID } ) - return account.dict() + print(account)
@@ -180,16 +181,17 @@

Functions

db = Seeder( skip=[Skipper.all], - cleanup=[Skipper.all] + cleanup=[Skipper.all], + iterations=100 ) await db.seedAll() await db.cleanupAll() + await getUserProfile(ID="63da9e40020a625cc55f64c5") exit(0) # seedModuleModel() - # seedDbFeedback() - # getReviews(userID="63da9e40020a625cc55f64c5")
+ # seedDbFeedback() diff --git a/html/emse-mms/db/config.html b/docs/emse-mms/db/config.html similarity index 100% rename from html/emse-mms/db/config.html rename to docs/emse-mms/db/config.html diff --git a/html/emse-mms/db/index.html b/docs/emse-mms/db/index.html similarity index 100% rename from html/emse-mms/db/index.html rename to docs/emse-mms/db/index.html diff --git a/html/emse-mms/index.html b/docs/emse-mms/index.html similarity index 96% rename from html/emse-mms/index.html rename to docs/emse-mms/index.html index 3ac5a67..cf321a3 100644 --- a/html/emse-mms/index.html +++ b/docs/emse-mms/index.html @@ -38,7 +38,7 @@

Sub-modules

-
emse-mms.endpoints
+
emse-mms.recommend
@@ -66,7 +66,7 @@

Index

  • emse-mms.api
  • emse-mms.app
  • emse-mms.db
  • -
  • emse-mms.endpoints
  • +
  • emse-mms.recommend
  • emse-mms.utils
  • diff --git a/html/emse-mms/endpoints/recommend.html b/docs/emse-mms/recommend.html similarity index 67% rename from html/emse-mms/endpoints/recommend.html rename to docs/emse-mms/recommend.html index 8023503..69b88bd 100644 --- a/html/emse-mms/endpoints/recommend.html +++ b/docs/emse-mms/recommend.html @@ -4,7 +4,7 @@ -emse-mms.endpoints.recommend API documentation +emse-mms.recommend API documentation @@ -19,22 +19,58 @@
    -

    Module emse-mms.endpoints.recommend

    +

    Module emse-mms.recommend

    Expand source code -
    from typing import Union
    -from fastapi import FastAPI
    +
    # get a set of modules the user has reviewed
    +# get the rating by the user for each module
    +# multiply the rating by the weight of the module
    +# divide the sum of the weighted ratings by the sum of the ratings
    +import asyncio
     
    -app = FastAPI()
    +import pandas as pd
     
    +from prisma import Prisma
     
    -@app.get("/recommend/")
    -async def read_item(user_id: str):
    -    return {"user_id": user_id}
    + +async def getRecsForUser(userID: str): + prism = Prisma() + await prism.connect() + + # get a set of modules the user has reviewed + user_reviews = await prism.modulefeedback.find_many(where={'student': {'id': userID}}) + + reviews = list(map(lambda x: x.dict(), user_reviews)) + + # all the reviews by the user + df = pd.DataFrame(reviews) + + all_reviews = await prism.modulefeedback.find_many() + + all_reviews = list(map(lambda x: x.dict(), all_reviews)) + + df_all_reviews = pd.DataFrame(all_reviews) + + # get the weighted average of the reviews per module + df['weighted_rating'] = df['rating'] * df_all_reviews['rating'].mean() + + df.drop(columns=['module', 'student'], inplace=True) + print(df.info()) + print(df.head()) + + + +async def main(): + await getRecsForUser(userID='63f3b1cb9422322eb675292f') + pass + + +if __name__ == '__main__': + asyncio.run(main())
    @@ -44,8 +80,43 @@

    Module emse-mms.endpoints.recommend

    Functions

    -
    -async def read_item(user_id: str) +
    +async def getRecsForUser(userID: str) +
    +
    +
    +
    + +Expand source code + +
    async def getRecsForUser(userID: str):
    +    prism = Prisma()
    +    await prism.connect()
    +
    +    # get a set of modules the user has reviewed
    +    user_reviews = await prism.modulefeedback.find_many(where={'student': {'id': userID}})
    +
    +    reviews = list(map(lambda x: x.dict(), user_reviews))
    +
    +    # all the reviews by the user
    +    df = pd.DataFrame(reviews)
    +
    +    all_reviews = await prism.modulefeedback.find_many()
    +
    +    all_reviews = list(map(lambda x: x.dict(), all_reviews))
    +
    +    df_all_reviews = pd.DataFrame(all_reviews)
    +
    +    # get the weighted average of the reviews per module
    +    df['weighted_rating'] = df['rating'] * df_all_reviews['rating'].mean()
    +
    +    df.drop(columns=['module', 'student'], inplace=True)
    +    print(df.info())
    +    print(df.head())
    +
    +
    +
    +async def main()
    @@ -53,9 +124,9 @@

    Functions

    Expand source code -
    @app.get("/recommend/")
    -async def read_item(user_id: str):
    -    return {"user_id": user_id}
    +
    async def main():
    +    await getRecsForUser(userID='63f3b1cb9422322eb675292f')
    +    pass
    @@ -71,12 +142,13 @@

    Index

    diff --git a/html/emse-mms/utils/db.html b/docs/emse-mms/utils/db.html similarity index 100% rename from html/emse-mms/utils/db.html rename to docs/emse-mms/utils/db.html diff --git a/docs/emse-mms/utils/fetch.html b/docs/emse-mms/utils/fetch.html new file mode 100644 index 0000000..8f08edc --- /dev/null +++ b/docs/emse-mms/utils/fetch.html @@ -0,0 +1,449 @@ + + + + + + +emse-mms.utils.fetch API documentation + + + + + + + + + + + +
    +
    +
    +

    Module emse-mms.utils.fetch

    +
    +
    +
    + +Expand source code + +
    import asyncio
    +import logging
    +from typing import Union
    +import pandas as pd
    +from prisma import Prisma
    +
    +
    +class Fetcher:
    +    def __init__(self, url: str):
    +        self.url = url
    +        self.response = None
    +        self.prisma = Prisma()
    +
    +        self.logger = logging.getLogger('__fetch__')
    +        logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
    +
    +    def _createModuleMutation(self):
    +        self.response = """query{
    +                          module(input:{}){
    +                            id
    +                            moduleName
    +                            moduleNumber
    +                          }
    +                        }"""
    +        return self.response
    +
    +    def _createModuleFeedbackMutation(self):
    +        self.response = """query{
    +                          moduleFeedback(input:{}){
    +                            id
    +                            rating
    +                            feedback
    +                            student{
    +                              id
    +                            }
    +                            module{
    +                              id
    +                            }
    +                          }
    +                        }"""
    +        return self.response
    +
    +    async def _getModules(self):
    +        """
    +        Gets all modules from the database and returns them as a list of dictionaries.
    +        :return: class<list> of class<dict> of type<Module>
    +        """
    +        await self.prisma.connect()
    +        self.modules = await self.prisma.module.find_many()
    +
    +        mods = []
    +
    +        for i in range(len(self.modules)):
    +            mods.append(self.modules[i].dict())
    +
    +        await self.prisma.disconnect()
    +
    +        return mods
    +
    +    async def getModules(self):
    +        self.logger.info('Fetching modules...')
    +
    +        res = await self._getModules()
    +
    +        if res:
    +            self.logger.info('Modules successfully fetched')
    +            df = pd.DataFrame(res)
    +            print(df.info())
    +        else:
    +            self.logger.error('Failed to fetch modules')
    +            return None
    +
    +        return self.response
    +
    +    async def _getModuleFeedback(self):
    +        """
    +        Gets all feedbacks from the database and returns them as a list of dictionaries.
    +        :return: class<list> of class<dict> of type<ModuleFeedback>
    +        """
    +        await self.prisma.connect()
    +        self.feedbacks = await self.prisma.modulefeedback.find_many()
    +
    +        feedbacks = []
    +
    +        for i in range(len(self.feedbacks)):
    +            feedbacks.append(self.feedbacks[i].dict())
    +        await self.prisma.disconnect()
    +        return feedbacks
    +
    +    async def getModuleFeedback(self):
    +        self.logger.info('Fetching module feedback...')
    +
    +        res = await self._getModuleFeedback()
    +
    +        if res:
    +            self.logger.info('Module feedback successfully fetched')
    +            df = pd.DataFrame(res)
    +            print(df.info())
    +        else:
    +            self.logger.error('Failed to fetch module feedback')
    +            return None
    +
    +        return self.response
    +
    +    def convertJSONasDataFrame(self, model: str):
    +        df = pd.DataFrame(self.response['data'][model])
    +        return df
    +
    +    def convertObjectTOColumn(self, model: str, column: Union[str, list, None]):
    +        df = self.convertJSONasDataFrame(model)
    +        if isinstance(column, list):
    +            for col in column:
    +                conv = df[col].apply(lambda x: x['id'] if x else None)
    +                df[col] = conv
    +            print(df.info())
    +        elif isinstance(column, str):
    +            conv = df[column].apply(lambda x: x['id'] if x else None)
    +            df[column] = conv
    +            print(df.head())
    +        else:
    +            print(df.head())
    +
    +
    +async def main():
    +    fetcher = Fetcher('http://localhost:4000/graphql')
    +    await fetcher.getModules()
    +    # fetcher.convertObjectTOColumn('module', None)
    +    await fetcher.getModuleFeedback()
    +    # fetcher.convertObjectTOColumn('moduleFeedback', ['module', 'student'])
    +
    +
    +if __name__ == '__main__':
    +    asyncio.run(main())
    +
    +
    +
    +
    +
    +
    +
    +

    Functions

    +
    +
    +async def main() +
    +
    +
    +
    + +Expand source code + +
    async def main():
    +    fetcher = Fetcher('http://localhost:4000/graphql')
    +    await fetcher.getModules()
    +    # fetcher.convertObjectTOColumn('module', None)
    +    await fetcher.getModuleFeedback()
    +    # fetcher.convertObjectTOColumn('moduleFeedback', ['module', 'student'])
    +
    +
    +
    +
    +
    +

    Classes

    +
    +
    +class Fetcher +(url: str) +
    +
    +
    +
    + +Expand source code + +
    class Fetcher:
    +    def __init__(self, url: str):
    +        self.url = url
    +        self.response = None
    +        self.prisma = Prisma()
    +
    +        self.logger = logging.getLogger('__fetch__')
    +        logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)
    +
    +    def _createModuleMutation(self):
    +        self.response = """query{
    +                          module(input:{}){
    +                            id
    +                            moduleName
    +                            moduleNumber
    +                          }
    +                        }"""
    +        return self.response
    +
    +    def _createModuleFeedbackMutation(self):
    +        self.response = """query{
    +                          moduleFeedback(input:{}){
    +                            id
    +                            rating
    +                            feedback
    +                            student{
    +                              id
    +                            }
    +                            module{
    +                              id
    +                            }
    +                          }
    +                        }"""
    +        return self.response
    +
    +    async def _getModules(self):
    +        """
    +        Gets all modules from the database and returns them as a list of dictionaries.
    +        :return: class<list> of class<dict> of type<Module>
    +        """
    +        await self.prisma.connect()
    +        self.modules = await self.prisma.module.find_many()
    +
    +        mods = []
    +
    +        for i in range(len(self.modules)):
    +            mods.append(self.modules[i].dict())
    +
    +        await self.prisma.disconnect()
    +
    +        return mods
    +
    +    async def getModules(self):
    +        self.logger.info('Fetching modules...')
    +
    +        res = await self._getModules()
    +
    +        if res:
    +            self.logger.info('Modules successfully fetched')
    +            df = pd.DataFrame(res)
    +            print(df.info())
    +        else:
    +            self.logger.error('Failed to fetch modules')
    +            return None
    +
    +        return self.response
    +
    +    async def _getModuleFeedback(self):
    +        """
    +        Gets all feedbacks from the database and returns them as a list of dictionaries.
    +        :return: class<list> of class<dict> of type<ModuleFeedback>
    +        """
    +        await self.prisma.connect()
    +        self.feedbacks = await self.prisma.modulefeedback.find_many()
    +
    +        feedbacks = []
    +
    +        for i in range(len(self.feedbacks)):
    +            feedbacks.append(self.feedbacks[i].dict())
    +        await self.prisma.disconnect()
    +        return feedbacks
    +
    +    async def getModuleFeedback(self):
    +        self.logger.info('Fetching module feedback...')
    +
    +        res = await self._getModuleFeedback()
    +
    +        if res:
    +            self.logger.info('Module feedback successfully fetched')
    +            df = pd.DataFrame(res)
    +            print(df.info())
    +        else:
    +            self.logger.error('Failed to fetch module feedback')
    +            return None
    +
    +        return self.response
    +
    +    def convertJSONasDataFrame(self, model: str):
    +        df = pd.DataFrame(self.response['data'][model])
    +        return df
    +
    +    def convertObjectTOColumn(self, model: str, column: Union[str, list, None]):
    +        df = self.convertJSONasDataFrame(model)
    +        if isinstance(column, list):
    +            for col in column:
    +                conv = df[col].apply(lambda x: x['id'] if x else None)
    +                df[col] = conv
    +            print(df.info())
    +        elif isinstance(column, str):
    +            conv = df[column].apply(lambda x: x['id'] if x else None)
    +            df[column] = conv
    +            print(df.head())
    +        else:
    +            print(df.head())
    +
    +

    Methods

    +
    +
    +def convertJSONasDataFrame(self, model: str) +
    +
    +
    +
    + +Expand source code + +
    def convertJSONasDataFrame(self, model: str):
    +    df = pd.DataFrame(self.response['data'][model])
    +    return df
    +
    +
    +
    +def convertObjectTOColumn(self, model: str, column: Union[str, list, ForwardRef(None)]) +
    +
    +
    +
    + +Expand source code + +
    def convertObjectTOColumn(self, model: str, column: Union[str, list, None]):
    +    df = self.convertJSONasDataFrame(model)
    +    if isinstance(column, list):
    +        for col in column:
    +            conv = df[col].apply(lambda x: x['id'] if x else None)
    +            df[col] = conv
    +        print(df.info())
    +    elif isinstance(column, str):
    +        conv = df[column].apply(lambda x: x['id'] if x else None)
    +        df[column] = conv
    +        print(df.head())
    +    else:
    +        print(df.head())
    +
    +
    +
    +async def getModuleFeedback(self) +
    +
    +
    +
    + +Expand source code + +
    async def getModuleFeedback(self):
    +    self.logger.info('Fetching module feedback...')
    +
    +    res = await self._getModuleFeedback()
    +
    +    if res:
    +        self.logger.info('Module feedback successfully fetched')
    +        df = pd.DataFrame(res)
    +        print(df.info())
    +    else:
    +        self.logger.error('Failed to fetch module feedback')
    +        return None
    +
    +    return self.response
    +
    +
    +
    +async def getModules(self) +
    +
    +
    +
    + +Expand source code + +
    async def getModules(self):
    +    self.logger.info('Fetching modules...')
    +
    +    res = await self._getModules()
    +
    +    if res:
    +        self.logger.info('Modules successfully fetched')
    +        df = pd.DataFrame(res)
    +        print(df.info())
    +    else:
    +        self.logger.error('Failed to fetch modules')
    +        return None
    +
    +    return self.response
    +
    +
    +
    +
    +
    +
    +
    + +
    + + + \ No newline at end of file diff --git a/html/emse-mms/utils/helper.html b/docs/emse-mms/utils/helper.html similarity index 100% rename from html/emse-mms/utils/helper.html rename to docs/emse-mms/utils/helper.html diff --git a/html/emse-mms/utils/index.html b/docs/emse-mms/utils/index.html similarity index 97% rename from html/emse-mms/utils/index.html rename to docs/emse-mms/utils/index.html index 27b80f5..cd7e4ee 100644 --- a/html/emse-mms/utils/index.html +++ b/docs/emse-mms/utils/index.html @@ -30,6 +30,10 @@

    Sub-modules

    +
    emse-mms.utils.fetch
    +
    +
    +
    emse-mms.utils.helper
    @@ -77,6 +81,7 @@

    Index

  • Sub-modules

    • emse-mms.utils.db
    • +
    • emse-mms.utils.fetch
    • emse-mms.utils.helper
    • emse-mms.utils.recs
    • emse-mms.utils.response
    • diff --git a/html/emse-mms/utils/recs.html b/docs/emse-mms/utils/recs.html similarity index 100% rename from html/emse-mms/utils/recs.html rename to docs/emse-mms/utils/recs.html diff --git a/html/emse-mms/utils/response.html b/docs/emse-mms/utils/response.html similarity index 100% rename from html/emse-mms/utils/response.html rename to docs/emse-mms/utils/response.html diff --git a/html/emse-mms/utils/sdl.html b/docs/emse-mms/utils/sdl.html similarity index 100% rename from html/emse-mms/utils/sdl.html rename to docs/emse-mms/utils/sdl.html diff --git a/html/emse-mms/utils/seed.html b/docs/emse-mms/utils/seed.html similarity index 97% rename from html/emse-mms/utils/seed.html rename to docs/emse-mms/utils/seed.html index d972745..6293c90 100644 --- a/html/emse-mms/utils/seed.html +++ b/docs/emse-mms/utils/seed.html @@ -28,6 +28,8 @@

      Module emse-mms.utils.seed

      from enum import Enum
       import os
      +from typing import Union
      +
       from essential_generators import DocumentGenerator
       from numpy import random
       from prisma import Prisma
      @@ -88,28 +90,6 @@ 

      Module emse-mms.utils.seed

      print('Plan of study model seeded successfully!') -# async def seedEnrollmentDB(): -# print('Seeding enrollment model...') -# gen = DocumentGenerator() -# prisma = Prisma() -# await prisma.connect() -# iterations = 25 -# -# modules = await prisma.module.find_many() -# -# for account in accounts: -# # create enrollment for each module 25 times -# for module in modules: -# # create enrollment for each module 25 times -# await prisma.moduleenrollment.create( -# data={ -# "role": gen.word(), -# } -# ) -# -# print('Enrollment model seeded successfully!') - - def seedDbFeedback(): print('Seeding feedback model...') gen = DocumentGenerator() @@ -202,10 +182,12 @@

      Module emse-mms.utils.seed

      database and the essential_generators library to create document templates. """ - def __init__(self, skip: [Skipper] = None, cleanup: [Skipper] = None): + def __init__(self, skip: [Skipper] = None, cleanup: [Skipper] = None, iterations: int = 25): self.gen = DocumentGenerator() + self.gen.init_word_cache(5000) + self.gen.init_sentence_cache(5000) self.prisma = Prisma() - self.iterations = 25 + self.iterations = iterations self.accounts = [] self.modules = [] self.enrollments = [] @@ -325,16 +307,16 @@

      Module emse-mms.utils.seed

      template = { 'moduleName': { - 'typemap': 'word', + 'typemap': 'sentence', 'unique': True, 'tries': 100 }, 'moduleNumber': { - 'typemap': 'small_int', + 'typemap': 'integer', 'unique': True, 'tries': 100 }, - 'description': 'sentence', + 'description': 'paragraph', 'duration': 'small_int', 'intro': 'sentence', 'numSlides': 'small_int', @@ -480,31 +462,36 @@

      Module emse-mms.utils.seed

      self.logger.info('Skipping all model cleanup...') return else: + # Deleting all users if Skipper.user in self.cleanup: self.logger.info('Skipping user model cleanup...') else: await self._cleanupUserDB() - if Skipper.module in self.cleanup: - self.logger.info('Skipping module model cleanup...') - else: - await self._cleanupModuleDB() - - if Skipper.feedback in self.cleanup: - self.logger.info('Skipping feedback model cleanup...') - else: - await self._cleanupFeedbackDB() - + # Deleting all plans of study if Skipper.plan in self.cleanup: self.logger.info('Skipping plan of study model cleanup...') else: await self._cleanupPlanOfStudyDB() + # Deleting all enrollments if Skipper.enrollment in self.cleanup: self.logger.info('Skipping enrollment model cleanup...') else: await self._cleanupEnrollmentDB() + # Deleting all modules + if Skipper.module in self.cleanup: + self.logger.info('Skipping module model cleanup...') + else: + await self._cleanupModuleDB() + + # Deleting all module feedback + if Skipper.feedback in self.cleanup: + self.logger.info('Skipping feedback model cleanup...') + else: + await self._cleanupFeedbackDB() + self.logger.info('All models cleaned up successfully!') await self.disconnect() @@ -726,7 +713,7 @@

      Classes

      class Seeder -(skip: [Skipper'>] = None, cleanup: [Skipper'>] = None) +(skip: [Skipper'>] = None, cleanup: [Skipper'>] = None, iterations: int = 25)

      Seeder class for seeding the database with dummy data. Makes use of the prisma python client to connection to the @@ -741,10 +728,12 @@

      Classes

      database and the essential_generators library to create document templates. """ - def __init__(self, skip: [Skipper] = None, cleanup: [Skipper] = None): + def __init__(self, skip: [Skipper] = None, cleanup: [Skipper] = None, iterations: int = 25): self.gen = DocumentGenerator() + self.gen.init_word_cache(5000) + self.gen.init_sentence_cache(5000) self.prisma = Prisma() - self.iterations = 25 + self.iterations = iterations self.accounts = [] self.modules = [] self.enrollments = [] @@ -864,16 +853,16 @@

      Classes

      template = { 'moduleName': { - 'typemap': 'word', + 'typemap': 'sentence', 'unique': True, 'tries': 100 }, 'moduleNumber': { - 'typemap': 'small_int', + 'typemap': 'integer', 'unique': True, 'tries': 100 }, - 'description': 'sentence', + 'description': 'paragraph', 'duration': 'small_int', 'intro': 'sentence', 'numSlides': 'small_int', @@ -1019,31 +1008,36 @@

      Classes

      self.logger.info('Skipping all model cleanup...') return else: + # Deleting all users if Skipper.user in self.cleanup: self.logger.info('Skipping user model cleanup...') else: await self._cleanupUserDB() - if Skipper.module in self.cleanup: - self.logger.info('Skipping module model cleanup...') - else: - await self._cleanupModuleDB() - - if Skipper.feedback in self.cleanup: - self.logger.info('Skipping feedback model cleanup...') - else: - await self._cleanupFeedbackDB() - + # Deleting all plans of study if Skipper.plan in self.cleanup: self.logger.info('Skipping plan of study model cleanup...') else: await self._cleanupPlanOfStudyDB() + # Deleting all enrollments if Skipper.enrollment in self.cleanup: self.logger.info('Skipping enrollment model cleanup...') else: await self._cleanupEnrollmentDB() + # Deleting all modules + if Skipper.module in self.cleanup: + self.logger.info('Skipping module model cleanup...') + else: + await self._cleanupModuleDB() + + # Deleting all module feedback + if Skipper.feedback in self.cleanup: + self.logger.info('Skipping feedback model cleanup...') + else: + await self._cleanupFeedbackDB() + self.logger.info('All models cleaned up successfully!') await self.disconnect() @@ -1095,31 +1089,36 @@

      Methods

      self.logger.info('Skipping all model cleanup...') return else: + # Deleting all users if Skipper.user in self.cleanup: self.logger.info('Skipping user model cleanup...') else: await self._cleanupUserDB() - if Skipper.module in self.cleanup: - self.logger.info('Skipping module model cleanup...') - else: - await self._cleanupModuleDB() - - if Skipper.feedback in self.cleanup: - self.logger.info('Skipping feedback model cleanup...') - else: - await self._cleanupFeedbackDB() - + # Deleting all plans of study if Skipper.plan in self.cleanup: self.logger.info('Skipping plan of study model cleanup...') else: await self._cleanupPlanOfStudyDB() + # Deleting all enrollments if Skipper.enrollment in self.cleanup: self.logger.info('Skipping enrollment model cleanup...') else: await self._cleanupEnrollmentDB() + # Deleting all modules + if Skipper.module in self.cleanup: + self.logger.info('Skipping module model cleanup...') + else: + await self._cleanupModuleDB() + + # Deleting all module feedback + if Skipper.feedback in self.cleanup: + self.logger.info('Skipping feedback model cleanup...') + else: + await self._cleanupFeedbackDB() + self.logger.info('All models cleaned up successfully!') await self.disconnect()
      diff --git a/html/emse-mms/utils/similarity.html b/docs/emse-mms/utils/similarity.html similarity index 100% rename from html/emse-mms/utils/similarity.html rename to docs/emse-mms/utils/similarity.html diff --git a/html/emse-mms/endpoints/index.html b/html/emse-mms/endpoints/index.html deleted file mode 100644 index a0d1823..0000000 --- a/html/emse-mms/endpoints/index.html +++ /dev/null @@ -1,65 +0,0 @@ - - - - - - -emse-mms.endpoints API documentation - - - - - - - - - - - -
      - - -
      - - - \ No newline at end of file From f89dc8b5e50dc862cc08c732a4bbb181ac2e0da6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Thu, 23 Feb 2023 11:42:46 -0500 Subject: [PATCH 02/17] fix(deps): Added pdoc to dependency list --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index bee20cd..4cc0ea5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ pandas==1.4.4 scikit-learn==1.0.2 prisma==0.8.1 sklearn~=0.0.post1 -fastapi~=0.92.0 \ No newline at end of file +fastapi~=0.92.0 +pdoc3~=0.10.0 \ No newline at end of file From 2c0d4f8e190650c0742205dcb7f75d3e52287438 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Thu, 23 Feb 2023 11:47:55 -0500 Subject: [PATCH 03/17] fix(docs): Updated steps to generate in CI/CD --- .github/workflows/docs.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2addde6..a400b14 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -14,7 +14,14 @@ jobs: with: python-version: '3.9.13' cache: 'pip' # caching pip dependencies - - run: pip install -r requirements.txt + - name: Install dependencies + run: pip install -r requirements.txt + - name: Set env variables + run: | + echo "DATABASE_URL=${{secrets.DATABASE_URL}}" > .env + echo "DIRECT_URL=${{secrets.DIRECT_URL}}" > .env + - name: Generate Prisma types + run: prisma generate - name: Generate docs run: pdoc --html --output-dir docs --force . - name: Deploy docs From 6c71a0588b0abf4ddfc95fbc3d6215ecebace737 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Thu, 23 Feb 2023 11:53:19 -0500 Subject: [PATCH 04/17] fix(docs): Updated output path --- .github/workflows/docs.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index a400b14..6c51cc1 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -27,4 +27,4 @@ jobs: - name: Deploy docs uses: JamesIves/github-pages-deploy-action@v4 with: - folder: docs \ No newline at end of file + folder: docs/emse-mms \ No newline at end of file From 37a1ebee43a567f407a0dfaf3cd875d114144832 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Thu, 23 Feb 2023 14:36:23 -0500 Subject: [PATCH 05/17] chore(seed): Removed unused code --- utils/seed.py | 127 +++++++------------------------------------------- 1 file changed, 16 insertions(+), 111 deletions(-) diff --git a/utils/seed.py b/utils/seed.py index 2a00070..db69be7 100644 --- a/utils/seed.py +++ b/utils/seed.py @@ -7,116 +7,6 @@ from prisma import Prisma import logging from requests import post -from utils.sdl import createMutationString, createModuleMutationString - - -async def seedUserDB(): - print('Seeding user model...') - prisma = Prisma() - await prisma.connect() - gen = DocumentGenerator() - iterations = 25 - accounts = [] - - for i in range(iterations): - accounts.append({ - 'firstName': gen.name(), - 'lastName': gen.name(), - 'email': gen.email(), - 'openID': str(gen.integer()), - }) - - users = await prisma.user.create_many(data=accounts) - - print('User model seeded successfully with %d documents!' % users) - await prisma.disconnect() - - -async def seedPlanOfStudyDB(): - print('Seeding plan of study model...') - gen = DocumentGenerator() - prisma = Prisma() - await prisma.connect() - - modules = await prisma.module.find_many() - accounts = await prisma.user.find_many() - - for account in accounts: - for module in modules: - # create plan of study for each module 25 times - await prisma.planofstudy.create( - data={ - "modules": { - "connect": { - "id": module.get('id') - } - }, - "student": { - "connect": { - "id": account['id'] - } - } - } - ) - - print('Plan of study model seeded successfully!') - - -def seedDbFeedback(): - print('Seeding feedback model...') - gen = DocumentGenerator() - iterations = 25 - modules = getModules()['module'] - for module in modules: - # create feedback for each module 25 times - for i in range(iterations): - res = post('http://%s/graphql' % os.environ.get("API_URL", "localhost:4000"), - {}, - {'query': createMutationString( - comment=gen.sentence(), - rating=random.randint(1, 6), - moduleID=module['id'], - ) - }) - print(res.json()) - - print('Feedback model seeded successfully!') - - -def seedModuleModel(): - print('Seeding module model...') - gen = DocumentGenerator() - iterations = 25 - for i in range(iterations): - key_length = random.randint(1, 11) - res = post('http://%s/graphql' % os.environ.get("API_URL", "localhost:4000"), - {}, - {'query': createModuleMutationString( - moduleName=gen.word(), - moduleNumber=random.randint(1, 1000), - description=gen.sentence(), - duration=random.randint(1, 100), - intro=gen.sentence(), - numSlides=random.randint(1, 100), - keywords=[gen.word() for i in range(key_length)] - ) - }) - print(res.json()) - - print('Module model seeded successfully!') - - -def getModules(): - mods = post('http://%s/graphql' % os.environ.get("API_URL", "localhost:4000"), {}, { - 'query': """query{ - module(input:{}){ - id - moduleName - moduleNumber - } - }""" - }) - return mods.json()['data'] def getModuleFeedback(): @@ -154,7 +44,7 @@ class Seeder: database and the essential_generators library to create document templates. """ - def __init__(self, skip: [Skipper] = None, cleanup: [Skipper] = None, iterations: int = 25): + def __init__(self, skip: [Skipper] = None, cleanup: [Skipper] = None, iterations: int = 25, target: Union[str, None] = None): self.gen = DocumentGenerator() self.gen.init_word_cache(5000) self.gen.init_sentence_cache(5000) @@ -167,6 +57,7 @@ def __init__(self, skip: [Skipper] = None, cleanup: [Skipper] = None, iterations self.plans = [] self.skip = skip self.cleanup = cleanup + self.target = target self.logger = logging.getLogger('__seed__') logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO) @@ -182,6 +73,20 @@ async def disconnect(self): """ await self.prisma.disconnect() + async def createTargetUser(self): + """ + Creates a target user for testing purposes. + """ + account = await self.prisma.user.create(data={ + 'firstName': 'Test User', + 'lastName': 'Test User', + 'email': '', + 'openID': '' + }) + + self.logger.info('Created target user: %s' % account.id) + return account.id + async def _getUserAccounts(self): """ Gets all user accounts from the database and returns them as a list of dictionaries. From d6db68d049c23a358dfb297df828e4e7979bd2d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Thu, 23 Feb 2023 14:37:06 -0500 Subject: [PATCH 06/17] chore(app): Removed unused code --- app.py | 35 ++++------------------------------- 1 file changed, 4 insertions(+), 31 deletions(-) diff --git a/app.py b/app.py index c532998..d9e9c58 100644 --- a/app.py +++ b/app.py @@ -1,52 +1,25 @@ -from pandas import DataFrame import asyncio from prisma import Prisma from utils.db import testConnection -from utils.response import getModuleFromID -from utils.seed import getModuleFeedback, Seeder, Skipper -from utils.helper import convertReviewsFromDbToDf - +from utils.seed import Seeder, Skipper async def main() -> None: print('Starting application...') testConnection() - # await seedUserDB() - # seedModuleModel() - # await seedPlanOfStudyDB() - # await seedEnrollmentDB() db = Seeder( skip=[Skipper.all], cleanup=[Skipper.all], - iterations=100 + iterations=100, ) await db.seedAll() await db.cleanupAll() - await getUserProfile(ID="63da9e40020a625cc55f64c5") + # targetID = await db.createTargetUser() + await getUserProfile(ID='63f7a3068b546b91eadb20a6') exit(0) - # seedModuleModel() - # seedDbFeedback() - - -def getReviews(userID): - # read data - mod_data = getModuleFeedback() - df = convertReviewsFromDbToDf(mod_data['module'], userID) - - # print(df) - # print(df.groupby(['userID', 'moduleID']).sum().sort_values('rating', ascending=False).head()) - # print(df.groupby('moduleID')['rating'].sum().sort_values(ascending=False).head()) - - # get highest rated modules - top_mods: DataFrame = df.groupby('moduleID')['rating'].sum().sort_values(ascending=False) - - # run response for each row of the highest rated modules - print(top_mods) - res_top_mods = top_mods.reset_index() - res_top_mods.apply(lambda row: getModuleFromID(row), axis=1) async def getUserProfile(ID): From 6630227ed76ece5cae6e4ad77404f8318e552032 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Thu, 23 Feb 2023 14:37:28 -0500 Subject: [PATCH 07/17] feat(api): Started serving recommendation model over HTTP --- api.py | 8 ++-- recommend.py | 110 ++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 84 insertions(+), 34 deletions(-) diff --git a/api.py b/api.py index 9f8b79c..101e0c7 100644 --- a/api.py +++ b/api.py @@ -1,12 +1,14 @@ from fastapi import FastAPI from app import getUserProfile +from recommend import Recommender app = FastAPI() @app.get("/recommend/") async def read_item(userID: str): - account = await getUserProfile(userID) - print(account) - return {"user": account} + rec = Recommender(target=userID) + await rec.recommend() + # account = await getUserProfile(userID) + return {"user": userID} diff --git a/recommend.py b/recommend.py index 354b01f..e9d4fb2 100644 --- a/recommend.py +++ b/recommend.py @@ -3,43 +3,91 @@ # multiply the rating by the weight of the module # divide the sum of the weighted ratings by the sum of the ratings import asyncio +import random import pandas as pd from prisma import Prisma -async def getRecsForUser(userID: str): - prism = Prisma() - await prism.connect() - - # get a set of modules the user has reviewed - user_reviews = await prism.modulefeedback.find_many(where={'student': {'id': userID}}) - - reviews = list(map(lambda x: x.dict(), user_reviews)) - - # all the reviews by the user - df = pd.DataFrame(reviews) - - all_reviews = await prism.modulefeedback.find_many() - - all_reviews = list(map(lambda x: x.dict(), all_reviews)) - - df_all_reviews = pd.DataFrame(all_reviews) - - # get the weighted average of the reviews per module - df['weighted_rating'] = df['rating'] * df_all_reviews['rating'].mean() - - df.drop(columns=['module', 'student'], inplace=True) - print(df.info()) - print(df.head()) - - - -async def main(): - await getRecsForUser(userID='63f3b1cb9422322eb675292f') - pass +async def main(target: str): + rec = Recommender(target) + await rec.recommend() + + +class Recommender: + def __init__(self, target: str): + self.target = target + self.sample = [] + self.modules = [] + self.recs = [] + + async def _sampleModules(self, skip: bool = True): + prisma = Prisma() + await prisma.connect() + + modules = await prisma.module.find_many(include={'feedback': True}) + + modules = list(map(lambda x: x.dict(), modules)) + + rand = random.sample(modules, len(modules) // 2) + + # create module feedback for sample modules for target user + if not skip: + await self._seedTargetFeedback(rand) + + await prisma.disconnect() + + self.sample = rand + + async def _seedTargetFeedback(self, sample: list): + prisma = Prisma() + await prisma.connect() + for module in sample: + await prisma.modulefeedback.create( + data={ + 'feedback': 'This is a sample review', + 'rating': random.randint(1, 5), + 'module': { + 'connect': { + 'id': module + } + }, + 'student': { + 'connect': { + 'id': self.target + } + } + } + ) + print(f'Created feedback for module {module}') + + await prisma.disconnect() + + async def recommend(self): + """ + At this point our target user has reviewed half of the modules in the database. + We have access to the modules that the user has reviewed through the modules parameter, and now + we can now get recommendations for the user + """ + prism = Prisma() + await prism.connect() + await self._sampleModules(skip=True) + + df = pd.DataFrame(self.sample) + + # convert our review data into a user x module matrix + # find cosine similarity between the target user and all other users + # get the top 5 users with the highest similarity + # get the modules that the top 5 users have reviewed + # get the modules that the target user has not reviewed + # get the modules that the top 5 users have reviewed that the target user has not reviewed + # get the average rating for each module + # get the top 5 modules with the highest average rating + # return the top 5 modules with the highest average rating as recommendations + + await prism.disconnect() if __name__ == '__main__': - asyncio.run(main()) + asyncio.run(main(target="63f7a3068b546b91eadb20a6")) From 7cdcc9f71e7a4f61c0b022da5e90feb7bc75a4da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Thu, 23 Feb 2023 14:38:50 -0500 Subject: [PATCH 08/17] chore(vsc): Removed python cache from tracked directories --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index e608132..c630a79 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ input/ .DS_Store /.env +__pycache__/ \ No newline at end of file From d9191702784459522a0971d83c86b71dfdd9e66e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Tue, 28 Feb 2023 12:35:52 -0500 Subject: [PATCH 09/17] feat(api): Started serving API through Docker --- docker-compose.yml | 47 +++++++++++++++------------------------------- requirements.txt | 3 ++- 2 files changed, 17 insertions(+), 33 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 0867ec5..5bc85d7 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,42 +1,26 @@ version: "3.8" services: -# db: -# image: mongo:latest -# container_name: db -# environment: -# MONGO_INITDB_ROOT_USERNAME: root -# MONGO_INITDB_ROOT_PASSWORD: example -# ports: -# - "27017:27017" -# - '28017:28017' -# networks: -# - mmsNetwork -# command: > -# bash -c "mongod --replSet rs0 --bind_ip localhost,db" -# -# mongo-express: -# depends_on: -# - db -# image: mongo-express -# ports: -# - "8081:8081" -# environment: -# ME_CONFIG_MONGODB_ADMINUSERNAME: root -# ME_CONFIG_MONGODB_ADMINPASSWORD: example -# ME_CONFIG_MONGODB_URL: mongodb://root:example@db:27017/ -# ME_CONFIG_MONGODB_ENABLE_ADMIN: true -# networks: -# - mmsNetwork - app: - depends_on: - - db container_name: app build: context: . dockerfile: Dockerfile - + command: > + bash -c "prisma generate && python app.py" + networks: + - mmsNetwork + api: + container_name: api + build: + context: . + dockerfile: Dockerfile + ports: + - "8000:8000" + command: > + bash -c "prisma generate && python3 -m uvicorn api:app --reload --host 0.0.0.0" + networks: + - mmsNetwork client: depends_on: - redis @@ -46,7 +30,6 @@ services: - .env ports: - "4000:4000" - - "5555:5555" command: > bash -c "yarn && yarn generate && yarn dev" networks: diff --git a/requirements.txt b/requirements.txt index 4cc0ea5..712b82d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,4 +8,5 @@ scikit-learn==1.0.2 prisma==0.8.1 sklearn~=0.0.post1 fastapi~=0.92.0 -pdoc3~=0.10.0 \ No newline at end of file +pdoc3~=0.10.0 +uvicorn~=0.20.0 \ No newline at end of file From 7a1b42986c88b1ed5746987a66836bfd01c9ab4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Tue, 28 Feb 2023 12:37:12 -0500 Subject: [PATCH 10/17] fix(build): Changed Dockerfile steps and removed run command --- Dockerfile | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/Dockerfile b/Dockerfile index b54f401..eab2e80 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,11 @@ -FROM python:3.9.13 +FROM python:3.9.13 as base -RUN apt-get update +RUN apt-get -y update WORKDIR /usr/src/app COPY requirements.txt ./ -RUN pip install --no-cache-dir -r requirements.txt -COPY . . +RUN pip install --no-cache-dir -r requirements.txt -CMD [ "python", "./app.py" ] \ No newline at end of file +COPY . . \ No newline at end of file From 9fbc781e4848c7a188a5e9ab83205f7a26a072fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Tue, 28 Feb 2023 12:38:06 -0500 Subject: [PATCH 11/17] chore(app): Cleaned up entry file --- app.py | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/app.py b/app.py index d9e9c58..8869e1e 100644 --- a/app.py +++ b/app.py @@ -4,6 +4,7 @@ from utils.db import testConnection from utils.seed import Seeder, Skipper + async def main() -> None: print('Starting application...') testConnection() @@ -17,29 +18,9 @@ async def main() -> None: await db.seedAll() await db.cleanupAll() # targetID = await db.createTargetUser() - await getUserProfile(ID='63f7a3068b546b91eadb20a6') exit(0) -async def getUserProfile(ID): - print('Fetching user data...') - # get user from ID - # find enrolled modules - # remove modules that already enrolled in - # get feedback for each module - # get similarity matrix - # get recommendations - # return recommendations - prisma = Prisma() - await prisma.connect() - account = await prisma.user.find_unique( - where={ - 'id': ID - } - ) - print(account) - - if __name__ == '__main__': asyncio.run(main()) From 2ef8743fdc73c6e92eb275249533b22008d8b66c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Tue, 28 Feb 2023 12:38:36 -0500 Subject: [PATCH 12/17] feat(rec): Converted movie rec system to recommender class --- api.py | 14 +++-- recommend.py | 174 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 182 insertions(+), 6 deletions(-) diff --git a/api.py b/api.py index 101e0c7..f8d03ee 100644 --- a/api.py +++ b/api.py @@ -1,14 +1,20 @@ from fastapi import FastAPI from app import getUserProfile -from recommend import Recommender +from recommend import Recommender, Recs app = FastAPI() @app.get("/recommend/") async def read_item(userID: str): - rec = Recommender(target=userID) - await rec.recommend() + # rec = Recommender(target=userID) + # await rec.recommend() # account = await getUserProfile(userID) - return {"user": userID} + + rec = Recs() + res = rec.run() + + print(res.json()) + + return {"user": userID, "data": res} diff --git a/recommend.py b/recommend.py index e9d4fb2..e891013 100644 --- a/recommend.py +++ b/recommend.py @@ -6,13 +6,16 @@ import random import pandas as pd +from math import sqrt from prisma import Prisma async def main(target: str): - rec = Recommender(target) - await rec.recommend() + # rec = Recommender(target) + # await rec.recommend() + rec = Recs() + rec.run() class Recommender: @@ -89,5 +92,172 @@ async def recommend(self): await prism.disconnect() +class Recs: + def __init__(self, target=None): + if target is None: + target = [ + { + 'title': 'Breakfast Club, The', + 'rating': 5 + }, + { + 'title': 'Toy Story', + 'rating': 3.5 + }, + { + 'title': 'Jumanji', + 'rating': 2 + }, + { + 'title': 'Pulp Fiction', + 'rating': 5 + }, + { + 'title': 'Akira', + 'rating': 4.5 + } + ] + self.inputMovies = pd.DataFrame(target) + self.movies_df = pd.read_csv('input/movies.csv') + self.ratings_df = pd.read_csv('input/ratings.csv') + self.userSubsetGroup = None + self.pearsonCorrelationDict = dict() + self.tempTopUsersRating = None + + def cleanData(self): + # Using regular expressions to find a year stored between parentheses + # We specify the parentheses, so we don’t conflict with movies that have years in their titles + + print(self.movies_df.head()) + + self.movies_df['year'] = self.movies_df.title.str.extract('(\(\d\d\d\d\))', expand=False) + + # Removing the parentheses + + self.movies_df['year'] = self.movies_df.year.str.extract('(\d\d\d\d)', expand=False) + + # Removing the years from the ‘title’ column + + self.movies_df['title'] = self.movies_df.title.str.replace('(\(\d\d\d\d\))', '') + # Applying the strip function to get rid of any ending whitespace characters that may have appeared + + self.movies_df['title'] = self.movies_df['title'].apply(lambda x: x.strip()) + + print(self.movies_df.head()) + + self.movies_df = self.movies_df.drop('genres', 1) + + def handleUserInput(self): + inputID = self.movies_df[self.movies_df['title'].isin(self.inputMovies['title'].tolist())] + + inputMovies = pd.merge(inputID, self.inputMovies) + + inputMovies = inputMovies.drop('year', 1) + + print(inputMovies) + self.inputMovies = inputMovies + + def createSubset(self): + userSubset = self.ratings_df[self.ratings_df['movieId'].isin(self.inputMovies['movieId'].tolist())] + + userSubset.head() + + userSubsetGroup = userSubset.groupby(['userId']) + + userSubsetGroup = sorted(userSubsetGroup, key=lambda x: len(x[1]), reverse=True) + + print(userSubsetGroup[0:3]) + + self.userSubsetGroup = userSubsetGroup + + def createSimilarityMatrix(self): + pearsonCorrelationDict = {} + + for name, group in self.userSubsetGroup: + group = group.sort_values(by='movieId') + + inputMovies = self.inputMovies.sort_values(by='movieId') + + nRatings = len(group) + + temp_df = inputMovies[inputMovies['movieId'].isin(group['movieId'].tolist())] + tempRatingList = temp_df['rating'].tolist() + tempGroupList = group['rating'].tolist() + + Sxx = sum([i ** 2 for i in tempRatingList]) - pow(sum(tempRatingList), 2) / float(nRatings) + Syy = sum([i ** 2 for i in tempGroupList]) - pow(sum(tempGroupList), 2) / float(nRatings) + Sxy = sum(i * j for i, j in zip(tempRatingList, tempGroupList)) - sum(tempRatingList) * sum( + tempGroupList) / float(nRatings) + + if Sxx != 0 and Syy != 0: + pearsonCorrelationDict[name] = Sxy / sqrt(Sxx * Syy) + else: + pearsonCorrelationDict[name] = 0 + + print(pearsonCorrelationDict.items()) + self.pearsonCorrelationDict = pearsonCorrelationDict + + def topUser(self): + pearsonDF = pd.DataFrame.from_dict(self.pearsonCorrelationDict, orient='index') + pearsonDF.columns = ['similarityIndex'] + pearsonDF['userId'] = pearsonDF.index + pearsonDF.index = range(len(pearsonDF)) + + print(pearsonDF.head()) + + topUsers = pearsonDF.sort_values(by='similarityIndex', ascending=False)[0:50] + + print(topUsers.head()) + + topUsersRating = topUsers.merge(self.ratings_df, left_on='userId', right_on='userId', how='inner') + + print(topUsersRating.head()) + + topUsersRating['weightedRating'] = topUsersRating['similarityIndex'] * topUsersRating['rating'] + + print(topUsersRating.head()) + + tempTopUsersRating = topUsersRating.groupby('movieId').sum()[['similarityIndex', 'weightedRating']] + + tempTopUsersRating.columns = ['sum_similarityIndex', 'sum_weightedRating'] + + print(tempTopUsersRating.head()) + + self.tempTopUsersRating = tempTopUsersRating + + def recommend(self): + recommendation_df = pd.DataFrame() + + recommendation_df['weighted average recommendation score'] = self.tempTopUsersRating['sum_weightedRating'] / \ + self.tempTopUsersRating['sum_similarityIndex'] + recommendation_df['movieId'] = self.tempTopUsersRating.index + + print(recommendation_df.head()) + + recommendation_df = recommendation_df.sort_values(by='weighted average recommendation score', ascending=False) + + print(recommendation_df.head(10)) + + movies_df = self.movies_df.loc[self.movies_df['movieId'].isin(recommendation_df.head(10)['movieId'].tolist())] + + print(movies_df.head(10)) + + self.movies_df = movies_df + + def convertResultToJSON(self): + movies = self.movies_df + + return movies.to_json(orient="records") + + def run(self): + self.cleanData() + self.handleUserInput() + self.createSubset() + self.createSimilarityMatrix() + self.topUser() + self.recommend() + return self.convertResultToJSON() + + if __name__ == '__main__': asyncio.run(main(target="63f7a3068b546b91eadb20a6")) From 112ceb3f6be1ddf60c5f768a03d03653d2c8c687 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Tue, 28 Feb 2023 12:39:23 -0500 Subject: [PATCH 13/17] fix(api): Changed client API url to be Docker native --- utils/db.py | 2 +- utils/fetch.py | 2 +- utils/response.py | 2 +- utils/seed.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/db.py b/utils/db.py index 3bb0e2e..14d1592 100644 --- a/utils/db.py +++ b/utils/db.py @@ -4,7 +4,7 @@ def testConnection(): print('Testing connection...') # test connection to API - response = post('http://localhost:4000/graphql', {}, { + response = post('http://client:4000/graphql', {}, { 'query': """query{ module(input:{ }){ diff --git a/utils/fetch.py b/utils/fetch.py index 04f1f2c..567daac 100644 --- a/utils/fetch.py +++ b/utils/fetch.py @@ -122,7 +122,7 @@ def convertObjectTOColumn(self, model: str, column: Union[str, list, None]): async def main(): - fetcher = Fetcher('http://localhost:4000/graphql') + fetcher = Fetcher('http://client:4000/graphql') await fetcher.getModules() # fetcher.convertObjectTOColumn('module', None) await fetcher.getModuleFeedback() diff --git a/utils/response.py b/utils/response.py index cbd8d60..51fced4 100644 --- a/utils/response.py +++ b/utils/response.py @@ -11,7 +11,7 @@ def getModuleFromID(modules_df): # append to array in json file print('Getting module from ID: %s' % modules_df['moduleID']) - res = post('http://%s/graphql' % os.environ.get("API_URL", "localhost:4000"), {}, { + res = post('http://%s/graphql' % os.environ.get("API_URL", "client:4000"), {}, { 'query': """query{ module(input:{ id: "%s" diff --git a/utils/seed.py b/utils/seed.py index db69be7..7f131b3 100644 --- a/utils/seed.py +++ b/utils/seed.py @@ -10,7 +10,7 @@ def getModuleFeedback(): - mods = post('http://%s/graphql' % os.environ.get("API_URL", "localhost:4000"), {}, { + mods = post('http://%s/graphql' % os.environ.get("API_URL", "client:4000"), {}, { 'query': """query{ module(input:{}){ id From 0cea71cec515d439bfb4b8d9457ce54b418df386 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Tue, 28 Feb 2023 12:40:58 -0500 Subject: [PATCH 14/17] chore(docs): Fixed formatting issue --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1a701da..8627839 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ docker-compose up -d #### Get module recommendations for a user ```http - GET /recommend/${userID} +GET /recommend/${userID} ``` | Parameter | Type | Description | From da8fab67c3dd1fe965ab45f1ca843ae07348e5fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Tue, 28 Feb 2023 14:28:57 -0500 Subject: [PATCH 15/17] chore(app): Removed unused import --- app.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/app.py b/app.py index 8869e1e..eb800dd 100644 --- a/app.py +++ b/app.py @@ -1,6 +1,4 @@ import asyncio -from prisma import Prisma - from utils.db import testConnection from utils.seed import Seeder, Skipper From 9bc1f718e6124e8d30489bba921d371205122533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Tue, 28 Feb 2023 14:29:58 -0500 Subject: [PATCH 16/17] feat(rec): Completed similarity calculation and sorting --- recommend.py | 150 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 93 insertions(+), 57 deletions(-) diff --git a/recommend.py b/recommend.py index e891013..7f7c03a 100644 --- a/recommend.py +++ b/recommend.py @@ -8,6 +8,8 @@ import pandas as pd from math import sqrt +from pandas import DataFrame + from prisma import Prisma @@ -15,7 +17,7 @@ async def main(target: str): # rec = Recommender(target) # await rec.recommend() rec = Recs() - rec.run() + await rec.run() class Recommender: @@ -94,94 +96,140 @@ async def recommend(self): class Recs: def __init__(self, target=None): + self.prisma = Prisma() if target is None: target = [ { - 'title': 'Breakfast Club, The', + "id": "63f4ee98ece0495cbb312604", + 'title': 'orm,', 'rating': 5 }, { - 'title': 'Toy Story', + 'id': '63f4ee98ece0495cbb312608', + 'title': 'me', 'rating': 3.5 }, { - 'title': 'Jumanji', + 'id': '63f4ee98ece0495cbb3125f5', + 'title': '2017', 'rating': 2 }, { - 'title': 'Pulp Fiction', + "id": "63f4ee98ece0495cbb3125f9", + 'title': 'Souppe', 'rating': 5 }, { - 'title': 'Akira', + "id": "63f4ee98ece0495cbb3125fe", + 'title': 'Frams.', 'rating': 4.5 } ] self.inputMovies = pd.DataFrame(target) + self.movies_df = pd.read_csv('input/movies.csv') + self.modules_df = None self.ratings_df = pd.read_csv('input/ratings.csv') + self.feedbacks_df = None + self.userSubsetGroup = None self.pearsonCorrelationDict = dict() self.tempTopUsersRating = None - def cleanData(self): - # Using regular expressions to find a year stored between parentheses - # We specify the parentheses, so we don’t conflict with movies that have years in their titles + def sampleModules(self): + """ + - get a random set of 10 modules + - create a ratings for each module + - return as a list of dicts with id, title and rating + """ - print(self.movies_df.head()) + def cleanData(self): + """ + Removes all the columns that are not needed for the recommendation engine. This is done to reduce the size of + the dataset and reduce overall complexity in our data. + """ + modules_df: DataFrame = self.modules_df.drop([ + 'description', + 'duration', + 'intro', + 'numSlides', + 'keywords', + 'objectives', + 'createdAt', + 'updatedAt', + 'members', + 'assignments', + 'parentModules', + 'parentModuleIDs', + 'subModules', + 'subModuleIDs', + 'collections', + 'course', + 'courseIDs', + 'feedback', + 'moduleName' + ], axis=1) + + feedbacks_df: DataFrame = self.feedbacks_df.drop(['student', 'module'], axis=1) + + self.feedbacks_df = feedbacks_df + + self.modules_df = modules_df + + pd.options.display.max_columns = 60 + + async def __get_module_data(self): + await self.prisma.connect() + + modules = await self.prisma.module.find_many() - self.movies_df['year'] = self.movies_df.title.str.extract('(\(\d\d\d\d\))', expand=False) + modules = list(map(lambda x: x.dict(), modules)) - # Removing the parentheses + self.modules_df = pd.DataFrame(modules) - self.movies_df['year'] = self.movies_df.year.str.extract('(\d\d\d\d)', expand=False) + await self.prisma.disconnect() - # Removing the years from the ‘title’ column + async def __get_feedback_data(self): + await self.prisma.connect() - self.movies_df['title'] = self.movies_df.title.str.replace('(\(\d\d\d\d\))', '') - # Applying the strip function to get rid of any ending whitespace characters that may have appeared + feedbacks = await self.prisma.modulefeedback.find_many() - self.movies_df['title'] = self.movies_df['title'].apply(lambda x: x.strip()) + feedbacks = list(map(lambda x: x.dict(), feedbacks)) - print(self.movies_df.head()) + self.feedbacks_df = pd.DataFrame(feedbacks) - self.movies_df = self.movies_df.drop('genres', 1) + await self.prisma.disconnect() def handleUserInput(self): - inputID = self.movies_df[self.movies_df['title'].isin(self.inputMovies['title'].tolist())] + inputID = self.modules_df[self.modules_df['id'].isin(self.inputMovies['id'].tolist())] inputMovies = pd.merge(inputID, self.inputMovies) - inputMovies = inputMovies.drop('year', 1) - - print(inputMovies) self.inputMovies = inputMovies def createSubset(self): - userSubset = self.ratings_df[self.ratings_df['movieId'].isin(self.inputMovies['movieId'].tolist())] + userSubset = self.feedbacks_df[self.feedbacks_df['moduleId'].isin(self.inputMovies['id'].tolist())] - userSubset.head() - - userSubsetGroup = userSubset.groupby(['userId']) + userSubsetGroup = userSubset.groupby(['studentId']) userSubsetGroup = sorted(userSubsetGroup, key=lambda x: len(x[1]), reverse=True) - print(userSubsetGroup[0:3]) - self.userSubsetGroup = userSubsetGroup def createSimilarityMatrix(self): pearsonCorrelationDict = {} for name, group in self.userSubsetGroup: - group = group.sort_values(by='movieId') + group = group.sort_values(by='id') - inputMovies = self.inputMovies.sort_values(by='movieId') + inputMovies = self.inputMovies.sort_values(by='rating') nRatings = len(group) - temp_df = inputMovies[inputMovies['movieId'].isin(group['movieId'].tolist())] + temp_df = inputMovies[inputMovies['id'].isin(group['moduleId'].tolist())] + tempRatingList = temp_df['rating'].tolist() + tempGroupList = group['rating'].tolist() Sxx = sum([i ** 2 for i in tempRatingList]) - pow(sum(tempRatingList), 2) / float(nRatings) @@ -194,62 +242,50 @@ def createSimilarityMatrix(self): else: pearsonCorrelationDict[name] = 0 - print(pearsonCorrelationDict.items()) self.pearsonCorrelationDict = pearsonCorrelationDict def topUser(self): pearsonDF = pd.DataFrame.from_dict(self.pearsonCorrelationDict, orient='index') pearsonDF.columns = ['similarityIndex'] - pearsonDF['userId'] = pearsonDF.index + pearsonDF['studentId'] = pearsonDF.index pearsonDF.index = range(len(pearsonDF)) - print(pearsonDF.head()) - topUsers = pearsonDF.sort_values(by='similarityIndex', ascending=False)[0:50] print(topUsers.head()) - topUsersRating = topUsers.merge(self.ratings_df, left_on='userId', right_on='userId', how='inner') - - print(topUsersRating.head()) + topUsersRating = topUsers.merge(self.feedbacks_df, left_on='studentId', right_on='studentId', how='inner') topUsersRating['weightedRating'] = topUsersRating['similarityIndex'] * topUsersRating['rating'] - print(topUsersRating.head()) - - tempTopUsersRating = topUsersRating.groupby('movieId').sum()[['similarityIndex', 'weightedRating']] + tempTopUsersRating = topUsersRating.groupby('moduleId').sum()[['similarityIndex', 'weightedRating']] tempTopUsersRating.columns = ['sum_similarityIndex', 'sum_weightedRating'] - print(tempTopUsersRating.head()) - self.tempTopUsersRating = tempTopUsersRating def recommend(self): recommendation_df = pd.DataFrame() - recommendation_df['weighted average recommendation score'] = self.tempTopUsersRating['sum_weightedRating'] / \ - self.tempTopUsersRating['sum_similarityIndex'] - recommendation_df['movieId'] = self.tempTopUsersRating.index + recommendation_df['w-AVG score'] = self.tempTopUsersRating['sum_weightedRating'] / self.tempTopUsersRating['sum_similarityIndex'] + recommendation_df['moduleId'] = self.tempTopUsersRating.index - print(recommendation_df.head()) - - recommendation_df = recommendation_df.sort_values(by='weighted average recommendation score', ascending=False) + recommendation_df = recommendation_df.sort_values(by='w-AVG score', ascending=False) print(recommendation_df.head(10)) - movies_df = self.movies_df.loc[self.movies_df['movieId'].isin(recommendation_df.head(10)['movieId'].tolist())] - - print(movies_df.head(10)) + mods_df = self.modules_df.loc[self.modules_df['id'].isin(recommendation_df.head(20)['moduleId'].tolist())] - self.movies_df = movies_df + self.modules_df = mods_df def convertResultToJSON(self): - movies = self.movies_df + modules = self.modules_df - return movies.to_json(orient="records") + return modules.to_json(orient="records") - def run(self): + async def run(self): + await self.__get_module_data() + await self.__get_feedback_data() self.cleanData() self.handleUserInput() self.createSubset() From 398b9035f8149402f8736a75ae9b3036bf7a7787 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C3=A1niel=20B=2E=20Papp?= Date: Tue, 28 Feb 2023 14:30:26 -0500 Subject: [PATCH 17/17] feat(api): Started sending recommendations through HTTP API --- api.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/api.py b/api.py index f8d03ee..1846f44 100644 --- a/api.py +++ b/api.py @@ -1,6 +1,5 @@ from fastapi import FastAPI - -from app import getUserProfile +import json from recommend import Recommender, Recs app = FastAPI() @@ -8,13 +7,9 @@ @app.get("/recommend/") async def read_item(userID: str): - # rec = Recommender(target=userID) - # await rec.recommend() - # account = await getUserProfile(userID) - rec = Recs() - res = rec.run() + res = await rec.run() - print(res.json()) + cleaned_data = json.loads(res) - return {"user": userID, "data": res} + return {"user": userID, "data": cleaned_data[0:50]}