diff --git a/backend/alembic_files/versions/08ddf28cb361_add_feedback_language_percentage.py b/backend/alembic_files/versions/08ddf28cb361_add_feedback_language_percentage.py
new file mode 100644
index 00000000..ba39b470
--- /dev/null
+++ b/backend/alembic_files/versions/08ddf28cb361_add_feedback_language_percentage.py
@@ -0,0 +1,30 @@
+"""add feedback language percentage
+
+Revision ID: 08ddf28cb361
+Revises: 106ff76da94a
+Create Date: 2023-08-30 16:38:26.013055
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '08ddf28cb361'
+down_revision = '106ff76da94a'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Add two nullable Integer columns to the `section` table.
+    op.add_column('section', sa.Column('positive_language_percent', sa.Integer(), nullable=True))
+    op.add_column('section', sa.Column('constructive_language_percent', sa.Integer(), nullable=True))
+    # NOTE(review): c2010fea1867 drops both columns again and a363f59d034e re-adds them as Float.
+
+
+def downgrade() -> None:
+    # Undo upgrade(): drop the two columns, in reverse order of creation.
+    op.drop_column('section', 'constructive_language_percent')
+    op.drop_column('section', 'positive_language_percent')
+    # NOTE(review): any values stored in the two columns are discarded by this downgrade.
diff --git a/backend/alembic_files/versions/1c3fcbc15477_change_data_type_to_float.py b/backend/alembic_files/versions/1c3fcbc15477_change_data_type_to_float.py
new file mode 100644
index 00000000..c0d1490e
--- /dev/null
+++ b/backend/alembic_files/versions/1c3fcbc15477_change_data_type_to_float.py
@@ -0,0 +1,28 @@
+"""change data type to float
+
+Revision ID: 1c3fcbc15477
+Revises: 08ddf28cb361
+Create Date: 2023-08-30 16:58:36.435159
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '1c3fcbc15477'
+down_revision = '08ddf28cb361'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # No-op: despite the revision title, no schema operation is issued here.
+    pass
+    # NOTE(review): the Integer -> Float change is done by c2010fea1867 + a363f59d034e instead.
+
+
+def downgrade() -> None:
+    # No-op: upgrade() changes nothing, so there is nothing to revert.
+    pass
+    # NOTE(review): this revision only serves as a link in the down_revision chain.
diff --git a/backend/alembic_files/versions/a363f59d034e_add_feedback_percentage_to_the_section_.py b/backend/alembic_files/versions/a363f59d034e_add_feedback_percentage_to_the_section_.py
new file mode 100644
index 00000000..4cd6bbe2
--- /dev/null
+++ b/backend/alembic_files/versions/a363f59d034e_add_feedback_percentage_to_the_section_.py
@@ -0,0 +1,30 @@
+"""add feedback percentage to the section table
+
+Revision ID: a363f59d034e
+Revises: c2010fea1867
+Create Date: 2023-08-31 14:35:41.192691
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'a363f59d034e'
+down_revision = 'c2010fea1867'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Re-create the two percentage columns on `section`, this time as nullable Float.
+    op.add_column('section', sa.Column('positive_language_percent', sa.Float(), nullable=True))
+    op.add_column('section', sa.Column('constructive_language_percent', sa.Float(), nullable=True))
+    # No default is given, so rows that already exist hold NULL in both columns.
+
+
+def downgrade() -> None:
+    # Undo upgrade(): drop both Float columns, in reverse order of creation.
+    op.drop_column('section', 'constructive_language_percent')
+    op.drop_column('section', 'positive_language_percent')
+    # NOTE(review): stored percentages are discarded by this downgrade.
diff --git a/backend/alembic_files/versions/c2010fea1867_remove_column_from_table.py b/backend/alembic_files/versions/c2010fea1867_remove_column_from_table.py
new file mode 100644
index 00000000..3e5d71f4
--- /dev/null
+++ b/backend/alembic_files/versions/c2010fea1867_remove_column_from_table.py
@@ -0,0 +1,30 @@
+"""remove column from table
+
+Revision ID: c2010fea1867
+Revises: 1c3fcbc15477
+Create Date: 2023-08-31 14:35:01.377046
+
+"""
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'c2010fea1867'
+down_revision = '1c3fcbc15477'
+branch_labels = None
+depends_on = None
+
+
+def upgrade() -> None:
+    # Drop the two Integer percentage columns from `section`.
+    op.drop_column('section', 'positive_language_percent')
+    op.drop_column('section', 'constructive_language_percent')
+    # NOTE(review): revision a363f59d034e re-adds both columns as Float.
+
+
+def downgrade() -> None:
+    # Restore both columns as nullable INTEGER, the type they had before this revision.
+    op.add_column('section', sa.Column('constructive_language_percent', sa.INTEGER(), autoincrement=False, nullable=True))
+    op.add_column('section', sa.Column('positive_language_percent', sa.INTEGER(), autoincrement=False, nullable=True))
+    # Values removed by upgrade() are not restored; the re-created columns hold NULL.
diff --git a/backend/data/report_writer.py b/backend/data/report_writer.py
index 9b50bc0a..3e95c4b1 100644
--- a/backend/data/report_writer.py
+++ b/backend/data/report_writer.py
@@ -62,7 +62,9 @@ def upsert_report_section(section: Section, report_id: int, session: Session):
         report_id=report_id,
         number=section.number,
         title=section.title,
-        decision=section.decision
+        decision=section.decision,
+        positive_language_percent=section.positive_language_percent,
+        constructive_language_percent=section.constructive_language_percent
     )
 
     upsert_section_statement = insert(section_table).values(section_to_insert)
diff --git a/backend/models/section.py b/backend/models/section.py
index 77b4441f..d00480f7 100644
--- a/backend/models/section.py
+++ b/backend/models/section.py
@@ -18,4 +18,6 @@ class Section(Base):
     number: Mapped[int]= mapped_column(nullable=True)
     decision: Mapped[str] = mapped_column(nullable=True)
     title: Mapped[str] = mapped_column(nullable= True)
-    feedback: Mapped[List["Feedback"]] = relationship()
\ No newline at end of file
+    feedback: Mapped[List["Feedback"]] = relationship()
+    positive_language_percent: Mapped[float] = mapped_column(nullable=True)
+    constructive_language_percent: Mapped[float] = mapped_column(nullable=True)
diff --git a/backend/services/basic_info_scraper.py b/backend/services/basic_info_scraper.py
index 02d0ace3..ffe6599e 100644
--- a/backend/services/basic_info_scraper.py
+++ b/backend/services/basic_info_scraper.py
@@ -18,7 +18,6 @@ def scrape_reports() -> list[Report]:
 
     LOGGER.info("Retrieving report links")
 
-    # report_links = get_report_links()
-    report_links = ["/service-standard-reports/get-security-clearance"]
+    report_links = get_report_links()
     reports_models = []
     number_of_reports = len(report_links)
@@ -264,6 +263,8 @@ def create_report_model(report_dict: dict, url: str) -> Report:
                     section.decision = report_section["decision"]
                 if "title" in report_section.keys():
                     section.title = report_section["title"]
+                section.positive_language_percent = report_section.get("positive_feedback_percentage")
+                section.constructive_language_percent = report_section.get("negative_feedback_percentage")
 
                 if "feedback" in report_section:
                     for feedback_item in report_section["feedback"]:
diff --git a/backend/services/section_info_scraper.py b/backend/services/section_info_scraper.py
index e96479cf..dec3d079 100644
--- a/backend/services/section_info_scraper.py
+++ b/backend/services/section_info_scraper.py
@@ -43,14 +43,18 @@ def scrape_sections_html(soup) -> list[dict]:
 
 
 def analyse_feedback(feedback_string):
-
+    """Return [positive, negative] sentiment scores of the text, each scaled by 100."""
     si_obj = SentimentIntensityAnalyzer()
     sentiment_dict = si_obj.polarity_scores(feedback_string)
     negative_percentage = sentiment_dict['neg']*100
     positive_percentage = sentiment_dict['pos']*100
 
-    return positive_percentage, negative_percentage
+    return [positive_percentage, negative_percentage]
 
+
+def extract_text_from_feedback(feedback):
+    """Join the first element of every feedback item, in document order, with spaces."""
+    return ' '.join(item[0] for item in feedback)
 
 
 
@@ -79,21 +83,16 @@ def scrape_one(soup: BeautifulSoup, sections: list[dict]):
             feedback = []
             feedback.extend(extract_feedback(section_decision, "what-the-team-has-done-well", FeedbackType.POSITIVE))
             feedback.extend(extract_feedback(section_decision, "what-the-team-needs-to-explore", FeedbackType.CONSTRUCTIVE))
-
-
-            feedback_concat = []
-            feedback_string = ' '
-            for text in feedback:
-                feedback_concat.insert(0,text[0])
-
-            analyse_feedback(feedback_string.join(feedback_concat))
-
+            feedback_text = extract_text_from_feedback(feedback)
+            positive_percentage, negative_percentage = analyse_feedback(feedback_text)
 
             sections.append(dict(
                 number=int(section_id),
                 decision=get_decision(section_decision.text),
                 title = section_element.text.strip(),
-                feedback = feedback
+                feedback = feedback,
+                positive_feedback_percentage = positive_percentage,
+                negative_feedback_percentage = negative_percentage
             ))
             break
 