From 1a1d3b680b2b5ac9f5ef437dc963d79069dcf5b6 Mon Sep 17 00:00:00 2001 From: FsherinP Date: Tue, 30 Jul 2024 12:42:24 +0530 Subject: [PATCH] KB-6242 - added language col in warehouse.content table --- batch-models/src/main/resources/warehouse-schema.sql | 3 ++- .../main/scala/org/ekstep/analytics/dashboard/DataUtil.scala | 5 ++++- .../analytics/dashboard/exhaust/DataExhaustModel.scala | 4 ++-- .../dashboard/report/course/CourseReportModel.scala | 1 + 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/batch-models/src/main/resources/warehouse-schema.sql b/batch-models/src/main/resources/warehouse-schema.sql index 4795bcbe..44e900d6 100644 --- a/batch-models/src/main/resources/warehouse-schema.sql +++ b/batch-models/src/main/resources/warehouse-schema.sql @@ -174,4 +174,5 @@ ALTER TABLE user_enrolments ADD COLUMN first_completed_on VARCHAR(255); ALTER TABLE user_detail ADD COLUMN weekly_claps_day_before_yesterday VARCHAR(255); ALTER TABLE user_detail ADD COLUMN status INTEGER; ALTER TABLE user_detail ADD COLUMN no_of_karma_points INTEGER; -ALTER TABLE org_hierarchy ADD COLUMN mdo_created_on DATE; \ No newline at end of file +ALTER TABLE org_hierarchy ADD COLUMN mdo_created_on DATE; +ALTER TABLE content ADD COLUMN language VARCHAR(255); \ No newline at end of file diff --git a/batch-models/src/main/scala/org/ekstep/analytics/dashboard/DataUtil.scala b/batch-models/src/main/scala/org/ekstep/analytics/dashboard/DataUtil.scala index 6a60b065..1802a4bd 100644 --- a/batch-models/src/main/scala/org/ekstep/analytics/dashboard/DataUtil.scala +++ b/batch-models/src/main/scala/org/ekstep/analytics/dashboard/DataUtil.scala @@ -598,6 +598,7 @@ object DataUtil extends Serializable { // now that error handling is done, proceed with business as usual df = df .withColumn("courseOrgID", explode_outer(col("createdFor"))) + .withColumn("contentLanguage", explode_outer(col("language"))) .select( col("identifier").alias("courseID"), col("primaryCategory").alias("category"), @@ -612,7 +613,8 @@ object DataUtil extends Serializable { col("courseOrgID"), col("competencies_v5.competencyAreaId"), col("competencies_v5.competencyThemeId"), - col("competencies_v5.competencySubThemeId") + col("competencies_v5.competencySubThemeId"), + col("contentLanguage") ) @@ -1031,6 +1033,7 @@ object DataUtil extends Serializable { .withColumn("courseEnrolledTimestamp", col("enrolled_date")) .withColumn("lastContentAccessTimestamp", col("lastcontentaccesstime")) .withColumn("issuedCertificateCount", size(col("issued_certificates"))) + .withColumn("issuedCertificateCountPerContent", when(size(col("issued_certificates")) > 0, lit(1)).otherwise( lit(0))) .withColumn("certificateGeneratedOn", when(col("issued_certificates").isNull, "").otherwise( col("issued_certificates")(size(col("issued_certificates")) - 1).getItem("lastIssuedOn"))) .withColumn("firstCompletedOn", when(col("issued_certificates").isNull, "").otherwise(when(size(col("issued_certificates")) > 0, col("issued_certificates")(0).getItem("lastIssuedOn")).otherwise(""))) .withColumn("certificateID", when(col("issued_certificates").isNull, "").otherwise( col("issued_certificates")(size(col("issued_certificates")) - 1).getItem("identifier"))) diff --git a/batch-models/src/main/scala/org/ekstep/analytics/dashboard/exhaust/DataExhaustModel.scala b/batch-models/src/main/scala/org/ekstep/analytics/dashboard/exhaust/DataExhaustModel.scala index 174dc0d5..4037af32 100644 --- a/batch-models/src/main/scala/org/ekstep/analytics/dashboard/exhaust/DataExhaustModel.scala +++ b/batch-models/src/main/scala/org/ekstep/analytics/dashboard/exhaust/DataExhaustModel.scala @@ -56,8 +56,8 @@ object DataExhaustModel extends AbsDashboardModel { // ES content val primaryCategories = Seq("Course","Program","Blended Program","Curated Program","Standalone Assessment","CuratedCollections","Moderated Course") val shouldClause = primaryCategories.map(pc => s"""{"match":{"primaryCategory.raw":"${pc}"}}""").mkString(",") - val fields = Seq("identifier", "name", "primaryCategory", "status", "reviewStatus", "channel", "duration", "leafNodesCount", "lastPublishedOn", "lastStatusChangedOn", "createdFor", "competencies_v5", "programDirectorName") - val arrayFields = Seq("createdFor") + val fields = Seq("identifier", "name", "primaryCategory", "status", "reviewStatus", "channel", "duration", "leafNodesCount", "lastPublishedOn", "lastStatusChangedOn", "createdFor", "competencies_v5", "programDirectorName","language") + val arrayFields = Seq("createdFor","language") val fieldsClause = fields.map(f => s""""${f}"""").mkString(",") val query = s"""{"_source":[${fieldsClause}],"query":{"bool":{"should":[${shouldClause}]}}}""" val esContentDF = elasticSearchDataFrame(conf.sparkElasticsearchConnectionHost, "compositesearch", query, fields, arrayFields) diff --git a/batch-models/src/main/scala/org/ekstep/analytics/dashboard/report/course/CourseReportModel.scala b/batch-models/src/main/scala/org/ekstep/analytics/dashboard/report/course/CourseReportModel.scala index 1db53731..a3e97712 100644 --- a/batch-models/src/main/scala/org/ekstep/analytics/dashboard/report/course/CourseReportModel.scala +++ b/batch-models/src/main/scala/org/ekstep/analytics/dashboard/report/course/CourseReportModel.scala @@ -209,6 +209,7 @@ object CourseReportModel extends AbsDashboardModel { col("courseResourceCount").alias("resource_count"), col("totalCertificatesIssued").alias("total_certificates_issued"), col("courseReviewStatus").alias("content_substatus"), + col("contentLanguage").alias("language"), col("data_last_generated_on") ) generateReport(df_warehouse.coalesce(1), s"${reportPath}-warehouse")