Skip to content

Commit

Permalink
Merge pull request #120 from FsherinP/warehouse-changes
Browse files: browse the repository at this point in the history
KB-6242 - added a language column to the warehouse.content table
  • Loading branch information
varshamahuli97 authored Jul 30, 2024
2 parents f5f20da + 4433efa commit bd23923
Show file tree
Hide file tree
Showing 4 changed files with 9 additions and 4 deletions.
3 changes: 2 additions & 1 deletion batch-models/src/main/resources/warehouse-schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -176,4 +176,5 @@ ALTER TABLE user_detail ADD COLUMN weekly_claps_day_before_yesterday VARCHAR(255
ALTER TABLE user_detail ADD COLUMN status INTEGER;
ALTER TABLE user_detail ADD COLUMN no_of_karma_points INTEGER;
ALTER TABLE org_hierarchy ADD COLUMN mdo_created_on DATE;
ALTER TABLE user_detail ADD COLUMN marked_as_not_my_user boolean;
ALTER TABLE content ADD COLUMN language VARCHAR(255);
ALTER TABLE user_detail ADD COLUMN marked_as_not_my_user boolean;
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,7 @@ object DataUtil extends Serializable {
// now that error handling is done, proceed with business as usual
df = df
.withColumn("courseOrgID", explode_outer(col("createdFor")))
.withColumn("contentLanguage", explode_outer(col("language")))
.select(
col("identifier").alias("courseID"),
col("primaryCategory").alias("category"),
Expand All @@ -614,7 +615,8 @@ object DataUtil extends Serializable {
col("courseOrgID"),
col("competencies_v5.competencyAreaId"),
col("competencies_v5.competencyThemeId"),
col("competencies_v5.competencySubThemeId")
col("competencies_v5.competencySubThemeId"),
col("contentLanguage")

)

Expand Down Expand Up @@ -1033,6 +1035,7 @@ object DataUtil extends Serializable {
.withColumn("courseEnrolledTimestamp", col("enrolled_date"))
.withColumn("lastContentAccessTimestamp", col("lastcontentaccesstime"))
.withColumn("issuedCertificateCount", size(col("issued_certificates")))
.withColumn("issuedCertificateCountPerContent", when(size(col("issued_certificates")) > 0, lit(1)).otherwise( lit(0)))
.withColumn("certificateGeneratedOn", when(col("issued_certificates").isNull, "").otherwise( col("issued_certificates")(size(col("issued_certificates")) - 1).getItem("lastIssuedOn")))
.withColumn("firstCompletedOn", when(col("issued_certificates").isNull, "").otherwise(when(size(col("issued_certificates")) > 0, col("issued_certificates")(0).getItem("lastIssuedOn")).otherwise("")))
.withColumn("certificateID", when(col("issued_certificates").isNull, "").otherwise( col("issued_certificates")(size(col("issued_certificates")) - 1).getItem("identifier")))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,8 @@ object DataExhaustModel extends AbsDashboardModel {
// ES content
val primaryCategories = Seq("Course","Program","Blended Program","Curated Program","Standalone Assessment","CuratedCollections","Moderated Course")
val shouldClause = primaryCategories.map(pc => s"""{"match":{"primaryCategory.raw":"${pc}"}}""").mkString(",")
val fields = Seq("identifier", "name", "primaryCategory", "status", "reviewStatus", "channel", "duration", "leafNodesCount", "lastPublishedOn", "lastStatusChangedOn", "createdFor", "competencies_v5", "programDirectorName")
val arrayFields = Seq("createdFor")
val fields = Seq("identifier", "name", "primaryCategory", "status", "reviewStatus", "channel", "duration", "leafNodesCount", "lastPublishedOn", "lastStatusChangedOn", "createdFor", "competencies_v5", "programDirectorName","language")
val arrayFields = Seq("createdFor","language")
val fieldsClause = fields.map(f => s""""${f}"""").mkString(",")
val query = s"""{"_source":[${fieldsClause}],"query":{"bool":{"should":[${shouldClause}]}}}"""
val esContentDF = elasticSearchDataFrame(conf.sparkElasticsearchConnectionHost, "compositesearch", query, fields, arrayFields)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ object CourseReportModel extends AbsDashboardModel {
col("courseResourceCount").alias("resource_count"),
col("totalCertificatesIssued").alias("total_certificates_issued"),
col("courseReviewStatus").alias("content_substatus"),
col("contentLanguage").alias("language"),
col("data_last_generated_on")
)
generateReport(df_warehouse.coalesce(1), s"${reportPath}-warehouse")
Expand Down

0 comments on commit bd23923

Please sign in to comment.