Skip to content

Commit

Permalink
linting
Browse files: browse the repository at this point in the history
  • Loading branch information
javfg committed Nov 19, 2024
1 parent 14f3a47 commit cee38c4
Showing 1 changed file with 8 additions and 7 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -120,13 +120,14 @@ object Grounding extends Serializable with LazyLogging {

df.withColumn("minDistinctKeywordsPerLabelPerPubOverKeywordPerPub",
min(col(labelCountsColumnName)).over(windowPerKeywordPerPub)
).withColumn("minDistinctKeywordsPerLabelOverKeywordOverallPubs",
min(col("minDistinctKeywordsPerLabelPerPubOverKeywordPerPub")).over(windowPerKeyword)
// was previously a filter, now changed to a boolean column
).withColumn(
"minDistinctKeywordsPerLabelOverKeywordOverallPubs",
min(col("minDistinctKeywordsPerLabelPerPubOverKeywordPerPub")).over(windowPerKeyword)
// was previously a filter, now changed to a boolean column
).withColumn("isDisambiguous",
col("minDistinctKeywordsPerLabelPerPubOverKeywordPerPub") <= col(
"minDistinctKeywordsPerLabelOverKeywordOverallPubs"
)
col("minDistinctKeywordsPerLabelPerPubOverKeywordPerPub") <= col(
"minDistinctKeywordsPerLabelOverKeywordOverallPubs"
)
)
}

Expand Down Expand Up @@ -197,7 +198,7 @@ object Grounding extends Serializable with LazyLogging {
.dropDuplicates("type", "label", "labelN", "keywordId")
// evaluated after filtering by rank, so the count is determined only by relevant keywordIds
.withColumn("uniqueKeywordIdsPerLabelN",
approx_count_distinct(col("keywordId"), 0.01).over(windowByTypeAndLabel)
approx_count_distinct(col("keywordId"), 0.01).over(windowByTypeAndLabel)
)
.orderBy(col("type"), col("labelN"))

Expand Down

0 comments on commit cee38c4

Please sign in to comment.