From 134e704577a093a532b6eecb0805c8b0a4669dc5 Mon Sep 17 00:00:00 2001 From: Parker-Kasiewicz Date: Sat, 2 Nov 2024 19:42:02 -0700 Subject: [PATCH] updated schema & null behavior for hashtags & urls to bigquery --- pkg/hydrator/hydrator.go | 2 ++ pkg/output/bq/schema/schema.go | 30 ++++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/pkg/hydrator/hydrator.go b/pkg/hydrator/hydrator.go index 472707b..aa7ad47 100644 --- a/pkg/hydrator/hydrator.go +++ b/pkg/hydrator/hydrator.go @@ -251,6 +251,8 @@ func (h *Hydrator) flattenFullProfile(profile *bsky.ActorDefs_ProfileViewDetaile } func (h *Hydrator) flattenFacets(facets []*bsky.RichtextFacet) (hashtags []string, urls []string) { + hashtags = []string{} + urls = []string{} if facets != nil { for _, facet := range facets { if facet != nil { diff --git a/pkg/output/bq/schema/schema.go b/pkg/output/bq/schema/schema.go index 31007c3..7f6d72c 100644 --- a/pkg/output/bq/schema/schema.go +++ b/pkg/output/bq/schema/schema.go @@ -276,6 +276,16 @@ func GetSchema() bigquery.Schema { "name": "RepostCount", "type": "INTEGER" }, + { + "mode": "REPEATED", + "name": "Hashtags", + "type": "STRING" + }, + { + "mode": "REPEATED", + "name": "URLs", + "type": "STRING" + }, { "name": "Text", "type": "STRING" @@ -399,6 +409,16 @@ func GetSchema() bigquery.Schema { "name": "ReplyParentCID", "type": "STRING" }, + { + "mode": "REPEATED", + "name": "Hashtags", + "type": "STRING" + }, + { + "mode": "REPEATED", + "name": "URLs", + "type": "STRING" + }, { "name": "Text", "type": "STRING" @@ -566,6 +586,16 @@ func GetSchema() bigquery.Schema { "name": "RepostCount", "type": "INTEGER" }, + { + "mode": "REPEATED", + "name": "Hashtags", + "type": "STRING" + }, + { + "mode": "REPEATED", + "name": "URLs", + "type": "STRING" + }, { "name": "Text", "type": "STRING"