Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adds sql script for finding forms affected by undefined answers #24

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions tools/2024-11_fix_hashed_metadata/fix_hashed.metadata.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
-- Type used for outputting results
drop type if exists respondent_count_type cascade;
create type respondent_count_type as (
project_id text,
form_id text,
respondent_count_before integer,
respondent_count_after integer

);

-- Function removes all respondents from data for submissions for the given project and submission.
-- It outputs the count of respondents before and after running.
-- If run with dry_run = true, it will not perform the update.
create or replace function fix_respondent_in_metadata(project_id text, form_id text, dry_run boolean)
returns respondent_count_type as $$
declare
result respondent_count_type;
before_count integer;
after_count integer;
begin
execute format(
$fmt$
select count(0) from %s."%s"
where data ? 'metadata' and ( data -> 'metadata' ? 'respondent')
$fmt$
, project_id
, form_id
) into before_count;
if (dry_run is not true) then
raise notice 'Stripping respondent for project % form %', project_id, form_id;
execute format(
$fmt$
update %s."%s"
set data = data::jsonb #- '{metadata,respondent}'
where data ? 'metadata'
and ( data -> 'metadata' ? 'respondent')
$fmt$
, project_id
, form_id
);
end if;

execute format(
$fmt$
select count(0) from %s."%s"
where data ? 'metadata' and ( data -> 'metadata' ? 'respondent')
$fmt$
, project_id
, form_id
) into after_count;

return (project_id, form_id, before_count, after_count);
end $$ language plpgsql;


-- p896
select * from fix_respondent_in_metadata('p896', '344335', true);
select * from fix_respondent_in_metadata('p896', '347973', true);
select * from fix_respondent_in_metadata('p896', '386498', true);

-- p1336
select * from fix_respondent_in_metadata('p1336', '205339', true);
select * from fix_respondent_in_metadata('p1336', '220418', true);
select * from fix_respondent_in_metadata('p1336', '221197', true);
select * from fix_respondent_in_metadata('p1336', '358479', true);
select * from fix_respondent_in_metadata('p1336', '358484', true);
select * from fix_respondent_in_metadata('p1336', '358486', true);

-- p2731
select * from fix_respondent_in_metadata('p2731', '367084', true);
select * from fix_respondent_in_metadata('p2731', '367277', true);

-- p2525
select * from fix_respondent_in_metadata('p2525', '371198', true);
select * from fix_respondent_in_metadata('p2525', '377224', true);
select * from fix_respondent_in_metadata('p2525', '377898', true);
select * from fix_respondent_in_metadata('p2525', '388300', true);
select * from fix_respondent_in_metadata('p2525', '388303', true);
select * from fix_respondent_in_metadata('p2525', '394394', true);
select * from fix_respondent_in_metadata('p2525', '394397', true);
select * from fix_respondent_in_metadata('p2525', '394398', true);
select * from fix_respondent_in_metadata('p2525', '394400', true);
select * from fix_respondent_in_metadata('p2525', '415005', true);

-- Cleanup
drop function if exists fix_respondent_in_metadata;
drop type if exists respondent_count_type;

83 changes: 83 additions & 0 deletions tools/2025-02_fix_undefined/find_all_undefined.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
/*
This script loops through all forms for all projects
It then looks through all of the answers, looking for
an item in answer with the value "undefined". Any results
are added to the results table. After all forms are queried
it loops through the results table, and then does a query
to find the element-type of the found items, updating the
results table
*/

DROP TABLE undefined_fix_results;
CREATE TABLE undefined_fix_results (
id SERIAL PRIMARY KEY,
submission_id TEXT,
key TEXT,
value TEXT,
element_type TEXT,
project_id TEXT,
form_id TEXT
);


-- Loop through all projects and forms. Put any "undefined" in undefined_fix_results table
DO $$
DECLARE
project_id TEXT;
form_id TEXT;
query TEXT;
BEGIN
FOR project_id IN
SELECT nspname as project_id
FROM pg_namespace
WHERE nspname ~ '^p[0-9]+$'
LOOP
FOR form_id IN
SELECT table_name as form_id
FROM information_schema.tables
WHERE table_schema = project_id
AND table_name ~ '^[0-9]+$'
LOOP
query := FORMAT(
$fmt$
INSERT INTO undefined_fix_results (submission_id, key, value, project_id, form_id)
SELECT data->'metadata'->'submission_id' as submission_id, n.key as key, n.value as value, %L as project_id, %L as form_id
FROM %I.%I,
jsonb_each(data->'answers') AS n(key, value)
WHERE n.value = '"undefined"';
$fmt$,
project_id, form_id, project_id, form_id
);

EXECUTE query;
END LOOP;
END LOOP;
END $$;

-- Loop through undefined_fix_results, lookup element-type of affected elements, then update row.
DO $$
DECLARE
rec RECORD;
query TEXT;
elem_type TEXT;
BEGIN
FOR rec IN
SELECT *
FROM undefined_fix_results
LOOP
query := FORMAT(
$fmt$
SELECT value->'elementType' as element_type from %I."%s/metadata",
LATERAL jsonb_array_elements(data->'elements') as elem
WHERE elem->'externalElementId' = '"%s"';
$fmt$,
rec.project_id, rec.form_id, rec.key
);
RAISE NOTICE '%', query;
EXECUTE query INTO elem_type;

UPDATE undefined_fix_results
SET element_type = elem_type
WHERE id = rec.id;
END LOOP;
END $$;