diff --git a/README.md b/README.md index cd2baf6..dfa6eb3 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,9 @@ pip install singer-target-postgres 1. Create a [config file](#configjson) at `~/singer.io/target_postgres_config.json` with postgres connection information and target postgres schema. + If a value is not set in the config file, the target will use the normal + [PostgreSQL env vars](https://www.postgresql.org/docs/current/libpq-envars.html) + for Postgres configuration before taking the default. ```json { @@ -77,34 +80,36 @@ pip install singer-target-postgres The fields available to be specified in the config file are specified here. - -| Field | Type | Default | Details | -| --------------------------- | --------------------- | ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `postgres_host` | `["string", "null"]` | `"localhost"` | | -| `postgres_port` | `["integer", "null"]` | `5432` | | -| `postgres_database` | `["string"]` | `N/A` | | -| `postgres_username` | `["string", "null"]` | `N/A` | | -| `postgres_password` | `["string", "null"]` | `null` | | -| `postgres_schema` | `["string", "null"]` | `"public"` | | -| `postgres_sslmode` | `["string", "null"]` | `"prefer"` | Refer to the [libpq](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS) docs for more information about SSL | -| `postgres_sslcert` | `["string", "null"]` | `"~/.postgresql/postgresql.crt"` | Only used if a SSL request w/ a client certificate is being made | -| `postgres_sslkey` | `["string", "null"]` | `"~/.postgresql/postgresql.key"` | Only used if a SSL request w/ a client certificate is being 
made | -| `postgres_sslrootcert` | `["string", "null"]` | `"~/.postgresql/root.crt"` | Used for authentication of a server SSL certificate | -| `postgres_sslcrl` | `["string", "null"]` | `"~/.postgresql/root.crl"` | Used for authentication of a server SSL certificate | -| `invalid_records_detect` | `["boolean", "null"]` | `true` | Include `false` in your config to disable `target-postgres` from crashing on invalid records | -| `invalid_records_threshold` | `["integer", "null"]` | `0` | Include a positive value `n` in your config to allow for `target-postgres` to encounter at most `n` invalid records per stream before giving up. | -| `disable_collection` | `["string", "null"]` | `false` | Include `true` in your config to disable [Singer Usage Logging](#usage-logging). | -| `logging_level` | `["string", "null"]` | `"INFO"` | The level for logging. Set to `DEBUG` to get things like queries executed, timing of those queries, etc. See [Python's Logger Levels](https://docs.python.org/3/library/logging.html#levels) for information about valid values. | -| `persist_empty_tables` | `["boolean", "null"]` | `False` | Whether the Target should create tables which have no records present in Remote. | -| `max_batch_rows` | `["integer", "null"]` | `200000` | The maximum number of rows to buffer in memory before writing to the destination table in Postgres | -| `max_buffer_size` | `["integer", "null"]` | `104857600` (100MB in bytes) | The maximum number of bytes to buffer in memory before writing to the destination table in Postgres | -| `batch_detection_threshold` | `["integer", "null"]` | `5000`, or 1/40th `max_batch_rows` | How often, in rows received, to count the buffered rows and bytes to check if a flush is necessary. There's a slight performance penalty to checking the buffered records count or bytesize, so this controls how often this is polled in order to mitigate the penalty. 
This value is usually not necessary to set as the default is dynamically adjusted to check reasonably often. | -| `state_support` | `["boolean", "null"]` | `True` | Whether the Target should emit `STATE` messages to stdout for further consumption. In this mode, which is on by default, STATE messages are buffered in memory until all the records that occurred before them are flushed according to the batch flushing schedule the target is configured with. | -| `add_upsert_indexes` | `["boolean", "null"]` | `True` | Whether the Target should create column indexes on the important columns used during data loading. These indexes will make data loading slightly slower but the deduplication phase much faster. Defaults to on for better baseline performance. | -| `before_run_sql` | `["string", "null"]` | `None` | Raw SQL statement(s) to execute as soon as the connection to Postgres is opened by the target. Useful for setup like `SET ROLE` or other connection state that is important. | -| `after_run_sql` | `["string", "null"]` | `None` | Raw SQL statement(s) to execute as soon as the connection to Postgres is opened by the target. Useful for setup like `SET ROLE` or other connection state that is important. | -| `before_run_sql_file` | `["string", "null"]` | `None` | Similar to `before_run_sql` but reads an external file instead of SQL in the JSON config file. | -| `after_run_sql_file` | `["string", "null"]` | `None` | Similar to `after_run_sql` but reads an external file instead of SQL in the JSON config file. | +If a field is not set, the value from the corresponding standard [PostgreSQL env var](https://www.postgresql.org/docs/current/libpq-envars.html) +will be used if that variable is set. Otherwise, the Default value will be used. 
+ +| Field | Fallback Env Var | Type | Default | Details | +| --------------------------- | ---------------- | --------------------- | ---------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `postgres_host` | `PGHOST` | `["string", "null"]` | `"localhost"` | | +| `postgres_port` | `PGPORT` | `["integer", "null"]` | `5432` | | +| `postgres_database` | `PGDATABASE` | `["string"]` | `N/A` | | +| `postgres_username` | `PGUSER` | `["string", "null"]` | `N/A` | | +| `postgres_password` | `PGPASSWORD` | `["string", "null"]` | `null` | | +| `postgres_schema` | | `["string", "null"]` | `"public"` | | +| `postgres_sslmode` | `PGSSLMODE` | `["string", "null"]` | `"prefer"` | Refer to the [libpq](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS) docs for more information about SSL | +| `postgres_sslcert` | `PGSSLCERT` | `["string", "null"]` | `"~/.postgresql/postgresql.crt"` | Only used if a SSL request w/ a client certificate is being made | +| `postgres_sslkey` | `PGSSLKEY` | `["string", "null"]` | `"~/.postgresql/postgresql.key"` | Only used if a SSL request w/ a client certificate is being made | +| `postgres_sslrootcert` | `PGSSLROOTCERT` | `["string", "null"]` | `"~/.postgresql/root.crt"` | Used for authentication of a server SSL certificate | +| `postgres_sslcrl` | `PGSSLCRL` | `["string", "null"]` | `"~/.postgresql/root.crl"` | Used for authentication of a server SSL certificate | +| `invalid_records_detect` | | `["boolean", "null"]` | `true` | Include `false` in your config to disable `target-postgres` from crashing on invalid records | +| `invalid_records_threshold` | | `["integer", 
"null"]` | `0` | Include a positive value `n` in your config to allow for `target-postgres` to encounter at most `n` invalid records per stream before giving up. | +| `disable_collection` | | `["string", "null"]` | `false` | Include `true` in your config to disable [Singer Usage Logging](#usage-logging). | +| `logging_level` | | `["string", "null"]` | `"INFO"` | The level for logging. Set to `DEBUG` to get things like queries executed, timing of those queries, etc. See [Python's Logger Levels](https://docs.python.org/3/library/logging.html#levels) for information about valid values. | +| `persist_empty_tables` | | `["boolean", "null"]` | `False` | Whether the Target should create tables which have no records present in Remote. | +| `max_batch_rows` | | `["integer", "null"]` | `200000` | The maximum number of rows to buffer in memory before writing to the destination table in Postgres | +| `max_buffer_size` | | `["integer", "null"]` | `104857600` (100MB in bytes) | The maximum number of bytes to buffer in memory before writing to the destination table in Postgres | +| `batch_detection_threshold` | | `["integer", "null"]` | `5000`, or 1/40th `max_batch_rows` | How often, in rows received, to count the buffered rows and bytes to check if a flush is necessary. There's a slight performance penalty to checking the buffered records count or bytesize, so this controls how often this is polled in order to mitigate the penalty. This value is usually not necessary to set as the default is dynamically adjusted to check reasonably often. | +| `state_support` | | `["boolean", "null"]` | `True` | Whether the Target should emit `STATE` messages to stdout for further consumption. In this mode, which is on by default, STATE messages are buffered in memory until all the records that occurred before them are flushed according to the batch flushing schedule the target is configured with. 
| +| `add_upsert_indexes` | | `["boolean", "null"]` | `True` | Whether the Target should create column indexes on the important columns used during data loading. These indexes will make data loading slightly slower but the deduplication phase much faster. Defaults to on for better baseline performance. | +| `before_run_sql` | | `["string", "null"]` | `None` | Raw SQL statement(s) to execute as soon as the connection to Postgres is opened by the target. Useful for setup like `SET ROLE` or other connection state that is important. | +| `after_run_sql` | | `["string", "null"]` | `None` | Raw SQL statement(s) to execute as soon as the connection to Postgres is opened by the target. Useful for setup like `SET ROLE` or other connection state that is important. | +| `before_run_sql_file` | | `["string", "null"]` | `None` | Similar to `before_run_sql` but reads an external file instead of SQL in the JSON config file. | +| `after_run_sql_file` | | `["string", "null"]` | `None` | Similar to `after_run_sql` but reads an external file instead of SQL in the JSON config file. | 
### Supported Versions
diff --git a/target_postgres/__init__.py b/target_postgres/__init__.py
index 6531bb2..6e22a67 100644
--- a/target_postgres/__init__.py
+++ b/target_postgres/__init__.py
@@ -1,3 +1,5 @@
+import os
+
 from singer import utils
 import psycopg2
 
@@ -9,6 +11,22 @@
 ]
 
 
+# Maps each Postgres connection config key to the libpq environment
+# variable consulted when that key is not set in the config file.
+CONFIG_TO_ENV_MAPPING = {
+    'postgres_host': 'PGHOST',
+    'postgres_port': 'PGPORT',
+    'postgres_database': 'PGDATABASE',
+    'postgres_username': 'PGUSER',
+    'postgres_password': 'PGPASSWORD',
+    'postgres_sslmode': 'PGSSLMODE',
+    'postgres_sslcert': 'PGSSLCERT',
+    'postgres_sslkey': 'PGSSLKEY',
+    'postgres_sslrootcert': 'PGSSLROOTCERT',
+    'postgres_sslcrl': 'PGSSLCRL',
+}
+
+
 def main(config, input_stream=None):
     with psycopg2.connect(
             connection_factory=MillisLoggingConnection,
@@ -39,7 +57,30 @@ def main(config, input_stream=None):
             target_tools.main(postgres_target)
 
 
+def fallback_to_env_vars(config):
+    """Fill unset Postgres settings in ``config`` from environment variables.
+
+    For each key in ``CONFIG_TO_ENV_MAPPING`` that is absent from ``config``
+    or set to ``None``, the mapped environment variable is copied in when
+    it is set. When the variable is unset too, the key is left as it was
+    instead of being stored as ``None``, so a key that was never supplied
+    stays absent and a later required-key check can still report it.
+
+    ``config`` is modified in place and also returned.
+    """
+    for conf_key, env_var in CONFIG_TO_ENV_MAPPING.items():
+        if config.get(conf_key) is not None:
+            continue
+        env_value = os.environ.get(env_var)
+        if env_value is not None:
+            config[conf_key] = env_value
+    return config
+
+
 def cli():
-    args = utils.parse_args(REQUIRED_CONFIG_KEYS)
+    # Required keys are validated below, after the env var fallback has
+    # had a chance to supply them, so none are enforced while parsing.
+    args = utils.parse_args([])
+    config = fallback_to_env_vars(args.config)
+    utils.check_config(config, REQUIRED_CONFIG_KEYS)
 
-    main(args.config)
+    main(config)