Hey, I'm new to GE. Could I get help with this error? #8188
Unanswered
krish-shahh asked this question in Support
Replies: 0 comments
```
2023-06-27T09:42:11-0400 - INFO - Great Expectations logging enabled at 20 level by JupyterUX module.
2023-06-27T09:42:11-0400 - INFO - FileDataContext loading fluent config
2023-06-27T09:42:11-0400 - INFO - Loading 'datasources' ->
[]
Loaded ExpectationSuite "github_stats_expectation_suite" containing 0 expectations.
```
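
For context, here is a minimal sketch of the cell that produces the traceback below, reconstructed from the frames shown in it. The datasource, data connector, and asset names are assumptions (they never appear in the trace); only the suite name and the `get_validator()` call are taken from the traceback itself.

```python
# Minimal sketch of the failing cell, reconstructed from the traceback below.
# ASSUMPTIONS: the datasource / data-connector / asset names and the use of a
# block-style pandas datasource are guesses; only the suite name and the
# get_validator() call come from the trace.
import great_expectations as gx
from great_expectations.core.batch import BatchRequest

context = gx.get_context()

expectation_suite_name = "github_stats_expectation_suite"
suite = context.add_expectation_suite(expectation_suite_name=expectation_suite_name)
print(f'Created ExpectationSuite "{suite.expectation_suite_name}".')

batch_request = {
    "datasource_name": "my_pandas_datasource",  # assumption
    "data_connector_name": "default_inferred_data_connector_name",  # assumption
    "data_asset_name": "github_stats.csv",  # assumption
}

# Fails inside pandas.read_csv() while materializing the batch:
validator = context.get_validator(
    batch_request=BatchRequest(**batch_request),
    expectation_suite_name=expectation_suite_name,
)
```
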
```
ParserError Traceback (most recent call last)
Cell In[1], line 35
31 suite = context.add_expectation_suite(expectation_suite_name=expectation_suite_name)
32 print(f'Created ExpectationSuite "{suite.expectation_suite_name}".')
---> 35 validator = context.get_validator(
36 batch_request=BatchRequest(**batch_request),
37 expectation_suite_name=expectation_suite_name,
38 )
39 column_names = [f'"{column_name}"' for column_name in validator.columns()]
40 print(f"Columns: {', '.join(column_names)}.")
File ~/anaconda3/lib/python3.10/site-packages/great_expectations/data_context/data_context/abstract_data_context.py:2583, in AbstractDataContext.get_validator(failed resolving arguments)
2579 batch_request_list = [batch_request] # type: ignore[list-item]
2581 for batch_request in batch_request_list:
2582 batch_list.extend(
-> 2583 self.get_batch_list(
2584 datasource_name=datasource_name,
2585 data_connector_name=data_connector_name,
2586 data_asset_name=data_asset_name,
2587 batch_request=batch_request,
2588 batch_data=batch_data,
2589 data_connector_query=data_connector_query,
2590 batch_identifiers=batch_identifiers,
2591 limit=limit,
2592 index=index,
2593 custom_filter_function=custom_filter_function,
2594 sampling_method=sampling_method,
2595 sampling_kwargs=sampling_kwargs,
2596 splitter_method=splitter_method,
2597 splitter_kwargs=splitter_kwargs,
2598 runtime_parameters=runtime_parameters,
2599 query=query,
2600 path=path,
2601 batch_filter_parameters=batch_filter_parameters,
2602 batch_spec_passthrough=batch_spec_passthrough,
2603 **kwargs,
2604 )
2605 )
2607 return self.get_validator_using_batch_list(
2608 expectation_suite=expectation_suite, # type: ignore[arg-type]
2609 batch_list=batch_list,
2610 include_rendered_content=include_rendered_content,
2611 )
File ~/anaconda3/lib/python3.10/site-packages/great_expectations/core/usage_statistics/usage_statistics.py:260, in usage_statistics_enabled_method.<locals>.usage_statistics_wrapped_method(*args, **kwargs)
257 args_payload = args_payload_fn(*args, **kwargs) or {}
258 nested_update(event_payload, args_payload)
--> 260 result = func(*args, **kwargs)
261 message["success"] = True
262 except Exception:
File ~/anaconda3/lib/python3.10/site-packages/great_expectations/data_context/data_context/abstract_data_context.py:2752, in AbstractDataContext.get_batch_list(self, datasource_name, data_connector_name, data_asset_name, batch_request, batch_data, data_connector_query, batch_identifiers, limit, index, custom_filter_function, sampling_method, sampling_kwargs, splitter_method, splitter_kwargs, runtime_parameters, query, path, batch_filter_parameters, batch_spec_passthrough, batch_request_options, **kwargs)
2674 @public_api
2675 @usage_statistics_enabled_method(
2676 event_name=UsageStatsEvents.DATA_CONTEXT_GET_BATCH_LIST,
(...)
2701 **kwargs: Optional[dict],
2702 ) -> List[Batch]:
2703 """Get the list of zero or more batches, based on a variety of flexible input types.
2704
2705 get_batch_list is the main user-facing API for getting batches.
(...)
2750
2751 """
-> 2752 return self._get_batch_list(
2753 datasource_name=datasource_name,
2754 data_connector_name=data_connector_name,
2755 data_asset_name=data_asset_name,
2756 batch_request=batch_request,
2757 batch_data=batch_data,
2758 data_connector_query=data_connector_query,
2759 batch_identifiers=batch_identifiers,
2760 limit=limit,
2761 index=index,
2762 custom_filter_function=custom_filter_function,
2763 sampling_method=sampling_method,
2764 sampling_kwargs=sampling_kwargs,
2765 splitter_method=splitter_method,
2766 splitter_kwargs=splitter_kwargs,
2767 runtime_parameters=runtime_parameters,
2768 query=query,
2769 path=path,
2770 batch_filter_parameters=batch_filter_parameters,
2771 batch_spec_passthrough=batch_spec_passthrough,
2772 batch_request_options=batch_request_options,
2773 **kwargs,
2774 )
File ~/anaconda3/lib/python3.10/site-packages/great_expectations/data_context/data_context/abstract_data_context.py:2834, in AbstractDataContext._get_batch_list(self, datasource_name, data_connector_name, data_asset_name, batch_request, batch_data, data_connector_query, batch_identifiers, limit, index, custom_filter_function, sampling_method, sampling_kwargs, splitter_method, splitter_kwargs, runtime_parameters, query, path, batch_filter_parameters, batch_spec_passthrough, batch_request_options, **kwargs)
2825 raise gx_exceptions.DatasourceError(
2826 datasource_name,
2827 "The given datasource could not be retrieved from the DataContext; "
2828 "please confirm that your configuration is accurate.",
2829 )
2831 datasource = self.datasources[
2832 datasource_name
2833 ] # this can return one of three datasource types, including Fluent datasource types
-> 2834 return datasource.get_batch_list_from_batch_request(batch_request=result)
File ~/anaconda3/lib/python3.10/site-packages/great_expectations/datasource/new_datasource.py:218, in BaseDatasource.get_batch_list_from_batch_request(self, batch_request)
212 batch_spec: PathBatchSpec # type: ignore[no-redef]
213 batch_markers: BatchMarkers # type: ignore[no-redef]
214 (
215 batch_data,
216 batch_spec,
217 batch_markers,
--> 218 ) = data_connector.get_batch_data_and_metadata(
219 batch_definition=batch_definition
220 )
221 new_batch = Batch(
222 data=batch_data,
223 batch_request=batch_request,
(...)
226 batch_markers=batch_markers,
227 )
228 batches.append(new_batch)
File ~/anaconda3/lib/python3.10/site-packages/great_expectations/datasource/data_connector/data_connector.py:120, in DataConnector.get_batch_data_and_metadata(self, batch_definition)
111 """
112 Uses batch_definition to retrieve batch_data and batch_markers by building a batch_spec from batch_definition,
113 then using execution_engine to return batch_data and batch_markers
(...)
117
118 """
119 batch_spec: BatchSpec = self.build_batch_spec(batch_definition=batch_definition)
--> 120 batch_data, batch_markers = self._execution_engine.get_batch_data_and_markers(
121 batch_spec=batch_spec
122 )
123 self._execution_engine.load_batch_data(batch_definition.id, batch_data) # type: ignore[arg-type] # got ExecutionEngine
124 return (
125 batch_data,
126 batch_spec,
127 batch_markers,
128 )
File ~/anaconda3/lib/python3.10/site-packages/great_expectations/execution_engine/pandas_execution_engine.py:332, in PandasExecutionEngine.get_batch_data_and_markers(self, batch_spec)
330 path = batch_spec.path
331 reader_fn = self._get_reader_fn(reader_method, path)
--> 332 df = reader_fn(path, **reader_options)
334 elif isinstance(batch_spec, PandasBatchSpec):
335 reader_method = batch_spec.reader_method
File ~/anaconda3/lib/python3.10/site-packages/pandas/io/parsers/readers.py:912, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
899 kwds_defaults = _refine_defaults_read(
900 dialect,
901 delimiter,
(...)
908 dtype_backend=dtype_backend,
909 )
910 kwds.update(kwds_defaults)
--> 912 return _read(filepath_or_buffer, kwds)
File ~/anaconda3/lib/python3.10/site-packages/pandas/io/parsers/readers.py:583, in _read(filepath_or_buffer, kwds)
580 return parser
582 with parser:
--> 583 return parser.read(nrows)
File ~/anaconda3/lib/python3.10/site-packages/pandas/io/parsers/readers.py:1704, in TextFileReader.read(self, nrows)
1697 nrows = validate_integer("nrows", nrows)
1698 try:
1699 # error: "ParserBase" has no attribute "read"
1700 (
1701 index,
1702 columns,
1703 col_dict,
-> 1704 ) = self._engine.read( # type: ignore[attr-defined]
1705 nrows
1706 )
1707 except Exception:
1708 self.close()
File ~/anaconda3/lib/python3.10/site-packages/pandas/io/parsers/c_parser_wrapper.py:234, in CParserWrapper.read(self, nrows)
232 try:
233 if self.low_memory:
--> 234 chunks = self._reader.read_low_memory(nrows)
235 # destructive to chunks
236 data = _concatenate_chunks(chunks)
File ~/anaconda3/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:812, in pandas._libs.parsers.TextReader.read_low_memory()
File ~/anaconda3/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:873, in pandas._libs.parsers.TextReader._read_rows()
File ~/anaconda3/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:848, in pandas._libs.parsers.TextReader._tokenize_rows()
File ~/anaconda3/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:859, in pandas._libs.parsers.TextReader._check_tokenize_status()
File ~/anaconda3/lib/python3.10/site-packages/pandas/_libs/parsers.pyx:2025, in pandas._libs.parsers.raise_parser_error()
ParserError: Error tokenizing data. C error: Expected 1 fields in line 10, saw 6
```
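
The failure is raised by pandas, not by Great Expectations: `read_csv` inferred a single column from the first rows but found 6 fields on line 10 of the CSV, which usually points to a wrong delimiter, preamble rows before the real header, or unquoted field separators. A quick way to confirm outside GE; the path below is a placeholder for whichever file the data connector resolved:

```python
# Inspect the raw file to see why pandas expected 1 field but saw 6 on line 10.
# ASSUMPTION: "github_stats.csv" is a placeholder path, not taken from the post.
from itertools import islice

import pandas as pd

path = "github_stats.csv"

# Compare line 10 with line 1 (the inferred header) in the raw text.
with open(path, encoding="utf-8") as f:
    for lineno, line in enumerate(islice(f, 12), start=1):
        print(lineno, repr(line))

# If the delimiter or a preamble row is the culprit, explicit reader options
# usually fix it, e.g. sep=";", skiprows=<n>, or (pandas >= 1.3) on_bad_lines:
df = pd.read_csv(path, on_bad_lines="skip")
print(df.head())
```

Once the right options are known, they can be forwarded to pandas through the batch request instead of editing the file; for block-style datasources the usual route is `batch_spec_passthrough={"reader_options": {"sep": ";"}}` on the `BatchRequest` (the specific option values here are assumptions).
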