From ec6feba44a0c9bcb768c15a68aafdd8c4fedea93 Mon Sep 17 00:00:00 2001 From: Kaya Celebi Date: Mon, 12 Jun 2023 01:50:12 +0300 Subject: [PATCH] Updates to README, typo fix in scrape, dependency fixes --- README.md | 43 +++++++++++++++++++++------- requirements.txt | 2 +- setup.cfg | 2 +- src/google_flight_analysis/scrape.py | 2 ++ 4 files changed, 37 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index e33e247..90a74a0 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ [![kcelebi](https://circleci.com/gh/celebi-pkg/flight-analysis.svg?style=svg)](https://circleci.com/gh/celebi-pkg/flight-analysis) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Live on PyPI](https://img.shields.io/badge/PyPI-1.1.0-brightgreen)](https://pypi.org/project/google-flight-analysis/) -[![TestPyPI](https://img.shields.io/badge/PyPI-1.1.1--alpha.10-blue)](https://test.pypi.org/project/google-flight-analysis/1.1.1a10/) +[![TestPyPI](https://img.shields.io/badge/PyPI-1.1.1--alpha.11-blue)](https://test.pypi.org/project/google-flight-analysis/1.1.1a11/) # Flight Analysis @@ -62,23 +62,46 @@ For GitHub repository cloners, import as follows from the root of the repository Here is some quick starter code to accomplish the basic tasks. Find more in the [documentation](https://kcelebi.github.io/flight-analysis/). 
- # Try to keep the dates in format YYYY-mm-dd - result = Scrape('JFK', 'IST', '2023-07-20', '2023-08-20') # obtain our scrape object + # Keep the dates in format YYYY-mm-dd + result = Scrape('JFK', 'IST', '2023-07-20', '2023-08-20') # obtain our scrape object, represents our query + result.type # This is in a round-trip format + result.origin # ['JFK', 'IST'] + result.dest # ['IST', 'JFK'] + result.dates # ['2023-07-20', '2023-08-20'] + print(result) # get unqueried str representation - origin = result.origin # 'JFK' - dest = result.dest # 'IST' - print(result) # get the str representation of the query +A `Scrape` object represents a Google Flights query to be run. It maintains flights as a sequence of one or more one-way flights which have an origin, destination, and flight date. The above object for a round-trip flight from JFK to IST is a sequence of JFK --> IST, then IST --> JFK. We can obtain the data as follows: - ScrapeObjects(result) # modify our result in-place with the scraped data - print(result) # can see updated object - print(result.data) # can see data + ScrapeObjects(result) # runs selenium through ChromeDriver, modifies results in-place + result.data # returns pandas DF + print(result) # get queried representation of result You can also scrape for one-way trips: results = Scrape('JFK', 'IST', '2023-08-20') ScrapeObjects(result) - print(result.data.head()) #see data + result.data #see data +You can also scrape chain-trips, which are defined as a sequence of one-way flights that have no direct relation to each other, other than being in chronological order. + + # chain-trip format: origin, dest, date, origin, dest, date, ... 
+ result = Scrape('JFK', 'IST', '2023-08-20', 'RDU', 'LGA', '2023-12-25', 'EWR', 'SFO', '2024-01-20') + result.type # chain-trip + ScrapeObjects(result) + result.data # see data + +You can also scrape perfect-chains, which are defined as a sequence of one-way flights such that the destination of the previous flight is the origin of the next and the origin of the chain is the final destination of the chain (a cycle). + + # perfect-chain format: origin, date, origin, date, ..., first_origin + result = Scrape("JFK", "2023-09-20", "IST", "2023-09-25", "CDG", "2023-10-10", "LHR", "2023-11-01", "JFK") + result.type # perfect-chain + ScrapeObjects(result) + result.data # see data + +You can read more about the different types of trips in the documentation. Scrape objects can be added to one another to create larger queries. This is allowed under the following conditions: + +1. The objects being added are the same type of trip (one-way, round-trip, etc.) +2. The objects being added are either both unqueried or both queried ## Updates & New Features diff --git a/requirements.txt b/requirements.txt index 369192b..87fbd26 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,4 +4,4 @@ pandas selenium pytest sqlalchemy -chromedriver_autoinstaller \ No newline at end of file +chromedriver-autoinstaller \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 2f3de01..5afa290 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,7 +25,7 @@ install_requires = pandas selenium sqlalchemy - chromedriver_autoinstaller + chromedriver-autoinstaller [options.packages.find] where = src diff --git a/src/google_flight_analysis/scrape.py b/src/google_flight_analysis/scrape.py index 1584d44..2f40a63 100644 --- a/src/google_flight_analysis/scrape.py +++ b/src/google_flight_analysis/scrape.py @@ -30,7 +30,9 @@ def ScrapeObjects(objs, copy_objs = False): if type(objs) is _Scrape: objs = [objs] + chromedriver_autoinstaller.install() # check if chromedriver is installed correctly and on path + driver = 
webdriver.Chrome() driver.maximize_window() # modifies the objects in-place