From a21745f877bef75b1323a28605d537ed79d060a1 Mon Sep 17 00:00:00 2001 From: deedy5 <65482418+deedy5@users.noreply.github.com> Date: Mon, 19 Feb 2024 12:30:53 +0300 Subject: [PATCH] add 'tabs' parameter --- README.md | 7 +++++-- fake_traffic/cli.py | 11 ++++++++++- fake_traffic/fake_traffic.py | 11 +++++++---- 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index fd14e3a..11e98e8 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,8 @@ fake_traffic -c tr -l ku-tr -ca h fake_traffic -c ru -l ru-ru -ca s -lf # use none-headless mode fake_traffic -c en -l en-us -ca t -nh +# limit the number of tabs in the browser to 2 +fake_traffic -c en -l en-us -ca t -t 2 ``` --- ### Simple usage @@ -46,7 +48,8 @@ ft = FakeTraffic(country='US', language='en-US', category='h', headless=True) category = сategory of interest of a user (defaults to 'h'): 'all' (all), 'b' (business), 'e' (entertainment), 'm' (health), 's' (sports), 't' (sci/tech), 'h' (top stories); - headless = True/False (defaults to True). + headless = True/False (defaults to True); + tabs = limit the number of tabs in browser (defaults to 3). """ ft.crawl() ``` @@ -92,6 +95,6 @@ Russia | English | `FakeTraffic(country="RU", language="en-US", category='b Russia | Russian | `FakeTraffic(country="RU", language="ru-RU")` | Brazil | Portuguese | `FakeTraffic(country="BR", language="pt-BR", category='s')` | United Kingdom | English | `FakeTraffic(country="GB", language="en-GB")` | -United States | English | `FakeTraffic(country="US", language="en-US")` | +United States | English | `FakeTraffic(country="US", language="en-US", tabs=4)` | United States | Hebrew Israel | `FakeTraffic(country="US", language="he-IL")` | diff --git a/fake_traffic/cli.py b/fake_traffic/cli.py index e731f59..98f43d9 100644 --- a/fake_traffic/cli.py +++ b/fake_traffic/cli.py @@ -37,6 +37,14 @@ help="Run the browser in non-headless mode", required=False, ) +parser.add_argument( + "-t", + "--tabs", + default=3, + type=int, + help="Limit the number of tabs in browser. Defaults to 3", + required=False, +) parser.add_argument( "-lf", "--logfile", @@ -61,7 +69,7 @@ language_split = args.language.split("-") language = f"{language_split[0]}-{language_split[1].upper()}" logging.info( - f"Run crawl with: {country=}, {language=}, category={args.category}, headless={args.headless}, logfile={args.logfile}" + f"Run crawl with: {country=}, {language=}, category={args.category}, headless={args.headless}, tabs={args.tabs}, logfile={args.logfile}" ) @@ -70,5 +78,6 @@ language=language, category=args.category, headless=args.headless, + tabs=args.tabs, ) fake_traffic.crawl() diff --git a/fake_traffic/fake_traffic.py b/fake_traffic/fake_traffic.py index 5276b5a..0522a2f 100644 --- a/fake_traffic/fake_traffic.py +++ b/fake_traffic/fake_traffic.py @@ -6,8 +6,6 @@ from playwright_stealth import stealth_async logger = logging.getLogger("__name__") -SEMAPHORE = asyncio.Semaphore(5) - # playwright install chromium res = subprocess.run( @@ -27,6 +25,8 @@ def __init__( language="en-US", category="h", headless=True, + tabs=3, + ): """Internet traffic generator. Utilizes real-time google search trends by specified parameters. country = country code ISO 3166-1 Alpha-2 code (https://www.iso.org/obp/ui/), @@ -34,16 +34,18 @@ def __init__( category = category of interest of a user (defaults to 'h'): 'all' (all), 'b' (business), 'e' (entertainment), 'm' (health), 's' (sports), 't' (sci/tech), 'h' (top stories); - headless = True/False (defaults to True). + headless = True/False (defaults to True); + tabs = limit the number of tabs in browser (defaults to 3). """ self.country = country self.language = language self.category = category self.headless = headless self.browser = None + self.semaphore = asyncio.Semaphore(tabs) async def abrowse(self, url): - async with SEMAPHORE: + async with self.semaphore: page = await self.browser.new_page() await stealth_async(page) try: @@ -128,5 +130,6 @@ def crawl(self): language="en-US", category="h", headless=True, + tabs=3, ) fake_traffic.crawl()