-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSeleniumWebdriverUtils.py
437 lines (399 loc) · 16.7 KB
/
SeleniumWebdriverUtils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
'''
Created on 31 ene. 2020
@author: aoviedo
This module offers helper functions for tasks that are complex or tedious to code
but that are useful or have to be performed frequently.
'''
#Version string of this utility module
__version__= "0.1.0"
import time
import os
import datetime
import selenium.webdriver as webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
#Global counter used to number the partial screenshots saved by
#getPartialScrollImage while getFullScreenshotScrolling scrolls the page.
#It is reset to 0 when getFullScreenshotScrolling finishes
partialImage = 0
#Global timeout handed to WebDriverWait when looking for an HTML element
#(presumably expressed in seconds - confirm against the Selenium documentation)
timeout = 10
def getDocumentHeight(driver):
    '''
    This method executes several JavaScript scripts to get the height of the HTML document
    (as a whole), not the window size

    Parameters
    ----------
    driver : selenium.webdriver
        This is the driver controlling the browser where we will execute the JavaScript scripts

    Returns
    -------
    int
        The greatest height reported by the scripts that returned a value

    Raises
    ------
    ValueError
        If none of the scripts returned a height
    '''
    #Capabilities of the driver, used to find out the browser name and the platform
    caps = driver.capabilities
    notSafari = caps.get('browserName') != "Safari"
    platformOk = "platformName" not in caps or caps["platformName"] == "Android"

    #Not all browsers support the same JavaScript properties, so the list of scripts
    #depends on the browser.
    #NOTE(review): the indentation of the original file was lost; each browser check is
    #assumed to guard only the query that directly follows it - confirm
    scripts = []
    if notSafari:
        scripts.append("return document.body.scrollHeight")
    scripts.append("return document.documentElement.scrollHeight")
    scripts.append("return document.body.offsetHeight")
    if notSafari and platformOk:
        scripts.append("return document.documentElement.offsetHeight")
    scripts.append("return document.body.clientHeight")
    scripts.append("return document.documentElement.clientHeight")

    #We keep only the "real" heights, the ones that are different than None
    results = (driver.execute_script(script) for script in scripts)
    heights = [height for height in results if height is not None]
    if not heights:
        #max() on an empty list fails with a message that says nothing about the cause
        raise ValueError("None of the JavaScript scripts returned a document height")
    #We return the greatest height
    return max(heights)
def getDocumentWidth(driver):
    '''
    This method executes several JavaScript scripts to get the width of the HTML document
    (as a whole), not the window size

    Parameters
    ----------
    driver : selenium.webdriver
        This is the driver controlling the browser where we will execute the JavaScript scripts

    Returns
    -------
    int
        The greatest width reported by the scripts that returned a value

    Raises
    ------
    ValueError
        If none of the scripts returned a width
    '''
    #Capabilities of the driver, used to find out the browser name and the platform
    caps = driver.capabilities
    notSafari = caps.get('browserName') != "Safari"
    platformOk = "platformName" not in caps or caps["platformName"] == "Android"

    #Not all browsers support the same JavaScript properties, so the list of scripts
    #depends on the browser.
    #NOTE(review): the indentation of the original file was lost; each browser check is
    #assumed to guard only the query that directly follows it - confirm
    scripts = []
    if notSafari:
        scripts.append("return document.body.scrollWidth")
    scripts.append("return document.documentElement.scrollWidth")
    scripts.append("return document.body.offsetWidth")
    if notSafari and platformOk:
        scripts.append("return document.documentElement.offsetWidth")
    scripts.append("return document.body.clientWidth")
    scripts.append("return document.documentElement.clientWidth")

    #We keep only the "real" widths, the ones that are different than None
    results = (driver.execute_script(script) for script in scripts)
    widths = [width for width in results if width is not None]
    if not widths:
        #max() on an empty list fails with a message that says nothing about the cause
        raise ValueError("None of the JavaScript scripts returned a document width")
    #We return the greatest width
    return max(widths)
def getInnerWindowHeight(driver):
    '''
    Ask the browser, through JavaScript, for the height of the inner part of the window

    Parameters
    ----------
    driver : selenium.webdriver
        Driver controlling the browser where the JavaScript script is executed

    Returns
    -------
    int
        Whatever the browser reports for window.innerHeight
    '''
    innerHeightScript = "return window.innerHeight"
    return driver.execute_script(innerHeightScript)
def getInnerWindowWidth(driver):
    '''
    Ask the browser, through JavaScript, for the width of the inner part of the window

    Parameters
    ----------
    driver : selenium.webdriver
        Driver controlling the browser where the JavaScript script is executed

    Returns
    -------
    int
        Whatever the browser reports for window.innerWidth
    '''
    innerWidthScript = "return window.innerWidth"
    return driver.execute_script(innerWidthScript)
def goToTopPage(driver):
    '''
    Scroll the browser back to the top of the HTML document

    The page is scrolled upwards by the document height plus 100 extra pixels, so the
    scroll always goes at least as far as the top, wherever we currently are.

    Parameters
    ----------
    driver : selenium.webdriver
        Driver controlling the browser where the JavaScript script is executed

    Returns
    -------
    None
    '''
    #Distance to scroll upwards: the whole document plus a 100 pixel margin
    upwardOffset = getDocumentHeight(driver) + 100
    driver.execute_script("window.scrollBy(0,-%s)" % (upwardOffset,))
def hideScrollBar(driver):
    '''
    Hide the scroll bar by setting the overflow style of the document body to 'hidden'

    Parameters
    ----------
    driver : selenium.webdriver
        Driver controlling the browser where the JavaScript script is executed

    Returns
    -------
    None
    '''
    hideOverflowScript = "document.body.style.overflow = 'hidden';"
    driver.execute_script(hideOverflowScript)
def scrollWindow(driver,func=None,*parameters):
    '''
    This method scrolls our page from top to bottom, optionally executing a function
    at every scroll position

    Parameters
    ----------
    driver : selenium.webdriver
        This is the driver controlling the browser where we will execute the JavaScript scripts
    func: Python function
        Optional function executed during each iteration of this scrolling function,
        for instance to take screenshots. When it is None nothing is called
    parameters:
        All the parameters necessary to execute the function in the parameter "func"

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If the browser reports an inner window height that is not positive, because the
        scroll could never make progress
    '''
    #We get the HTML document height
    docHeight = getDocumentHeight(driver)
    print("height: {0}".format(docHeight))
    #We scroll to the top of the page
    goToTopPage(driver)
    #x represents our position in the page
    x = 0
    #While x is not greater than the height of the document, we can keep scrolling
    while x < docHeight:
        #We hide the scroll bar
        hideScrollBar(driver)
        #We sleep for 2 seconds to allow the former change to have effect
        time.sleep(2)
        #func is optional: calling None would only produce a TypeError on every iteration
        if func is not None:
            try:
                func(*parameters)
            except Exception as e:
                #Best effort: a failure in the callback must not stop the scrolling
                print("An exception ocurred while we were executing the function passed as parameter: {0}".format(e))
        #We advance by the inner height of the window minus 100 pixels, so consecutive
        #positions overlap by 100 pixels
        innerHeight = getInnerWindowHeight(driver)
        step = innerHeight - 100
        if step <= 0:
            #Window of 100 pixels or less: drop the overlap, otherwise x would never
            #grow and this loop would never end
            step = innerHeight
        if step <= 0:
            raise RuntimeError("Cannot scroll: the inner window height is {0}".format(innerHeight))
        x = x + step
        #We code a JavaScript script to scroll to that point
        scrollIt = "window.scrollTo(0,{0})".format(x)
        print("Position: {0}".format(x))
        #We execute that script to scroll to that point
        driver.execute_script(scrollIt)
def getPartialScrollImage(driver,imageTitle):
    '''
    Save one partial screenshot, numbering it with the global counter partialImage

    The file is named "<imageTitle>-<partialImage>.jpg" and the counter is increased by
    one after the screenshot has been requested.
    NOTE(review): Selenium documents get_screenshot_as_file as writing PNG data, so the
    ".jpg" suffix may not match the real file format - confirm before relying on it

    Parameters
    ----------
    driver : selenium.webdriver
        Driver used to take the screenshot
    imageTitle: string
        Root of the name for this screenshot

    Returns
    -------
    None
    '''
    global partialImage
    #Root of the name, a dash, the current counter and the extension
    fileName = "".join([imageTitle, "-", "{0}".format(partialImage), ".jpg"])
    driver.get_screenshot_as_file(fileName)
    #Next screenshot gets the next number
    partialImage = partialImage + 1
def getFullScreenshotScrolling(driver,imageTitle,folder=None):
    '''
    This method gets screenshots of our current page, scrolling from top to bottom and getting
    a screenshot in each iteration, using the methods scrollWindow and getPartialScrollImage

    Parameters
    ----------
    driver : selenium.webdriver
        This is the driver controlling the browser where we will execute the JavaScript scripts
    imageTitle: string
        Root of the name for the screenshots
    folder: string
        Path to the folder where we want to save our screenshots. It is created when it
        does not exist yet (including missing parent folders). If None, a folder named
        after the current date and time is used

    Returns
    -------
    None
    '''
    global partialImage
    #If folder is None, we name the folder using the current date and time as a basis
    #(":" is replaced because it is not allowed in file names on some platforms)
    if folder is None:
        folder = "{0}".format(datetime.datetime.today()).replace(":","-")
    #We create the folder only when it is missing, so an existing folder can be reused
    if not os.path.isdir(folder):
        os.makedirs(folder)
    #Complete path for our images, combining the folder and imageTitle
    imagePath = os.path.join(folder, imageTitle)
    try:
        #We scroll the page calling getPartialScrollImage(driver, imagePath) at each position
        scrollWindow(driver, getPartialScrollImage, driver, imagePath)
    finally:
        #The counter has to point to 0 again for the next run, even if the scrolling failed
        partialImage = 0
def getFullScreenshotHeadless(driver,imageTitle,webdriverpath=None):
    '''
    This method opens a headless browser of the same kind as the parameter driver (using
    webdriverpath in case it is not None), goes to the URL where driver currently is,
    copies the cookies (to assure we see the same in case we are logged in), resizes the
    headless window to the size of the whole document and saves a screenshot

    Parameters
    ----------
    driver : selenium.webdriver
        This is the driver controlling the browser whose current page we want to capture.
        Only Chrome and Firefox drivers are accepted
    imageTitle: string
        Name (without extension) for the file that will contain our screenshot;
        ".jpg" is appended to it
    webdriverpath: string
        Path to the webdriver for our headless browser. If None, the webdriver is
        expected to be reachable through the environment variable PATH

    Returns
    -------
    None

    Raises
    ------
    Exception
        If driver is neither a Chrome nor a Firefox driver
    '''
    #We create the headless webdriver matching the kind of driver we received.
    #NOTE(review): executable_path is kept from the original code; recent Selenium
    #releases may no longer accept it - confirm against the installed version
    if isinstance(driver, webdriver.Chrome):
        options = webdriver.ChromeOptions()
        options.add_experimental_option("excludeSwitches",["ignore-certificate-errors"])
        options.add_argument('headless')
        #"options" replaces the deprecated "chrome_options" keyword and matches the Firefox branch
        if webdriverpath is not None:
            driverHeadless = webdriver.Chrome(executable_path=webdriverpath,options=options)
        else:
            driverHeadless = webdriver.Chrome(options=options)
    elif isinstance(driver, webdriver.Firefox):
        options = Options()
        options.headless = True
        #The new browser must be stored in driverHeadless: storing it in driver would lose
        #the original browser and leave driverHeadless undefined
        if webdriverpath is not None:
            driverHeadless = webdriver.Firefox(executable_path=webdriverpath,options=options)
        else:
            driverHeadless = webdriver.Firefox(options=options)
    else:
        #Only the Chrome and Firefox drivers are handled by this method
        raise Exception("Browser Not Supported")
    try:
        #We go to the url where we were in our driver
        driverHeadless.get(driver.current_url)
        #We copy the cookies from our original driver to our headless driver,
        #dropping the 'expiry' field as the original code did
        for cookie in driver.get_cookies():
            cookie.pop('expiry', None)
            driverHeadless.add_cookie(cookie)
        #We reload the page so it is rendered again with the copied cookies
        driverHeadless.refresh()
        #We get the size of the whole HTML document from the original driver
        docHeight = getDocumentHeight(driver)
        docWidth = getDocumentWidth(driver)
        #We resize our headless browser to the size of the whole HTML document.
        #NOTE(review): the extra 3000 pixels of width come from the original code and
        #their purpose is not documented
        driverHeadless.set_window_size(docWidth+3000, docHeight)
        #We get a screenshot
        driverHeadless.get_screenshot_as_file("{0}.jpg".format(imageTitle))
    finally:
        #We always close our headless driver, also when something above failed
        driverHeadless.quit()
def find_element_implicitwait(driver,by,selector):
    '''
    Find an element, waiting until it is present in the DOM

    The search is done with WebDriverWait and the condition presence_of_element_located,
    using the module level timeout (see set_implicit_wait_timeout). If the element does
    not show up before the timeout, the wait fails and raises an exception.

    Parameters
    ----------
    driver : selenium.webdriver
        Driver controlling the browser where we look for the element
    by: selenium.webdriver.common.by
        Way in which we will search for our element, By.ID, By.XPATH, By.CLASS_NAME
    selector: string
        Selector matching "by": the ID of the element for By.ID, the xpath for By.XPATH...

    Returns
    -------
    webdriver HTML element
    '''
    locator = (by, selector)
    #The timeout is read from the module level variable at the moment of the call
    waiter = WebDriverWait(driver, timeout)
    return waiter.until(EC.presence_of_element_located(locator))
def find_element_by_id_implicitwait(driver,vId):
    '''
    Find an element by ID, waiting until it is present in the DOM

    This is a shortcut for find_element_implicitwait with By.ID, so it shares its
    timeout and fails in the same way when the element is not found in time.

    Parameters
    ----------
    driver : selenium.webdriver
        Driver controlling the browser where we look for the element
    vId: string
        ID of the HTML element

    Returns
    -------
    webdriver HTML element
    '''
    return find_element_implicitwait(driver, by=By.ID, selector=vId)
def find_element_by_xpath_implicitwait(driver,xpath):
    '''
    Find an element by XPATH, waiting until it is present in the DOM

    This is a shortcut for find_element_implicitwait with By.XPATH, so it shares its
    timeout and fails in the same way when the element is not found in time.

    Parameters
    ----------
    driver : selenium.webdriver
        Driver controlling the browser where we look for the element
    xpath: string
        XPATH of the HTML element

    Returns
    -------
    webdriver HTML element
    '''
    return find_element_implicitwait(driver, by=By.XPATH, selector=xpath)
def set_implicit_wait_timeout(newTimeout):
    '''
    Set the timeout used by find_element_implicitwait and the methods built on top of it

    Parameters
    ----------
    newTimeout :
        New value for the module level variable timeout, which is handed to
        WebDriverWait on every search

    Returns
    -------
    None
    '''
    #Rebind the module level variable "timeout"
    globals()["timeout"] = newTimeout
def capabilities_remove_automation_bar_Chrome():
    '''
    This method returns an object of type ChromeOptions that contains the options necessary
    to remove the bar that states that the browser is being controlled by automated
    software in Chrome. You have to give the parameter options in the constructor of the
    Chrome webdriver the value returned by this method

    Parameters
    ----------
    None

    Returns
    -------
    chrome_options webdriver.ChromeOptions object
    '''
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_experimental_option("excludeSwitches", ['enable-automation'])
    chrome_options.add_experimental_option("useAutomationExtension", False)
    #Preferences that turn off the credentials service and the password manager.
    #The key is 'credentials_enable_service'; the original had a stray ':' inside the
    #key, so that preference name did not match
    chrome_options.add_experimental_option("prefs", {
        'credentials_enable_service': False,
        'profile': {
            'password_manager_enabled': False
        }
    })
    return chrome_options