-
Notifications
You must be signed in to change notification settings - Fork 0
/
source code.py
177 lines (160 loc) · 5.92 KB
/
source code.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import tkinter as tk
from tkinter import *
import operator
from bs4 import BeautifulSoup
import requests
from PIL import Image , ImageTk
from urllib.request import urlopen
# Shared score table: maps each word to its integer relevance score.
seo = dict()
# Module-level default; the button handlers bind their own local `string`.
string = ''
############################# web crawler #########################
def spider(url):
    """Download *url* and rank the words of its headings and paragraphs.

    Words found in ``<h1>`` and ``<h2>`` elements (in that order) are
    collected as header words, words found in ``<p>`` elements as normal
    words. Every word is lower-cased and split on whitespace, then both
    lists are handed to ``filtr``.

    Args:
        url: Address of the page to fetch; passed as-is to ``requests.get``.

    Raises:
        Any exception raised by ``requests.get`` is propagated unchanged.
    """
    # A timeout keeps the GUI from hanging forever on an unresponsive host.
    response = requests.get(url, timeout=10)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')

    def words_in(tag_name):
        """Return the lower-cased words of every *tag_name* element."""
        # get_text() is used instead of .string: .string is None as soon as
        # an element contains nested markup, which previously turned such
        # elements into the single bogus word "none".
        return [
            word
            for element in soup.find_all(tag_name)
            for word in element.get_text().lower().split()
        ]

    header = words_in('h1') + words_in('h2')
    normal = words_in('p')
    filtr(header, normal)
############################## online content extractor ###################
def spider2(url):
    """Download *url* and rank the words used in its link texts.

    The text of every ``<a>`` element is lower-cased, split on whitespace
    and passed to ``filtr`` as normal words; the header list is always
    empty for this extractor.

    Args:
        url: Address of the page to fetch; passed as-is to ``requests.get``.

    Raises:
        Any exception raised by ``requests.get`` is propagated unchanged.
    """
    # A timeout keeps the GUI from hanging forever on an unresponsive host.
    response = requests.get(url, timeout=10)
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(response.text, 'html.parser')

    header = []
    normal = []
    for anchor in soup.find_all('a'):
        # get_text() instead of .string: .string is None for anchors that
        # wrap nested markup, which used to yield the bogus word "none".
        normal.extend(anchor.get_text().lower().split())
    filtr(header, normal)
############################## filter for words ##############################
def _strip_and_filter(words, stop_words, symbol_table):
    """Return *words* with symbols removed, minus empties and stop words."""
    stripped = (word.translate(symbol_table) for word in words)
    return [word for word in stripped if word and word not in stop_words]


def filtr(head, norml):
    """Clean both word lists and pass the result on to ``Seo``.

    Each word first has every digit and special symbol removed. Words that
    end up empty, or that match one of the restricted connector words, are
    dropped. Symbols are removed before the connector check, so a word such
    as "that's" is compared as "thats".

    Args:
        head: Words taken from headings.
        norml: Words taken from ordinary text.
    """
    # Restricted words to be removed. Two entries are corrected here:
    # 'be''there' was missing a comma (it built the single word 'bethere'),
    # and 'sholud' was a misspelling of 'should'. A set makes each lookup
    # constant-time.
    conectors = {
        'is', 'am', 'are', 'the', 'and', 'or', 'for', 'who', 'by', 'a',
        'that', 'about', 'com', 'with', 'he', 'too', 'she', 'it', 'they',
        'on', 'at', 'then', 'may', 'able', 'us', 'what', 'be', 'should',
        'done', 'all', 'you', 'i', 'why', 'how', 'an', 'them', 'so', 'j',
        'b', 'c', 'd', 'e', 'f', 'g', 'next', 'also', 'like', 'since',
        'because', 'hi', 'thats', 'me', 'due', 'whenever', 'none', 'h', 'k',
        'unless', 'infact', 'unlike', 'still', 'hence', 'yet', 'besides',
        'just', 'even', 'have', 'your', 'l', 'my', 'there', 'of', 'in',
        'as', 'make', 'we', 'to', 'm', 'n',
    }
    # Characters stripped from every word; the backslash is now escaped
    # properly ("\;" was an invalid escape sequence).
    symbols = "!@#$%^&*()_+{}|:\"<>?1234567890-=[]\\;',.©/`~"
    symbol_table = str.maketrans('', '', symbols)

    header = _strip_and_filter(head, conectors, symbol_table)
    normal = _strip_and_filter(norml, conectors, symbol_table)
    Seo(header, normal)  # hand the cleaned lists to the ranking step
########################## Seo function #########################
def Seo(header, normal, min_count=3):
    """Score the words and list the ranked keywords in a results window.

    Every header word gets a score of 10; every occurrence of a normal word
    adds 1 to that word's score. Words scoring below *min_count* are left
    out, which is the "not 1 and not 2" filter the original condition was
    meant to express (it compared ints with the strings '1'/'2' using
    ``or`` and therefore never filtered anything).

    The remaining words are sorted by ascending score. When *header* is
    non-empty the order is reversed so the highest score comes first;
    otherwise the ascending order is kept.

    The window is a ``Toplevel`` and is driven by the event loop that is
    already running when a button handler calls this function, so no nested
    ``mainloop()`` is started here.

    Args:
        header: Cleaned heading words.
        normal: Cleaned body words.
        min_count: Lowest score a word needs in order to be displayed.
    """
    # Reset the shared table so an earlier search cannot leak into this one.
    seo.clear()
    for word in header:
        seo[word] = 10
    for word in normal:
        seo[word] = seo.get(word, 0) + 1

    ranked = [
        word
        for word, score in sorted(seo.items(), key=operator.itemgetter(1))
        if score >= min_count
    ]
    if header:
        ranked.reverse()

    # Only pack() is used inside this window: mixing grid() and pack() in
    # one master raises a TclError.
    window = tk.Toplevel()
    window.title('results')
    tk.Label(window, text="The COntent Of Web Site is...").pack(side=tk.TOP)

    scrollbar = tk.Scrollbar(window)
    scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
    listbox = tk.Listbox(window, yscrollcommand=scrollbar.set)
    for word in ranked:
        listbox.insert(tk.END, word)
    listbox.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    scrollbar.config(command=listbox.yview)
################################# application main body #########################
# Main application window.
app = tk.Tk()
app.title("Search")
app.configure(background='white')
# Initial logo dimensions, in pixels.
newImageSizeWidth, newImageSizeHeight = 100, 100
def on_button():
    """Handle the "SEO" button: crawl the address typed into the entry box.

    Nothing happens when the entry box is empty.
    """
    address = entry.get()
    if address != '':
        print(address)
        spider(address)  # run the main web crawler on the address
def clear_text():
    """Empty the entry box, from its first character to its end."""
    first, last = 0, 'end'
    entry.delete(first, last)
def content():
    """Handle the "Get Content" button: extract link text from the typed URL.

    Returns without doing anything when the entry box is empty, the same
    guard ``on_button`` applies; without it an empty string would be handed
    straight to ``spider2`` and on to ``requests.get``.
    """
    string = entry.get()
    if string == '':
        return
    print(string)
    spider2(string)
app.geometry('300x200')  # size of main window

# Logo scaling: n is the scale factor. With `same` set both sides are
# multiplied by n; otherwise the height is divided by n instead.
same = True
n = .25
path = r'C:\Users\Abhi\Desktop\photo.jpg'
try:
    image = Image.open(path)
except OSError:
    # The logo is optional: a missing or unreadable file at this hard-coded
    # location should not stop the application from starting.
    image = None

if image is not None:
    imageSizeWidth, imageSizeHeight = image.size
    newImageSizeWidth = int(imageSizeWidth * n)
    if same:
        newImageSizeHeight = int(imageSizeHeight * n)
    else:
        newImageSizeHeight = int(imageSizeHeight / n)
    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the filter it
    # used to alias.
    image = image.resize((newImageSizeWidth, newImageSizeHeight), Image.LANCZOS)
    img = ImageTk.PhotoImage(image)
    Canvas1 = Canvas(app)
    Canvas1.create_image(newImageSizeWidth / 2, newImageSizeHeight / 2, image=img)
    Canvas1.config(bg="white", width=newImageSizeWidth, height=newImageSizeHeight)
    Canvas1.pack(side=TOP, expand=True)

labl = Label(app, text="Facitoo Search", bg="white", fg="black")
button = tk.Button(app, text="SEO", command=on_button)  # rank the page's keywords
button3 = tk.Button(app, text="Get Content", command=content)  # rank the link texts
button2 = tk.Button(app, text="clear", command=clear_text)  # empty the text box
entry = tk.Entry(app)

labl.pack()
entry.pack()
button.pack(padx=20, pady=10, side=LEFT)
button3.pack(padx=20, pady=20, side=LEFT)
button2.pack(padx=20, pady=20, side=LEFT)
app.mainloop()