from flask import Flask, request, render_template
import pandas as pd
import requests
from bs4 import BeautifulSoup
import csv
import json
import time
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
app = Flask(__name__)

@app.route('/')
def index():
    return render_template('upload_page.html')

# Code for logging in to the website
@app.route('/upload', methods=['POST'])
def upload():
    # Read the uploaded file
    csv_file = request.files['file']
    # Load the CSV data into a DataFrame
    df = pd.read_csv(csv_file)
    final_data = []
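    # The loop below calls `driver` without the original listing ever creating
    # it; a headless Chrome instance is assumed here so the scroll loop can run.
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    driver = webdriver.Chrome(options=chrome_options)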
    # Loop over the rows in the DataFrame and scrape each link
    for index, row in df.iterrows():
        link = row['Link']
        response = requests.get(link)
        soup = BeautifulSoup(response.content, 'html.parser')
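        # Assumed step: the scroll loop below runs against the Selenium
        # browser, so the browser must first navigate to the link; the
        # original listing only fetches the page with requests.
        driver.get(link)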
        start = time.time()
        # will be used in the while loop
        initialScroll = 0
        finalScroll = 1000
        while True:
            driver.execute_script(f"window.scrollTo({initialScroll}, {finalScroll})")
            # this command scrolls the window starting from the pixel value stored in the
            # initialScroll variable to the pixel value stored in the finalScroll variable
            initialScroll = finalScroll
            finalScroll += 1000
            # pause the script for 2 seconds so that the data can load
            time.sleep(2)
            end = time.time()
            # We will scroll for 20 seconds.
            if round(end - start) > 20:
                break
        src = driver.page_source
        soup = BeautifulSoup(src, 'html.parser')
        # print(soup.prettify())
        # Code to scrape the website
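    # Assumed cleanup: close the headless browser created above once every
    # link has been processed.
    driver.quit()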
    return render_template('index.html', message='Scraped all data')

if __name__ == '__main__':
    app.run(debug=True)