second commit
This commit is contained in:
64
main.py
Normal file
64
main.py
Normal file
@@ -0,0 +1,64 @@
|
||||
from fake_useragent import UserAgent
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver import Firefox
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.firefox.service import Service
|
||||
from selenium.webdriver.firefox.options import Options
|
||||
import time
|
||||
from datetime import date, datetime
|
||||
import os
|
||||
import codecs
|
||||
|
||||
|
||||
# Local time at start-up, formatted as day-month-year-hour-minute; the
# formatted value is meant to be embedded in the output file name.
now = datetime.now()
formatingDate = now.strftime("%d-%m-%Y-%H-%M")

# Directory that receives the saved page source.  Written as a raw string so
# the backslashes are taken literally instead of being parsed as (invalid)
# escape sequences such as "\P" and "\h".
basePath = r"D:\Programming\PythonProgects\PinterestParse\html"
|
||||
|
||||
# NOTE(review): `ua` is not used by the set-up below -- the user-agent
# override lines are commented out.  Kept so the script behaves as before.
ua = UserAgent(use_cache_server=False)
#useragent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0"
#useragent=ua

# Existing Firefox profile the session should run with (raw string, single
# backslashes, so the value is a normal Windows path).
firefoxProfie = r'C:\Users\danamir.ASUMADI\AppData\Roaming\Mozilla\Firefox\Profiles\9vfgbpct.default'

options = Options()
# Select the profile through Options.profile.  set_preference('profile', ...)
# would only write a preference entry named "profile" and the browser would
# still start on a fresh profile.
# NOTE(review): Selenium is expected to work on a copy of this directory --
# confirm that is acceptable for this profile's size.
options.profile = firefoxProfie

# geckodriver is looked up in ./driver relative to the current working
# directory; os.path.join avoids hand-written backslash literals.
driverPath = os.path.join(os.getcwd(), 'driver', 'geckodriver.exe')
service = Service(driverPath)
browser = Firefox(service=service, options=options)
|
||||
|
||||
# Page to load in the browser.
url = "https://ru.pinterest.com/alexdanamir/%D1%87%D0%B5%D1%80%D1%82%D0%B5%D0%B6%D0%B8/"
browser.get(url)

# Seconds to wait after each scroll before measuring the page again.
SCROLL_PAUSE_TIME = 5


def _scroll_height():
    """Return the current scroll height of the document body."""
    return browser.execute_script("return document.body.scrollHeight")


# Keep jumping to the bottom of the page until a scroll no longer changes
# the document height.
last_height = _scroll_height()
while True:
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Give the page time to load before measuring.
    time.sleep(SCROLL_PAUSE_TIME)

    new_height = _scroll_height()
    height_changed = new_height != last_height
    last_height = new_height
    if not height_changed:
        break
|
||||
|
||||
# Capture the page, write it to disk and always shut the browser down,
# even when capturing or writing fails.
try:
    html = browser.page_source

    # NOTE(review): pause kept from the original script; the page source has
    # already been captured at this point, so confirm whether it is needed.
    time.sleep(2)

    htmlName = "pinterest" + "-" + formatingDate + ".txt"
    saveFile = os.path.join(basePath, htmlName)

    # Create the target directory if needed so the captured page is not lost
    # to a missing folder.
    os.makedirs(basePath, exist_ok=True)

    # Explicit UTF-8: the platform default codec may be unable to encode
    # non-ASCII characters in the page and would raise UnicodeEncodeError.
    with open(saveFile, "w", encoding="utf-8") as savedFile:
        savedFile.write(html)
finally:
    # quit() ends the whole WebDriver session, not just the current window.
    browser.quit()
|
||||
|
||||
Reference in New Issue
Block a user