65 lines
1.8 KiB
Python
65 lines
1.8 KiB
Python
from fake_useragent import UserAgent
|
|
from selenium import webdriver
|
|
from selenium.webdriver import Firefox
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.common.keys import Keys
|
|
from selenium.webdriver.firefox.service import Service
|
|
from selenium.webdriver.firefox.options import Options
|
|
import time
|
|
from datetime import date, datetime
|
|
import os
|
|
import codecs
|
|
|
|
|
|
# Save the fully scrolled HTML of a Pinterest board to a timestamped file.
#
# Flow: start Firefox through geckodriver with an existing user profile,
# open the board, keep scrolling to the bottom until the page height stops
# growing, then write the resulting page source to disk.

# Timestamp taken once at start-up; it becomes part of the output file name.
now = datetime.now()
formatingDate = now.strftime("%d-%m-%Y-%H-%M")

# Directory that receives the saved page source. Raw string so that the
# backslashes are never interpreted as escape sequences.
basePath = r"D:\Programming\PythonProgects\PinterestParse\html"

# NOTE(review): `ua` is not applied to the browser anywhere in this script
# (the lines that assigned it to a user-agent setting were commented out).
# It is kept only so the module-level name stays available; confirm whether
# it can be removed, and whether the installed fake_useragent release still
# accepts `use_cache_server`.
ua = UserAgent(use_cache_server=False)

# Existing Firefox profile to run the session with.
firefoxProfie = r'C:\Users\danamir.ASUMADI\AppData\Roaming\Mozilla\Firefox\Profiles\9vfgbpct.default'

options = Options()
# A profile directory is selected with the `-profile <path>` command-line
# arguments. Setting a preference called "profile" (the previous approach)
# only writes an about:config entry and does not switch profiles.
options.add_argument("-profile")
options.add_argument(firefoxProfie)

# geckodriver is expected in a "driver" folder under the current working dir.
driverPath = os.path.join(os.getcwd(), "driver", "geckodriver.exe")
service = Service(driverPath)
browser = Firefox(service=service, options=options)

url = "https://ru.pinterest.com/alexdanamir/%D1%87%D0%B5%D1%80%D1%82%D0%B5%D0%B6%D0%B8/"

# Seconds to wait after each scroll so newly requested content can load.
SCROLL_PAUSE_TIME = 5

try:
    browser.get(url)

    # Get scroll height
    last_height = browser.execute_script("return document.body.scrollHeight")

    while True:
        # Scroll down to bottom
        browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

        # Wait to load page
        time.sleep(SCROLL_PAUSE_TIME)

        # Calculate new scroll height and compare with last scroll height;
        # an unchanged height means no further content was appended.
        new_height = browser.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height

    html = browser.page_source
finally:
    # Always end the WebDriver session, even if navigation or scrolling
    # raised; quit() also shuts down the driver process, not just the window.
    browser.quit()

htmlName = "pinterest" + "-" + formatingDate + ".txt"
saveFile = os.path.join(basePath, htmlName)

# Make sure the target directory exists before writing into it.
os.makedirs(basePath, exist_ok=True)

# Explicit UTF-8: the page contains non-ASCII text, and the platform default
# encoding may be unable to represent it.
with open(saveFile, "w", encoding="utf-8") as savedFile:
    savedFile.write(html)
|
|
|