Files
PinterestParse/main.py
Александр Геннадьевич Сальный 7caeeaaff5 second commit
2022-10-15 21:01:12 +03:00

65 lines
1.8 KiB
Python

from fake_useragent import UserAgent
from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
import time
from datetime import date, datetime
import os
import codecs
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------

# Timestamp taken once at start-up; it becomes part of the output file name.
now = datetime.now()
formatingDate = now.strftime("%d-%m-%Y-%H-%M")

# Raw strings keep the Windows backslashes literal without relying on
# invalid escape sequences such as "\P" or "\h".
basePath = r"D:\Programming\PythonProgects\PinterestParse\html"
firefoxProfie = r"C:\Users\danamir.ASUMADI\AppData\Roaming\Mozilla\Firefox\Profiles\9vfgbpct.default"

# geckodriver is expected in a "driver" folder under the current working
# directory.
driverPath = os.path.join(os.getcwd(), "driver", "geckodriver.exe")

url = "https://ru.pinterest.com/alexdanamir/%D1%87%D0%B5%D1%80%D1%82%D0%B5%D0%B6%D0%B8/"

# Seconds to wait after each scroll so lazily loaded content can arrive.
SCROLL_PAUSE_TIME = 5


def build_browser(driver_path=driverPath, profile_dir=firefoxProfie):
    """Start a Firefox WebDriver session through geckodriver.

    Args:
        driver_path: Path to the geckodriver executable.
        profile_dir: Value stored under the ``profile`` preference.

    Returns:
        The started ``Firefox`` driver instance.
    """
    options = Options()
    # NOTE(review): "profile" does not appear to be a recognised Firefox
    # preference name, so this call likely does not make Firefox use
    # profile_dir. If loading that profile is the goal, passing "-profile"
    # and the directory via options.add_argument() is probably what is
    # wanted -- confirm before changing, since it alters which profile runs.
    options.set_preference('profile', profile_dir)
    return Firefox(service=Service(driver_path), options=options)


def scroll_to_bottom(driver, pause=SCROLL_PAUSE_TIME, sleep=time.sleep,
                     max_scrolls=None):
    """Scroll the page down until its height stops growing.

    Args:
        driver: Object exposing ``execute_script(js)``.
        pause: Seconds to wait after every scroll.
        sleep: Callable used for waiting; injectable so callers can skip
            real delays.
        max_scrolls: Optional upper bound on the number of scrolls. ``None``
            (the default) keeps scrolling until the height is stable.

    Returns:
        The last ``document.body.scrollHeight`` value that was read.
    """
    height_js = "return document.body.scrollHeight"
    last_height = driver.execute_script(height_js)
    scrolls = 0
    while max_scrolls is None or scrolls < max_scrolls:
        # Jump to the bottom, then give the page time to load more content.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(pause)
        scrolls += 1
        new_height = driver.execute_script(height_js)
        if new_height == last_height:
            # Height unchanged after a scroll: nothing more was loaded.
            break
        last_height = new_height
    return last_height


def save_html(html, directory=basePath, stamp=formatingDate):
    """Write *html* to ``pinterest-<stamp>.txt`` inside *directory*.

    The directory is created when missing. The file is written as UTF-8 so
    that characters outside the platform's default code page do not raise
    ``UnicodeEncodeError``.

    Args:
        html: Page source text to store.
        directory: Target folder.
        stamp: Timestamp fragment used in the file name.

    Returns:
        The full path of the written file.
    """
    os.makedirs(directory, exist_ok=True)
    htmlName = "pinterest" + "-" + stamp + ".txt"
    saveFile = os.path.join(directory, htmlName)
    with open(saveFile, "w", encoding="utf-8") as savedFile:
        savedFile.write(html)
    return saveFile


def main():
    """Open the board, scroll it to the end and save the page source."""
    browser = build_browser()
    try:
        browser.get(url)
        scroll_to_bottom(browser)
        html = browser.page_source
    finally:
        # quit() ends the whole WebDriver session (not just the window), and
        # the finally clause guarantees that even if loading/scrolling fails.
        browser.quit()
    save_html(html)


if __name__ == "__main__":
    main()