second commit

This commit is contained in:
Александр Геннадьевич Сальный
2022-10-15 21:01:12 +03:00
commit 7caeeaaff5
1329 changed files with 489315 additions and 0 deletions

64
main.py Normal file
View File

@@ -0,0 +1,64 @@
from fake_useragent import UserAgent
from selenium import webdriver
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options
import time
from datetime import date, datetime
import os
import codecs
# Start-up timestamp, formatted as day-month-year-hour-minute; it is used
# further down to build the output file name.
now = datetime.now()
formatingDate = now.strftime("%d-%m-%Y-%H-%M")

# Directory that receives the saved page. Written as a raw string because the
# backslashes are Windows path separators, not escape sequences; the runtime
# value is unchanged.
basePath = r"D:\Programming\PythonProgects\PinterestParse\html"

# NOTE(review): `ua` is not referenced by any active code visible in this
# file; only the commented-out lines below mention a user agent.
ua = UserAgent(use_cache_server=False)
#useragent="Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0"
#useragent=ua

# Path of an existing Firefox profile directory (value kept exactly as it was,
# including the doubled backslashes).
firefoxProfie = r'C:\\Users\\danamir.ASUMADI\\AppData\\Roaming\\Mozilla\\Firefox\\Profiles\\9vfgbpct.default'
options = Options()
# NOTE(review): set_preference() stores a browser preference whose key is
# literally 'profile'; this presumably does not make Firefox load that profile
# directory. Confirm whether the `-profile <dir>` argument (or
# `options.profile`) was intended before relying on a logged-in session.
options.set_preference('profile', firefoxProfie)

# geckodriver is expected in a "driver" folder under the current working
# directory; os.path.join avoids hand-written backslash literals.
driverPath = os.path.join(os.getcwd(), "driver", "geckodriver.exe")
service = Service(driverPath)
browser = Firefox(service=service, options=options)

# Pinterest board to capture.
url = "https://ru.pinterest.com/alexdanamir/%D1%87%D0%B5%D1%80%D1%82%D0%B5%D0%B6%D0%B8/"
browser.get(url)
# Seconds to wait after every scroll so the page has time to load.
SCROLL_PAUSE_TIME = 5

# Keep scrolling to the bottom until the document height reported by the
# browser is the same before and after a scroll-and-wait cycle.
last_height = browser.execute_script("return document.body.scrollHeight")
while True:
    # Jump to the current bottom of the document.
    browser.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # Pause before measuring again.
    time.sleep(SCROLL_PAUSE_TIME)

    # Re-measure and remember the result; stop once nothing changed.
    new_height = browser.execute_script("return document.body.scrollHeight")
    height_changed = new_height != last_height
    last_height = new_height
    if not height_changed:
        break
# Snapshot the page markup and write it to a timestamped text file, making
# sure the WebDriver session is shut down even if saving fails.
try:
    html = browser.page_source
    time.sleep(2)

    htmlName = f"pinterest-{formatingDate}.txt"
    saveFile = os.path.join(basePath, htmlName)

    # Create the target directory up front so the write cannot fail merely
    # because the folder is missing.
    os.makedirs(basePath, exist_ok=True)

    # Explicit UTF-8: the platform default encoding may be unable to represent
    # every character in the page source and would raise UnicodeEncodeError.
    # The context manager closes the file even when write() raises.
    with open(saveFile, "w", encoding="utf-8") as savedFile:
        savedFile.write(html)
finally:
    # quit() ends the whole WebDriver session, whereas close() only closes
    # the current window.
    browser.quit()