# 파이썬 YouTube 검색으로 영상 수집 (Selenium 코드) 소스 오픈
import os
import time
import traceback
from datetime import datetime
from urllib.parse import quote_plus

from browser import Browser
class Youtube:
    """Collect video titles and URLs from YouTube search result pages.

    All page interaction goes through the project-local ``Browser`` object
    (NOTE(review): presumably a Selenium wrapper -- confirm in ``browser``).
    """

    def __init__(self):
        self.youtube_url: str = 'https://www.youtube.com/'
        # Not read anywhere inside this class.
        # NOTE(review): presumably a timeout in seconds -- confirm with callers.
        self.wait = 60
        # Set by open_youtube(); stays None until a browser has been created.
        self.driver = None

    def open_youtube(self):
        """Create the browser and load the YouTube home page.

        Best-effort: any error is printed instead of raised. If the browser
        object itself could not be created, ``self.driver`` stays ``None`` and
        ``search_keyword`` will raise ``RuntimeError``.
        """
        try:
            self.driver = Browser(user_data_dir='./chrome_cache/moonpa')
            self.driver.get(self.youtube_url)
        except Exception as e:
            print(e)

    def search_keyword(self, keyword, scroll_count=20, scroll_pause_time=2):
        """Search YouTube for ``keyword`` plus '직캠' and collect the results.

        Args:
            keyword: Search term; it is URL-encoded before being sent.
            scroll_count: How many times to scroll to the bottom of the page
                before reading the result elements.
            scroll_pause_time: Seconds to sleep after each scroll.

        Returns:
            A list of dicts with the keys ``'keyword'``, ``'title'`` and
            ``'url'``, one per ``#video-title`` element that has an ``href``.

        Raises:
            RuntimeError: If no browser has been opened yet.
        """
        if self.driver is None:
            raise RuntimeError(
                'Browser is not open; call open_youtube() before search_keyword().'
            )

        # Encode the whole query so characters such as '&', '#' or spaces in
        # the keyword cannot break the URL; the space becomes '+'.
        query = quote_plus(f'{keyword} 직캠')
        self.driver.get('https://www.youtube.com/results?search_query=' + query)

        # Scroll to the bottom repeatedly, pausing after each scroll before
        # the result elements are read.
        for _ in range(scroll_count):
            self.driver.execute_script(
                "window.scrollTo(0, document.documentElement.scrollHeight);"
            )
            time.sleep(scroll_pause_time)

        results = []
        for video in self.driver.find_by_css('#video-title'):
            href = video.get_attribute('href')
            if not href:
                # Elements without a link are skipped.
                continue
            results.append({
                'keyword': keyword,
                'title': video.get_attribute('title'),
                # Keep only the part of the URL before the first '&'.
                'url': href.split('&')[0],
            })
        return results

    def save_to_file(self, results, file_path):
        """Append ``results`` to ``file_path``, one record per line.

        Each line is ``keyword<=^=>title<=^=>url``. The file is opened in
        append mode with UTF-8 encoding and the path is printed first.
        """
        print(file_path)
        # The with-statement closes the file; no explicit close() is needed.
        with open(file_path, 'a', encoding='utf-8') as file:
            for result in results:
                file.write(
                    f"{result['keyword']}<=^=>{result['title']}<=^=>{result['url']}\n"
                )
if __name__ == "__main__":
    # Script entry point: run every query through one browser session and
    # append the results to a single timestamped text file next to this script.
    # Any exception is appended to error.txt and the user is prompted.
    try:
        queries = [
            '뉴진스', '르세라핌', '엔믹스', '블랙핑크', '트와이스', '여자아이들', '아일릿',
            '아이브', '레드벨벳', '트리플에스', '키스오브라이프', '베이비몬스터', '오마이걸',
            '에스파', 'itzy', 'wooah', 'xg', '빅뱅', 'bts', 'nct',
            'enhypen', 'cravity', 'xdinaryheroes', '더보이즈',
        ]

        crawler = Youtube()
        crawler.open_youtube()

        # The timestamp is taken after the browser has been opened.
        stamp = datetime.now().strftime("%Y-%m-%d_%H_%M")
        script_dir = os.path.dirname(os.path.abspath(__file__))
        file_path = f"{script_dir}/yr{stamp}.txt"

        for query in queries:
            found = crawler.search_keyword(query)
            crawler.save_to_file(found, file_path)
            print(f"Results for '{query}' have been saved to {file_path}")
    except Exception:
        # Append a timestamped traceback, padded with blank lines on both sides.
        with open('error.txt', 'a', encoding='utf-8-sig') as log:
            failed_at = datetime.now().strftime("%Y-%m-%d %H:%M")
            log.write(f'\n\n{failed_at} - from Youtube\n')
            log.write(traceback.format_exc())
            log.write('\n\n')
        input('에러가 발생했습니다. "error.txt" 파일과 함께 문의해주세요')