from bs4 import BeautifulSoup
from urllib.request import urlopen
# url = input("Enter URL: ")  # disabled: interactive URL prompt
# Parser names that can be handed to BeautifulSoup; parse() picks one by index.
parsor = [
    'html.parser',
    'lxml',
]
def parse(url: str, parsing: int = 0) -> BeautifulSoup:
    """Download *url* and return its content parsed as a BeautifulSoup tree.

    Args:
        url: Address to fetch; passed to ``urlopen`` unchanged.
        parsing: Index into the module-level ``parsor`` list that selects the
            parser name given to BeautifulSoup. Defaults to 0 (first entry).

    Returns:
        The parsed document.

    Raises:
        IndexError: If *parsing* is outside the range of ``parsor``.
    """
    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read, instead of being leaked.
    with urlopen(url) as response:
        html = response.read()
    return BeautifulSoup(html, parsor[parsing])
if __name__ == '__main__':
    # News search URL whose query value is already percent-encoded.
    search_url = 'https://search.naver.com/search.naver?where=news&ie=utf8&sm=nws_hty&query=%EC%84%A4%EB%AA%85%EC%A0%88'
    soup = parse(search_url)
    # Print text, title and link for every element with the 'news_tit' class.
    for headline in soup.find_all(class_='news_tit'):
        print(headline.text.strip(), '|', headline['title'], '|', headline['href'])
# GET-style search
# urllib.parse is added below to resolve the URL error (the query text must be percent-encoded).
from bs4 import BeautifulSoup
from urllib.request import urlopen
import urllib.parse as u_parse
# url = input("Enter URL: ")  # disabled: interactive URL prompt
# Parser names that can be handed to BeautifulSoup; parse() picks one by index.
parsor = [
    'html.parser',
    'lxml',
]
def url_input(base, plus: str = ''):
    """Return *base* with *plus* appended as a percent-encoded query value.

    Args:
        base: URL prefix that ends where the query value belongs
            (for example ``'...&query='``).
        plus: Raw search text. An empty value (``''`` or ``None``) means
            there is nothing to append.

    Returns:
        *base* unchanged when *plus* is empty, otherwise *base* followed by
        ``urllib.parse.quote_plus(plus)``.
    """
    # Guard clause: nothing to encode, so hand the prefix back untouched.
    if not plus:
        return base
    # quote_plus percent-encodes non-ASCII text and turns spaces into '+',
    # which keeps the resulting URL ASCII-only.
    return base + u_parse.quote_plus(plus)
def parse(url: str, parsing: int = 0) -> BeautifulSoup:
    """Download *url* and return its content parsed as a BeautifulSoup tree.

    Args:
        url: Address to fetch; passed to ``urlopen`` unchanged.
        parsing: Index into the module-level ``parsor`` list that selects the
            parser name given to BeautifulSoup. Defaults to 0 (first entry).

    Returns:
        The parsed document.

    Raises:
        IndexError: If *parsing* is outside the range of ``parsor``.
    """
    # Use the response as a context manager so the connection is closed
    # as soon as the body has been read, instead of being leaked.
    with urlopen(url) as response:
        html = response.read()
    return BeautifulSoup(html, parsor[parsing])
if __name__ == '__main__':
    # Build the search URL from the fixed prefix and the raw query text.
    base = 'https://search.naver.com/search.naver?where=news&ie=utf8&sm=nws_hty&query='
    plus_url = '이재명'
    soup = parse(url_input(base, plus_url))
    # Print text, title and link for every element with the 'news_tit' class.
    for headline in soup.find_all(class_='news_tit'):
        print(headline.text.strip(), '|', headline['title'], '|', headline['href'])