爬取猎取网药品说明书
写一个小爬虫,爬取猎取网的药品说明书,数据量大约一万条。
import requests
import codecs
from lxml import html
# HTTP request headers sent with every page request (browser-like User-Agent).
headers = {
    'Host': 'www.liequ.net',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'
    ),
}

# URL pattern of a single drug page; the placeholder is the page number.
URL_TEMPLATE = 'https://www.liequ.net/yaopin/{}.html'
# File that one line per successfully fetched page is appended to.
OUTPUT_PATH = 'liequ.xls'
# Seconds to wait for the server before giving up on a single page.
REQUEST_TIMEOUT = 30


def page_url(page):
    """Return the URL of drug page number *page*."""
    return URL_TEMPLATE.format(page)


def format_record(name, describe):
    """Return the output line for one page.

    *name* and *describe* are the lists returned by the XPath queries; they
    are rendered with their Python list repr, separated by a tab and
    terminated by a newline.
    """
    return '{}\t{}\n'.format(name, describe)


def fetch_page(page):
    """Download page number *page*.

    Returns the ``requests`` response, or ``None`` when the request itself
    fails (connection error, timeout, ...), so one bad page does not abort
    the whole crawl.
    """
    try:
        # ``headers`` must be passed by keyword: the second positional
        # argument of ``requests.get`` is ``params`` (the query string).
        # NOTE(review): verify=False disables TLS certificate verification;
        # it is kept as in the original request, confirm it is still needed.
        return requests.get(
            page_url(page),
            headers=headers,
            verify=False,
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException:
        return None


def main(first_page=1, last_page=9444):
    """Crawl pages *first_page*..*last_page* (inclusive) into OUTPUT_PATH.

    For every page answered with HTTP 200 and a non-empty body, the drug
    name and description text nodes are extracted and appended as one line
    to the output file. Any other outcome is reported as a failure and the
    crawl moves on to the next page.
    """
    for i in range(first_page, last_page + 1):
        r = fetch_page(i)
        if r is None or r.status_code != 200:
            print('第{}页失败'.format(i))
            continue

        r.encoding = 'utf-8'
        text = r.text
        if not text.strip():
            # An empty document cannot be parsed; treat it as a failed page.
            print('第{}页失败'.format(i))
            continue

        content = html.fromstring(text)
        name = content.xpath('//div[@id="liequleft"]/div/div[2]/div[1]/h2/text()')
        # Drops the first nine text nodes of the first-column cells --
        # presumably page chrome before the actual leaflet; TODO confirm
        # against the page layout.
        describe = content.xpath('//td[1]/text()')[9:]

        # Append and close per page so finished pages are on disk even if
        # the crawl is interrupted later. The explicit encoding avoids
        # UnicodeEncodeError on platforms whose default is not UTF-8.
        with open(OUTPUT_PATH, 'a', encoding='utf-8') as f:
            f.write(format_record(name, describe))
        print('完成{}页'.format(i))


if __name__ == '__main__':
    main()