1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
|
import urllib
import urllib2
import requests
import re


def download_html(keywords):
    """Fetch the Baidu search results page for ``keywords``.

    Args:
        keywords: Search terms, sent to Baidu as the ``wd`` query parameter.

    Returns:
        The response body as text (``web_content.text``).

    Note:
        The request uses a 4-second timeout; network failures and timeouts
        surface as whatever exception ``requests.get`` raises.
    """
    key = {'wd': keywords}
    # Send a desktop Safari User-Agent along with the request.
    headers = {'User-Agent': '(Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8'}
    web_content = requests.get("https://www.baidu.com/s?", params=key, headers=headers, timeout=4)
    return web_content.text


content = download_html('ios')

# Python 2: decode the UTF-8 byte-string pattern into a unicode pattern
# before matching it against the page text.
regexName = unicode("百度为您找到相关结果约(.+?)个", "utf8")

# re.search returns None when the result-count banner is absent from the
# page; fail with a clear message instead of an AttributeError on None.
count_match = re.search(regexName, content)
if count_match is None:
    raise ValueError("result-count banner not found in the Baidu response")

num = count_match.group(1)
# The count is printed with thousands separators (e.g. "1,234"); strip them.
num_int = int(num.replace(',', ''))
print(num_int)
|
user-agent 大全 http://www.fynas.com/ua
注意:使用正则匹配中文时,需要对中文正则表达式进行转码,否则无法正常匹配
比如Python源码文件头部声明的编码方式为UTF-8,那么中文正则表达式需要按对应的UTF-8编码进行转码
1 2 3
|
# Python 2: decode the UTF-8 byte string into a unicode regex pattern;
# the capture group (.+?) holds the text between the two fixed phrases.
regexName = unicode("百度为您找到相关结果约(.+?)个", "utf8")
|