TA的每日心情 | 无聊 2016-8-19 10:32 |
---|
签到天数: 80 天 [LV.6]常住居民II
|
本帖最后由 xiaoye 于 2015-9-30 11:20 编辑
由于在做舆情处理,需要大量的数据,所以写了这个脚本。它调用的是微博公共API,如果遇到问题,
请查阅微博开放平台的API文档。
代码如下:
# -*- coding: utf-8 -*-
"""Sample crawler: pull the Weibo public timeline and store it in MySQL.

Python 2 script.  The encoding declaration above must stay on the first or
second line of the file: the source contains non-ASCII string literals, and
Python 2 refuses to compile such a file without a valid declaration.
"""
import weibo
import sys
import os
import urllib
import urllib2
import json
import cookielib
import ConfigParser
import time
import MySQLdb
import re

# Python 2 hack: make implicit str <-> unicode conversions use UTF-8.
# reload() is required because setdefaultencoding is not reachable on the
# already-imported ``sys`` module.
reload(sys)
sys.setdefaultencoding('utf8')

# Database settings; db_get_config() fills them from ./token.ini.
db_host = ""
db_data = ""
db_user = ""
db_pass = ""
db_port = ""

# Weibo open-platform credentials (masked placeholders).
# NOTE(review): secrets are hard-coded in the source; consider moving them
# to the ini file or the environment.
APP_KEY = "162454*****"  # your APP_KEY
APP_SECRET = "753847a8d16ead8a8e7618*****************"  # your app_secret
REDIRECT_URL = "https://api.weibo.com/oauth2/default.html"
USER_ID = "crawlertest@sina.cn"
USER_PASS = "12345*******"

# Shared API client used by every function below.
client = weibo.APIClient(APP_KEY, APP_SECRET, REDIRECT_URL)
def make_access_token():
    """Log in through the OAuth2 authorize form and persist a fresh token.

    Posts USER_ID / USER_PASS to the authorize endpoint, reads the
    authorization ``code`` from the URL the request ends up at, exchanges it
    via ``client.request_access_token`` and passes the result to
    ``save_access_token``.

    Raises:
        ValueError: if the final URL carries no ``code`` query parameter.
    """
    # Function-scope import: only this function needs the URL parser.
    from urlparse import urlparse, parse_qs

    login_url = 'https://api.weibo.com/oauth2/authorize'
    params = urllib.urlencode({
        'action': 'submit',
        'withOfficalFlag': '0',
        'ticket': '',
        'isLoginSina': '',
        'response_type': 'code',
        'regCallback': '',
        'redirect_uri': REDIRECT_URL,
        'client_id': APP_KEY,
        'state': '',
        'from': '',
        'userId': USER_ID,
        'passwd': USER_PASS,
    })

    url = client.get_authorize_url()
    # The authorize page is fetched before the form is submitted, as in the
    # original flow; its body is not needed, so release it right away.
    urllib2.urlopen(url).close()

    headers = {'Referer': url}
    request = urllib2.Request(login_url, params, headers)
    opener = get_opener(False)
    urllib2.install_opener(opener)
    try:
        response = opener.open(request)
        return_redirect_uri = response.url
        response.close()
    except urllib2.HTTPError as e:
        # Even when the last hop answers with an HTTP error, the URL that
        # was reached is still available on the exception.
        return_redirect_uri = e.geturl()

    # Parse the query string properly instead of splitting on '=', which
    # would swallow any parameter following the code (e.g. "&state=").
    query = parse_qs(urlparse(return_redirect_uri).query)
    if 'code' not in query:
        raise ValueError(
            "no authorization code in redirect URL: %s" % return_redirect_uri)
    code = query['code'][0]

    token = client.request_access_token(code, REDIRECT_URL)
    save_access_token(token)
def get_opener(proxy=False):
    """Build a urllib2 opener with cookie handling and SmartRedirectHandler.

    ``proxy`` is accepted but not used by this implementation.
    """
    handlers = (get_cookie(), SmartRedirectHandler())
    opener = urllib2.build_opener(*handlers)
    # Replace the default headers with a browser-like User-agent.
    user_agent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)'
    opener.addheaders = [('User-agent', user_agent)]
    return opener
def get_cookie():
    """Return an HTTPCookieProcessor backed by a new, empty CookieJar."""
    return urllib2.HTTPCookieProcessor(cookielib.CookieJar())
class SmartRedirectHandler(urllib2.HTTPRedirectHandler):
    """Redirect handler that records the redirect status on the response.

    The methods use the standard urllib2 handler signature
    ``(req, fp, code, msg, headers)``.  The previous version declared an
    extra ``cls`` parameter, so the opener's five-argument call could not
    bind and the handler failed as soon as a redirect was received.
    """

    def http_error_301(self, req, fp, code, msg, headers):
        """Follow a 301 redirect, print its headers and tag the result."""
        result = urllib2.HTTPRedirectHandler.http_error_301(
            self, req, fp, code, msg, headers)
        return self._tag(result, code, headers)

    def http_error_302(self, req, fp, code, msg, headers):
        """Follow a 302 redirect, print its headers and tag the result."""
        result = urllib2.HTTPRedirectHandler.http_error_302(
            self, req, fp, code, msg, headers)
        return self._tag(result, code, headers)

    def _tag(self, result, code, headers):
        """Store ``code`` on ``result`` as ``status`` and echo the headers."""
        # The parent handler may decline the redirect and return None.
        if result is not None:
            result.status = code
        print(headers)
        return result
def save_access_token(token):
    """Write the access token to the ``[token]`` section of ./token.ini.

    Args:
        token: object exposing ``access_token`` and ``expires_in``
            attributes (the result of ``client.request_access_token``).

    Stores ``access_token``, ``expires_in`` and the current local time
    (under the existing key ``data``).  Other sections already present in
    the ini file are kept.
    """
    # Use a distinct local name: assigning to ``time`` here would shadow the
    # ``time`` module and fail with UnboundLocalError.
    saved_at = time.strftime('%Y-%m-%d %H:%M:%S')

    cf = ConfigParser.ConfigParser()
    cf.read("./token.ini")
    # First run: the file or the section may not exist yet.
    if not cf.has_section("token"):
        cf.add_section("token")
    # Key must be "access_token": that is what apply_access_token() reads.
    cf.set("token", "access_token", token.access_token)
    cf.set("token", "expires_in", str(token.expires_in))
    cf.set("token", "data", saved_at)
    with open("./token.ini", "w") as fp:
        cf.write(fp)
def apply_access_token():
    """Load the saved token from ./token.ini and install it on ``client``.

    Reads ``access_token`` and ``expires_in`` from the ``[token]`` section
    and passes both to ``client.set_access_token``.
    """
    parser = ConfigParser.ConfigParser()
    parser.read("./token.ini")
    client.set_access_token(
        parser.get("token", "access_token"),
        parser.get("token", "expires_in"),
    )
# Matches the HTML anchor that wraps a status' "source" field.
_SOURCE_LINK_RE = re.compile(r'''<a href="(.+?)" rel="nofollow">(.+?)</a>''')


def _status_to_row(status):
    """Flatten one status dict into the 14-value row for ``weibo_info``."""
    user = status['user']
    raw_source = status["source"]
    match = _SOURCE_LINK_RE.match(raw_source)
    # Keep the raw text when the source is not the expected anchor, instead
    # of losing the whole record.
    source = match.group(2) if match else raw_source
    return [
        status['created_at'],
        status["idstr"],
        status['text'],
        source,
        status['reposts_count'],
        status['comments_count'],
        status['attitudes_count'],
        user['id'],
        user['screen_name'],
        user['location'],
        user['description'],
        user['gender'],
        user['verified'],
        user['bi_followers_count'],
    ]


def get_public_weibo():
    """Fetch one page of the public timeline and store every status.

    Calls ``client.statuses.public_timeline.get()`` and saves each returned
    status through ``db_save_info``.  Error code 10023 (reported to the user
    as the account having exceeded its usage limit) prints a notice and
    exits the process.  Any other API failure is reported and this round is
    skipped.  A failure on a single status is reported and does not stop the
    remaining ones.
    """
    try:
        timeline = client.statuses.public_timeline.get()
    except StandardError as e:
        # Not every StandardError carries an error_code attribute.
        if getattr(e, 'error_code', None) == 10023:
            print(u"当前账号已经超出使用限制,请稍候再试!")
            sys.exit(0)
        print(u"获取公共微博失败: %s" % e)
        return

    # JSON round trip kept from the original; presumably it turns the SDK's
    # result objects into plain dicts/lists -- TODO confirm it is needed.
    statuses = json.loads(json.dumps(timeline))['statuses']

    # enumerate() visits every status; the old range(0, len - 1) loop
    # skipped the last one.
    for i, status in enumerate(statuses):
        try:
            db_save_info(_status_to_row(status))
        except Exception as e:
            # Best effort per record, but no longer silent.
            print(u"插入第%s条数据失败: %s" % (i, e))
        else:
            print(u"插入第%s条数据成功!" % i)
def db_get_config():
    """Load database settings from the ``[dbinfo]`` section of ./token.ini.

    Stores the values in the module-level ``db_host``, ``db_data``,
    ``db_user``, ``db_pass`` and ``db_port`` variables.  All values are kept
    as the strings ConfigParser returns.
    """
    # Without this declaration the assignments below only create locals and
    # the function has no observable effect.
    global db_host, db_data, db_user, db_pass, db_port

    cf = ConfigParser.ConfigParser()
    cf.read("./token.ini")
    db_host = cf.get("dbinfo", "dbserver")
    db_data = cf.get("dbinfo", "db_data")
    db_user = cf.get("dbinfo", "db_user")
    db_pass = cf.get("dbinfo", "password")
    db_port = cf.get("dbinfo", "port")
def db_save_info(value):
    """Insert one row into ``db_info.weibo_info``.

    Args:
        value: sequence of 14 column values, bound as query parameters.

    Opens a new connection per call and always closes it, even when the
    insert fails, so repeated calls do not pile up open connections.
    """
    # NOTE(review): connection settings are hard-coded here; the module-level
    # db_* variables are not consulted.
    conn = MySQLdb.connect("127.0.0.1", "root", "123456", charset="utf8")
    try:
        conn.select_db("db_info")
        cursor = conn.cursor()
        try:
            cursor.execute("SET NAMES 'utf8'")
            cursor.execute(
                '''insert into weibo_info values(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)''',
                value)
            conn.commit()
        finally:
            cursor.close()
    finally:
        conn.close()
def get_db_count():
    """Return the number of rows currently in ``db_info.weibo_info``.

    The connection and cursor are closed before returning; the previous
    version left both open on every call.
    """
    conn = MySQLdb.connect("127.0.0.1", "root", "123456")
    try:
        conn.select_db("db_info")
        cursor = conn.cursor()
        try:
            cursor.execute("select count(*) from weibo_info")
            row = cursor.fetchone()
        finally:
            cursor.close()
    finally:
        conn.close()
    # count(*) yields exactly one row; fall back to 0 defensively.
    return row[0] if row else 0
if __name__ == "__main__":
    # Start-up banner, reproduced verbatim.
    banner = u'''
###################################
# [url=home.php?mod=space&uid=11566]@Author[/url] iceforce
# @date 2014-11-17
##################################
#抓取公共微博信息示例
#
##################################
'''
    print(banner)

    # Row count before this run, so progress shows only rows added now.
    baseline = get_db_count()
    apply_access_token()
    for round_no in range(150):
        inserted = get_db_count() - baseline
        print(u"=======================================第%s次执行,已插入%s条数据=============================================" % (round_no, inserted))
        get_public_weibo()

    print(u"============================================执行完成!============================================================")
复制代码 |
|