
知乎為什么成功?
因為知乎滿足用戶分享的欲望,同時滿足了個人建立威望的人性需求。
知乎抓住了人性中的一個優(yōu)點:分享。人其實是渴望分享的動物,有件趣事沒有人分享就好比女人穿著件漂亮衣服卻沒有人問她在哪里買的一樣難受,Keso說他之所以在知乎上如此活躍是因為“中國互聯(lián)網(wǎng)在滿足像我一樣的人的需求方面,做得太少”,而知乎,正好給了他們一個高質(zhì)量的分享舞臺。
當然,分享的前提,是高質(zhì)量的問題,最好是激起你回答的欲望乃至不回答都不舒服的問題。再者,這個分享其實也是有回饋的,他能讓你建立威望。你回答得越多,就越顯得你知識淵博,你的威望就越高。這恰好滿足了馬斯洛的需求金字塔中最高層次的需求——自我實現(xiàn)的需求。猶太裔人本主義心理學家亞伯拉罕·馬斯洛(Abraham Maslow)提出的需求層次理論,將人的需求劃分為五個層次。
這是一種觀點;另一種觀點認為,知乎的成功同樣離不開技術層面的支撐(例如下面這個話題爬蟲程序),代碼如下:
#coding:utf-8
"""
@author:haoning
@create time:2015.8.5
"""
from __future__ import division # 精確除法
from Queue import Queue
from __builtin__ import False
import json
import os
import re
import platform
import uuid
import urllib
import urllib2
import sys
import time
import MySQLdb as mdb
from bs4 import BeautifulSoup
# Python 2 idiom: reload(sys) restores the setdefaultencoding attribute that
# site.py deletes, so Chinese topic names survive implicit str/unicode mixing.
reload(sys)
sys.setdefaultencoding( "utf-8" )
# HTTP headers sent with every AJAX request to zhihu.com.
# NOTE(review): the Cookie below is a captured login session -- it will expire,
# and committing session secrets to source control is unsafe; rotate it.
headers = {
'User-Agent' : 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:35.0) Gecko/20100101 Firefox/35.0',
'Content-Type':'application/x-www-form-urlencoded; charset=UTF-8',
'X-Requested-With':'XMLHttpRequest',
'Referer':'https://www.zhihu.com/topics',
'Cookie':'__utma=51854390.517069884.1416212035.1416212035.1416212035.1; q_c1=c02bf44d00d240798bfabcfc95baeb56|1455778173000|1416205243000; _za=b1c8ae35-f986-46a2-b24a-cb9359dc6b2a; aliyungf_tc=AQAAAJ1m71jL1woArKqF22VFnL/wRy6C; _xsrf=9d494558f9271340ab24598d85b2a3c8; cap_id="MDNiMjcwM2U0MTRhNDVmYjgxZWVhOWI0NTA2OGU5OTg=|1455864276|2a4ce8247ebd3c0df5393bb5661713ad9eec01dd"; n_c=1; _alicdn_sec=56c6ba4d556557d27a0f8c876f563d12a285f33a'
}
# MySQL connection settings; database 'zhihu' is selected in connect() below.
DB_HOST = '127.0.0.1'
DB_USER = 'root'
DB_PASS = 'root'
queue= Queue() # work queue of (node_token, name, parent_name) topics to crawl
nodeSet=set() # NOTE(review): never written or read below -- presumably leftovers
keywordSet=set() # from an earlier version of the crawler
stop=0 # unused flag
offset=-20 # unused at module level (get_topis keeps its own local offset)
level=0 # unused depth counters
maxLevel=7
counter=0 # running count of rooms inserted; updated in getContent()
base="" # unused
# One shared connection/cursor for the whole crawl; autocommit is off and the
# code commits explicitly after each insert so child lookups see parent rows.
conn = mdb.connect(DB_HOST, DB_USER, DB_PASS, 'zhihu', charset='utf8')
conn.autocommit(False)
curr = conn.cursor()
def get_html(url):
    """Fetch *url* with a 3-second timeout and return the raw response body.

    Returns None on any network/HTTP error: the crawler treats an
    unreachable page as "skip this node" rather than aborting.
    """
    try:
        req = urllib2.Request(url)
        response = urllib2.urlopen(req, None, 3)  # TODO: route through a proxy here
        return response.read()
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate and the crawl can be stopped with Ctrl-C.
        return None
def getTopics():
url = 'https://www.zhihu.com/topics'
print url
try:
req = urllib2.Request(url)
response = urllib2.urlopen(req) #鍦ㄨ繖閲屽簲璇ュ姞鍏ヤ唬鐞?
html = response.read().decode('utf-8')
print html
soup = BeautifulSoup(html)
lis = soup.find_all('li', {'class' : 'zm-topic-cat-item'})
for li in lis:
data_id=li.get('data-id')
name=li.text
curr.execute('select id from classify_new where name=%s',(name))
y= curr.fetchone()
if not y:
curr.execute('INSERT INTO classify_new(data_id,name)VALUES(%s,%s)',(data_id,name))
conn.commit()
except Exception as e:
print "get topic error",e
def get_extension(name):
    """Return *name*'s file extension including the dot (e.g. '.jpg'),
    or None when *name* contains no dot at all."""
    dot = name.rfind('.')
    return name[dot:] if dot != -1 else None
def which_platform():
    """Name of the current operating system, e.g. 'Linux' or 'Windows'."""
    return platform.system()
def GetDateString():
    """Today's local date formatted 'YYYY-MM-DD'; used as a folder name."""
    return time.strftime('%Y-%m-%d', time.localtime())
def makeDateFolder(par, classify):
    """Create (if needed) the folder <par>/<today>/<classify> and return its path.

    par      -- existing base directory; if it does not exist, returns None.
    classify -- sub-folder name (converted with str()).

    Returns the created/existing folder path, or None on error. The odd
    '//' separator on non-Linux platforms is kept from the original code
    (Windows tolerates doubled slashes).
    """
    try:
        if not os.path.isdir(par):
            return None
        sep = '/' if which_platform() == "Linux" else '//'
        newFolderName = par + sep + GetDateString() + sep + str(classify)
        if not os.path.isdir(newFolderName):
            os.makedirs(newFolderName)
        return newFolderName
    except Exception as e:  # fixed py2-only `except Exception,e` syntax
        sys.stderr.write("makeDateFolder(%r) failed: %s\n" % (par, e))
        return None
def download_img(url,classify):
try:
extention=get_extension(url)
if(extention is None):
return None
req = urllib2.Request(url)
resp = urllib2.urlopen(req,None,3)
dataimg=resp.read()
name=str(uuid.uuid1()).replace("-","")+"_www.guandn.com"+extention
top="E://topic_pic"
folder=makeDateFolder(top, classify)
filename=None
if folder is not None:
filename =folder+"//"+name
try:
if "e82bab09c_m" in str(url):
return True
if not os.path.exists(filename):
file_object = open(filename,'w+b')
file_object.write(dataimg)
file_object.close()
return '/room/default/'+GetDateString()+'/'+str(classify)+"/"+name
else:
print "file exist"
return None
except IOError,e1:
print "e1=",e1
pass
except Exception as e:
print "eee",e
pass
return None #如果沒有下載下來就利用原來網(wǎng)站的鏈接
def getChildren(node,name):
global queue,nodeSet
try:
url="https://www.zhihu.com/topic/"+str(node)+"/hot"
html=get_html(url)
if html is None:
return
soup = BeautifulSoup(html)
p_ch='父話題'
node_name=soup.find('div', {'id' : 'zh-topic-title'}).find('h1').text
topic_cla=soup.find('div', {'class' : 'child-topic'})
if topic_cla is not None:
try:
p_ch=str(topic_cla.text)
aList = soup.find_all('a', {'class' : 'zm-item-tag'}) #獲取所有子節(jié)點
if u'子話題' in p_ch:
for a in aList:
token=a.get('data-token')
a=str(a).replace('\n','').replace('\t','').replace('\r','')
start=str(a).find('>')
end=str(a).rfind('</a>')
new_node=str(str(a)[start+1:end])
curr.execute('select id from rooms where name=%s',(new_node)) #先保證名字絕不相同
y= curr.fetchone()
if not y:
print "y=",y,"new_node=",new_node,"token=",token
queue.put((token,new_node,node_name))
except Exception as e:
print "add queue error",e
except Exception as e:
print "get html error",e
def getContent(n,name,p,top_id):
try:
global counter
curr.execute('select id from rooms where name=%s',(name)) #先保證名字絕不相同
y= curr.fetchone()
print "exist?? ",y,"n=",n
if not y:
url="https://www.zhihu.com/topic/"+str(n)+"/hot"
html=get_html(url)
if html is None:
return
soup = BeautifulSoup(html)
title=soup.find('div', {'id' : 'zh-topic-title'}).find('h1').text
pic_path=soup.find('a',{'id':'zh-avartar-edit-form'}).find('img').get('src')
description=soup.find('div',{'class':'zm-editable-content'})
if description is not None:
description=description.text
if (u"未歸類" in title or u"根話題" in title): #允許入庫,避免死循環(huán)
description=None
tag_path=download_img(pic_path,top_id)
print "tag_path=",tag_path
if (tag_path is not None) or tag_path==True:
if tag_path==True:
tag_path=None
father_id=2 #默認為雜談
curr.execute('select id from rooms where name=%s',(p))
results = curr.fetchall()
for r in results:
father_id=r[0]
name=title
curr.execute('select id from rooms where name=%s',(name)) #先保證名字絕不相同
y= curr.fetchone()
print "store see..",y
if not y:
friends_num=0
temp = time.time()
x = time.localtime(float(temp))
create_time = time.strftime("%Y-%m-%d %H:%M:%S",x) # get time now
create_time
creater_id=None
room_avatar=tag_path
is_pass=1
has_index=0
reason_id=None
#print father_id,name,friends_num,create_time,creater_id,room_avatar,is_pass,has_index,reason_id
######################有資格入庫的內(nèi)容
counter=counter+1
curr.execute("INSERT INTO rooms(father_id,name,friends_num,description,create_time,creater_id,room_avatar,is_pass,has_index,reason_id)VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",(father_id,name,friends_num,description,create_time,creater_id,room_avatar,is_pass,has_index,reason_id))
conn.commit() #必須時時進入數(shù)據(jù)庫,不然找不到父節(jié)點
if counter % 200==0:
print "current node",name,"num",counter
except Exception as e:
print "get content error",e
def work():
global queue
curr.execute('select id,node,parent,name from classify where status=1')
results = curr.fetchall()
for r in results:
top_id=r[0]
node=r[1]
parent=r[2]
name=r[3]
try:
queue.put((node,name,parent)) #首先放入隊列
while queue.qsize() >0:
n,p=queue.get() #頂節(jié)點出隊
getContent(n,p,top_id)
getChildren(n,name) #出隊內(nèi)容的子節(jié)點
conn.commit()
except Exception as e:
print "what's wrong",e
def new_work():
global queue
curr.execute('select id,data_id,name from classify_new_copy where status=1')
results = curr.fetchall()
for r in results:
top_id=r[0]
data_id=r[1]
name=r[2]
try:
get_topis(data_id,name,top_id)
except:
pass
def get_topis(data_id,name,top_id):
global queue
url = 'https://www.zhihu.com/node/TopicsPlazzaListV2'
isGet = True;
offset = -20;
data_id=str(data_id)
while isGet:
offset = offset + 20
values = {'method': 'next', 'params': '{"topic_id":'+data_id+',"offset":'+str(offset)+',"hash_id":""}'}
try:
msg=None
try:
data = urllib.urlencode(values)
request = urllib2.Request(url,data,headers)
response = urllib2.urlopen(request,None,5)
html=response.read().decode('utf-8')
json_str = json.loads(html)
ms=json_str['msg']
if len(ms) <5:
break
msg=ms[0]
except Exception as e:
print "eeeee",e
#print msg
if msg is not None:
soup = BeautifulSoup(str(msg))
blks = soup.find_all('div', {'class' : 'blk'})
for blk in blks:
page=blk.find('a').get('href')
if page is not None:
node=page.replace("/topic/","") #將更多的種子入庫
parent=name
ne=blk.find('strong').text
try:
queue.put((node,ne,parent)) #首先放入隊列
while queue.qsize() >0:
n,name,p=queue.get() #頂節(jié)點出隊
size=queue.qsize()
if size > 0:
print size
getContent(n,name,p,top_id)
getChildren(n,name) #出隊內(nèi)容的子節(jié)點
conn.commit()
except Exception as e:
print "what's wrong",e
except urllib2.URLError, e:
print "error is",e
pass
if __name__ == '__main__':
    # Re-scan the seed table 400 times; each pass re-reads the rows whose
    # status is still 1 and crawls them.
    for _ in range(400):
        new_work()
說一下數(shù)據(jù)庫的問題:這里不再提供建表附件,表結(jié)構(gòu)很簡單,照代碼中用到的字段自行建表即可。我用的是 MySQL,請根據(jù)自己的需求建庫建表。
(本文由中國計算網(wǎng)總編欒玲收錄到《超算AI數(shù)據(jù)庫》 轉(zhuǎn)載請注明出處)
微信關注公眾號“cncompute_com ”,每天為您奉上最新最熱的計算頭條資訊,滿滿干貨~多年軟件設計師經(jīng)歷,業(yè)內(nèi)資深分析人士,圈中好友眾多,信息豐富,觀點獨到。發(fā)布各大自媒體平臺,覆蓋百萬讀者。《蘋果的品牌設計之道》、《誰擁有未來:小米互聯(lián)網(wǎng)思維PK傳統(tǒng)行業(yè)思維》二本暢銷書作者欒玲,現(xiàn)為中國計算網(wǎng)設計總監(jiān)與內(nèi)容總編,欒玲專著與國畫已被國圖、清華北大圖書館等收藏