阿神2017-04-18 10:13:48
只有挨個遍歷了,幾千條資料也不算多,類似處理方法如下:
# python 2.7 utf-8
from copy import deepcopy
# Keyword table: maps each category name to the keywords that, when found
# inside a title, assign that category to the record.
dic_category = {
    u'卫生': [u'扫地', u'拖地', u'吸尘'],
    u'锻炼': [u'跑步', u'慢跑', u'俯卧撑'],
    u'自杀': [u'跳楼'],
}
# Sample payload: a "Data" list of records, each carrying a unicode "title"
# and a string "id".
data = {
    "Data": [
        {"title": u"我要扫地", "id": "1"},
        {"title": u"他要跳楼了", "id": "2"},
        {"title": u"跑步是有好处的", "id": "3"},
        {"title": u"多做俯卧撑", "id": "4"},
    ],
}
def _first_matching_category(title, categories):
    """Return the first category whose keyword list has a keyword in *title*.

    *categories* maps a category name to an iterable of keyword strings.
    Returns None when no keyword of any category occurs in the title.

    A title that matches several categories is not handled specially: the
    category the mapping yields first wins.
    """
    # items() iterates in the same order as iteritems() on Python 2 and,
    # unlike iteritems(), also exists on Python 3.
    for category, keywords in categories.items():
        if any(keyword in title for keyword in keywords):
            return category
    return None


# Work on a copy so the input stays untouched; if memory usage is a concern,
# process `data` in place instead.
processed_data = deepcopy(data)
for dic_ele in processed_data['Data']:
    # Every record gets a 'category' key; it is None when nothing matched.
    dic_ele['category'] = _first_matching_category(dic_ele['title'],
                                                   dic_category)
# display
for dic_ele in processed_data['Data']:
print '------------'
print 'id:', dic_ele['id']
print 'title:', dic_ele['title'].encode('utf-8')
print 'category:', dic_ele['category'].encode('utf-8')