Heim >Backend-Entwicklung >Python-Tutorial >python处理csv数据的方法

python处理csv数据的方法

WBOY
WBOY (Original)
2016-06-10 15:17:24 · 1173 Aufrufe

本文实例讲述了python处理csv数据的方法。分享给大家供大家参考。具体如下:

Python代码:

复制代码 代码如下:

#coding=utf-8
__author__ = 'dehua.li'
from datetime import *
import datetime
import csv
import sys
import time
import string
import os
import os.path
import pylab as plt
# Directory tree that the os.walk loop below scans for input CSV files.
rootdir='/nethome/dehua.li/orderlifeCycleData/xingzheng'
# Directory that receives one output CSV per input file (same file name).
writeFileDir="/nethome/dehua.li/orderlifeMyWork/xingzheng/csv"
# Lookup CSV: its column 1 is matched against column 0 of an input row and
# its column 3 is appended to the output row as the 'Exchange' value.
heyueFile="/nethome/dehua.li/orderlifeCycleData/heyue_150128.csv"
# Initial values of module-level names that the processing loop rebinds.
ms_acked="1"
msg=[]
ex=[]
def getTheDate(date):
    """Return the third '_'-separated field of a file name's stem.

    ``date`` is a file name shaped like ``<a>_<b>_<c>.<ext>``, for example
    ``dongzheng_orderlifeCycleData_20150111.csv`` gives ``'20150111'``.

    Raises ValueError when the name does not contain exactly one '.' or the
    part before it does not contain exactly two '_'.
    """
    # Parse the argument itself. The previous body ignored it and read the
    # module-level loop variable ``filename`` instead.
    stem,_extension=date.split('.')
    _first,_second,datePart=stem.split('_')
    return datePart
def _parseLocalTime(text):
    """Convert a 'YYYY-MM-DD HH:MM:SS' string to a naive local datetime."""
    return datetime.datetime.fromtimestamp(time.mktime(time.strptime(text,"%Y-%m-%d %H:%M:%S")))


def _loadExchangeByFeedCode(path):
    """Read the lookup CSV once and map column 1 to column 3.

    The first row seen for a given column-1 value wins, which matches a
    top-to-bottom scan that stops at the first hit. Rows with fewer than
    four columns are ignored.
    """
    mapping={}
    with open(path,'rb') as handle:
        for row in csv.reader(handle):
            if len(row)>3 and row[1] not in mapping:
                mapping[row[1]]=row[3]
    return mapping


# Placeholder only: LocalTime is rebound for every 'Sent' row before it is read.
LocalTime=_parseLocalTime("2014-11-04 20:59:59")
# Loaded once instead of re-reading the lookup file twice per 'Sent' row.
exchangeByFeedCode=_loadExchangeByFeedCode(heyueFile)
# Rows whose computed delay exceeds this many milliseconds are dropped.
maxDelayMs=10*60*1000
for parent,dirname,filenames in os.walk(rootdir):
    for filename in filenames:
        fileNameWrite=os.path.join(writeFileDir,filename)
        print(fileNameWrite)
        # 'with' guarantees the output file is flushed and closed even if a
        # row fails to parse.
        with open(fileNameWrite,'wb') as csvfile00:
            writer1=csv.writer(csvfile00)
            writer1.writerow(['FeedCode','OrderId','Status','LocalTime','Time','Exchange'])
            fileName=os.path.join(parent,filename)
            # Expected name shape:
            # filenamePart11_filenamePart12_filenamePart13.filenamePart2,
            # e.g. dongzheng_orderlifeCycleData_20150111.csv.
            # Any other shape raises ValueError here, as before.
            [filenamePart1,filenamePart2]=filename.split('.')
            [filenamePart11,filenamePart12,filenamePart13]=filenamePart1.split('_')
            print(fileName)
            # Read the input once; the same rows serve the 'Sent' pass and
            # the per-order 'Acked' scan.
            with open(fileName,'rb') as csvfile:
                CsvItem=[row for row in csv.reader(csvfile)]
            for item in CsvItem:
                if item[3]=='TPO':
                    continue
                if item[12]!='Sent':
                    continue
                # Columns 15 and 19 are '<date> <time>.<fraction>' strings.
                [tm_local,ms_local]=item[15].split('.')
                [tm_localup,ms_localup]=item[19].split('.')
                LocalTime=_parseLocalTime(tm_local)
                LocalUpdate=_parseLocalTime(tm_localup)
                # timedelta.seconds is the 0..86399 field and ignores days,
                # so a negative whole-second difference appears as a large
                # positive value and is dropped by the filter below.
                tm=int((LocalTime-LocalUpdate).seconds)*1000
                sentDelay=int(ms_local)-int(ms_localup)+tm
                ms_sent=str(sentDelay)
                if sentDelay>maxDelayMs:
                    print("ms_sent>600000")
                    continue
                # NOTE(review): this comparison was truncated in the published
                # listing ("if(...+tm)" followed directly by the print).
                # "< 0" is a reconstruction; it only gates this diagnostic.
                if sentDelay<0:
                    print('wrong1')
                exchange=exchangeByFeedCode.get(item[0])
                msg=[item[0],item[1],item[12],item[15],ms_sent]
                if exchange is not None:
                    msg.append(exchange)
                    writer1.writerow(msg)
                    ex=exchange
                # Find the 'Acked' row(s) with the same order id (column 1).
                # When several qualify, the last one is kept.
                ackedMsg=None
                for item_ in CsvItem:
                    if item_[12]=='Acked' and item_[1]==item[1]:
                        [tm_local2,ms_local2]=item_[15].split('.')
                        LocalTime2=_parseLocalTime(tm_local2)
                        tm2=int((LocalTime2-LocalTime).seconds)*1000
                        ms_acked=str(int(ms_local2)-int(ms_local)+tm2)
                        if int(ms_acked)>maxDelayMs:
                            print("MSacked>600000")
                            continue
                        ackedMsg=[item_[0],item_[1],item_[12],item_[15],ms_acked]
                # Write the Acked row only when one was actually found.
                # Previously, with no qualifying Acked row, the Sent row was
                # written a second time with the exchange appended twice.
                if ackedMsg is not None and exchange is not None:
                    ackedMsg.append(exchange)
                    writer1.writerow(ackedMsg)
                    msg=ackedMsg

复制代码 代码如下:

#coding=utf-8
#__author__ = 'dehua.li'
from datetime import *
import datetime
import csv
import sys
import time
import string
import os
import os.path
import pylab as plt
def median(lst):
    """Return the median of ``lst`` as a float.

    For an even number of items the two middle values of the sorted data are
    averaged. An empty sequence yields 0.0.
    """
    # 1 middle element for odd lengths, 2 for even lengths.
    width = 2 if len(lst) % 2 == 0 else 1
    # Floor division keeps the slice index an int under true division too;
    # for len(lst) == 0 it gives -1, and the slice [-1:1] of an empty list
    # is empty.
    start = (len(lst) - 1) // 2
    return sum(sorted(lst)[start:start + width]) / float(width)
def mean(lst):
    """Return the arithmetic mean of ``lst``, or 0 when it is empty."""
    if len(lst)==0:
        return 0
    # float() forces true division, so integer samples are not truncated
    # where '/' is integer division; this matches median(), which returns
    # a float.
    return sum(lst)/float(len(lst))
# Time-of-day thresholds. Nothing in the visible code reads these names;
# afterOneMin() compares against the same two literals directly.
nightLine="21:01:00"
morningLine="09:01:00"
def getTheDate(date):
    """Return the third '_'-separated field of a file name's stem.

    ``date`` is a file name shaped like ``<a>_<b>_<c>.<ext>``, for example
    ``dongzheng_orderlifeCycleData_20150111.csv`` gives ``'20150111'``.

    Raises ValueError when the name does not contain exactly one '.' or the
    part before it does not contain exactly two '_'.
    """
    # Parse the argument itself. The previous body ignored it and read the
    # module-level loop variable ``filename`` instead.
    stem,_extension=date.split('.')
    _first,_second,datePart=stem.split('_')
    return datePart
def afterOneMin(time):
    # Returns 1 when the time-of-day part of ``time`` falls in one of the
    # windows tested below, otherwise 0.
    # ``time`` is a string shaped '<date> <HH:MM:SS>.<fraction>': it is split
    # on '.' and the first part is split on ' '.
    # The parameter name shadows the imported ``time`` module inside this
    # function.
    [tm_local,ms_local]=string.split(time,'.')
    [ymd,hms]=string.split(tm_local,' ')
    flag=0
    # String comparison; it orders correctly only for zero-padded HH:MM:SS.
    if hms>"21:01:00":
        flag=1
    # NOTE(review): the next two conditions were truncated when the listing
    # was published (the '<"..."' upper bound and the ':' are missing), so
    # this function does not parse as-is. The original bounds cannot be
    # recovered from this text - restore them from the author's source.
    elif hms>"09:01:00" and hms         flag=1
    elif hms>"00:00:00" and hms         flag=1
    return flag
# Directory tree scanned by the os.walk loop below. The header check in that
# loop (item[0]=='FeedCode') suggests these are CSVs with a
# FeedCode,...,Exchange header row - TODO confirm.
rootdir="/nethome/dehua.li/orderlifeMyWork/xingzheng/csv"
#csvfileMaxMin = open('e:\dehua.li\csv\__xingzhenMaxMin.csv','wb')
#writer1 = csv.writer(csvfileMaxMin)
#writer1.writerow(['FeedCode','date','SentMaxTime','SentMaxLocalTime','SentMinTime','SentMinLocalTime','SentMeanTime','SentMedian','AckedMaxTime','AckedMaxLocalTime','AckedMinTime','AckedMinLocalTime','AckedMeanTime','AckedMedianTime','Exchange'])
#writer1.writerow(['FeedCode','date','SentMaxTime','SentMinTime','SentMeanTime','SentMedian','AckedMaxTime','AckedMinTime','AckedMeanTime','AckedMedianTime','Exchange'])
# codeList collects the distinct column-0 values seen in the loop below.
# msg, orderList, itemSentList, itemAckedList, feedCode and exchange are not
# used by any active statement in the visible code (some appear only inside
# the disabled string-literal block further down).
msg=[]
codeList=list()
orderList=list()
itemSentList=[]
itemAckedList=[]
feedCode=[]
exchange=[]
# Latest median/mean per exchange (zhengshang, dashang, shangqi, zhongjin)
# and status (Sent, Acked); reassigned after each file is read.
zhengshangSentMedian=0
zhengshangSentMean=0
zhengshangAckedMedian=0
zhengshangAckedMean=0
dashangSentMedian=0
dashangSentMean=0
dashangAckedMedian=0
dashangAckedMean=0
shangqiSentMedian=0
shangqiSentMean=0
shangqiAckedMedian=0
shangqiAckedMean=0
zhongjinSentMedian=0
zhongjinSentMean=0
zhongjinAckedMedian=0
zhongjinAckedMean=0
# Raw column-4 samples per exchange and status. They are appended to for every
# matching row and never cleared, so they grow across all files.
zhengshangSent=[]
zhengshangAcked=[]
dashangSent=[]
dashangAcked=[]
shangqiSent=[]
shangqiAcked=[]
zhongjinSent=[]
zhongjinAcked=[]
# One entry per processed file: the statistic computed right after that file
# was read. These are the series that get plotted.
zhengshangSentMedianAll=[]
zhengshangSentMeanAll=[]
zhengshangAckedMedianAll=[]
zhengshangAckedMeanAll=[]
dashangSentMedianAll=[]
dashangSentMeanAll=[]
dashangAckedMedianAll=[]
dashangAckedMeanAll=[]
shangqiSentMedianAll=[]
shangqiSentMeanAll=[]
shangqiAckedMedianAll=[]
shangqiAckedMeanAll=[]
zhongjinSentMedianAll=[]
zhongjinSentMeanAll=[]
zhongjinAckedMedianAll=[]
zhongjinAckedMeanAll=[]
# Exchange identifiers; the '0' placeholders are replaced from the lookup file
# just below.
zhengshang='0'
dashang='0'
shangqi='0'
zhongjin='0'
with open('/nethome/dehua.li/orderlifeCycleData/heyue_150128.csv','rb') as csvfile:
    reader=csv.reader(csvfile)
    csvItem=[row for row in reader]
    # Column 3 of four hard-coded rows supplies the identifiers.
    # NOTE(review): presumably each row index was picked because that row
    # belongs to the respective exchange in this particular file - verify
    # before using another lookup file.
    zhengshang=csvItem[300][3]
    dashang=csvItem[5][3]
    shangqi=csvItem[165][3]
    zhongjin=csvItem[435][3]
    #for item in csvItem:
    #    if item[3]==zhengshang:
     #       print item
# Read every CSV under rootdir, bucket column 4 (as int) by exchange
# (column 5) and status (column 2), then record one median/mean data point
# per file for each bucket.
# NOTE(review): the sample lists (zhengshangSent, ...) are never cleared
# between files, so each recorded point covers all files read so far, not
# just the current one - confirm this is intended.
for parent,dirname,filenames in os.walk(rootdir):
    for filename in filenames:
        # Join with the directory os.walk is currently visiting, so files in
        # subdirectories resolve correctly (was: rootdir).
        fileName=os.path.join(parent,filename)
        # 'with' closes the handle; it was previously left open.
        with open(fileName,'rb') as csvfile1:
            CsvItem=[row for row in csv.reader(csvfile1)]
        for item in CsvItem:
            # NOTE(review): the original length test was truncated in the
            # published listing ("if len(item)" followed directly by the
            # prints). "< 6" is a reconstruction based on item[5] being read
            # below. Such rows are reported and skipped instead of raising
            # IndexError.
            if len(item)<6:
                print(fileName)
                print(item)
                print('++++++++++++++++++++++++++++++')
                continue
            if item[0]=='FeedCode':
                # Header row.
                continue
            if item[0] not in codeList:
                codeList.append(item[0])
            if item[5]==zhengshang and item[2]=='Sent':
                zhengshangSent.append(int(item[4]))
            elif item[5]==zhengshang and item[2]=='Acked':
                zhengshangAcked.append(int(item[4]))
            elif item[5]==dashang and item[2]=='Sent':
                dashangSent.append(int(item[4]))
            elif item[5]==dashang and item[2]=='Acked':
                dashangAcked.append(int(item[4]))
            elif item[5]==shangqi and item[2]=='Sent':
                shangqiSent.append(int(item[4]))
                # Debug aid: show rows at or above 600000.
                if int(item[4])>=600000:
                    print("------------")
                    print(item)
            elif item[5]==shangqi and item[2]=='Acked':
                shangqiAcked.append(int(item[4]))
            elif item[5]==zhongjin and item[2]=='Sent':
                zhongjinSent.append(int(item[4]))
            elif item[5]==zhongjin and item[2]=='Acked':
                zhongjinAcked.append(int(item[4]))
            else:
                print("wrong info")
                print(item)
        # Debug aid: dump the shangqi Sent samples when their mean is high.
        # 'item' here is simply the last row the loop above looked at.
        if mean(shangqiSent)>420000:
            print(sum(shangqiSent))
            print(len(shangqiSent))
            print(item)
            print(fileName)
            print(shangqiSent)
        zhengshangSentMedian=median(zhengshangSent)
        zhengshangSentMean=mean(zhengshangSent)
        zhengshangAckedMedian=median(zhengshangAcked)
        zhengshangAckedMean=mean(zhengshangAcked)
        dashangSentMedian=median(dashangSent)
        dashangSentMean=mean(dashangSent)
        dashangAckedMedian=median(dashangAcked)
        dashangAckedMean=mean(dashangAcked)
        shangqiSentMedian=median(shangqiSent)
        shangqiSentMean=mean(shangqiSent)
        shangqiAckedMedian=median(shangqiAcked)
        shangqiAckedMean=mean(shangqiAcked)
        zhongjinSentMedian=median(zhongjinSent)
        zhongjinSentMean=mean(zhongjinSent)
        zhongjinAckedMedian=median(zhongjinAcked)
        zhongjinAckedMean=mean(zhongjinAcked)
        # One data point per file for each plotted series.
        zhengshangSentMedianAll.append(zhengshangSentMedian)
        zhengshangSentMeanAll.append(zhengshangSentMean)
        zhengshangAckedMedianAll.append(zhengshangAckedMedian)
        zhengshangAckedMeanAll.append(zhengshangAckedMean)
        dashangSentMedianAll.append(dashangSentMedian)
        dashangSentMeanAll.append(dashangSentMean)
        dashangAckedMedianAll.append(dashangAckedMedian)
        dashangAckedMeanAll.append(dashangAckedMean)
        shangqiSentMedianAll.append(shangqiSentMedian)
        shangqiSentMeanAll.append(shangqiSentMean)
        shangqiAckedMedianAll.append(shangqiAckedMedian)
        shangqiAckedMeanAll.append(shangqiAckedMean)
        zhongjinSentMedianAll.append(zhongjinSentMedian)
        zhongjinSentMeanAll.append(zhongjinSentMean)
        zhongjinAckedMedianAll.append(zhongjinAckedMedian)
        zhongjinAckedMeanAll.append(zhongjinAckedMean)
# Four charts, one per statistic. Each draws the four per-exchange series
# against a 1-based file index and is saved as a PNG.
# Entry layout: (figure number, title, series in draw order, output path).
_chartSpecs=(
    (1,
     'SentMean r-zhengshang b-dashang,green-shangqi grey-zhongjin',
     (zhengshangSentMeanAll,dashangSentMeanAll,shangqiSentMeanAll,zhongjinSentMeanAll),
     '/nethome/dehua.li/orderlifeMyWork/xingzheng/data_noTPO_in10minutes/SentMean.png'),
    (2,
     'SentMedian r-zhengshang b-dashang,green-shangqi grey-zhongjin',
     (zhengshangSentMedianAll,dashangSentMedianAll,shangqiSentMedianAll,zhongjinSentMedianAll),
     '/nethome/dehua.li/orderlifeMyWork/xingzheng/data_noTPO_in10minutes/SentMedian.png'),
    (3,
     'AckedMean r-zhengshang b-dashang,green-shangqi grey-zhongjin',
     (zhengshangAckedMeanAll,dashangAckedMeanAll,shangqiAckedMeanAll,zhongjinAckedMeanAll),
     '/nethome/dehua.li/orderlifeMyWork/xingzheng/data_noTPO_in10minutes/AckedMean.png'),
    (4,
     'AckedMedian r-zhengshang b-dashang,green-shangqi grey-zhongjin',
     (zhengshangAckedMedianAll,dashangAckedMedianAll,shangqiAckedMedianAll,zhongjinAckedMedianAll),
     '/nethome/dehua.li/orderlifeMyWork/xingzheng/data_noTPO_in10minutes/AckedMedian.png'),
)
# Line colour per series position: zhengshang, dashang, shangqi, zhongjin.
_seriesColours=('r','b','g','grey')

# Create all four figures up front, in numeric order.
for _spec in _chartSpecs:
    plt.figure(_spec[0])

for _figureNumber,_chartTitle,_seriesGroup,_pngPath in _chartSpecs:
    plt.figure(_figureNumber)
    plt.title(_chartTitle)
    for _values,_colour in zip(_seriesGroup,_seriesColours):
        plt.plot(range(1,len(_values)+1),_values,_colour)
    plt.savefig(_pngPath)

plt.show()
print('over')

希望本文所述对大家的Python程序设计有所帮助。

Stellungnahme:
Der Inhalt dieses Artikels wird freiwillig von Internetnutzern beigesteuert und das Urheberrecht liegt beim ursprünglichen Autor. Diese Website übernimmt keine entsprechende rechtliche Verantwortung. Wenn Sie Inhalte finden, bei denen der Verdacht eines Plagiats oder einer Rechtsverletzung besteht, wenden Sie sich bitte an admin@php.cn