search

Home  >  Q&A  >  body text

python - 文件按条件将多行合并为一行?

在这个test文件里。如果ID和DATE都相等,那么把他们合并到一行。
ID,NAME,GENDER,AGE,KESHI,DATE,TYPE,DIAG保留第一行的值就好,PROJ_1至RE_6有值的话就依次往后面添加成一行。也就是ID和DATE都相同的话,就合并成一行。

test文件地址:http://pan.baidu.com/s/1slP4wsX

plus:
最最最期望得到的结果为:

谢谢大家的帮助!

怪我咯怪我咯2803 days ago2039

reply all(2)I'll reply

  • PHP中文网

    PHP中文网2017-04-17 17:37:21

    Implemented one using Perl6

    #!/usr/bin/env perl6
    
    # One CSV record: the eight fixed columns (ID .. DIAG) plus a map of
    # PROJ name => RE value collected from the trailing PROJ_n/RE_n column pairs.
    class StudentInfo {
        has Str $.id;
        has Str $.name;
        has Str $.gender;
        has Str $.age;
        has Str $.keshi;
        has Str $.date;
        has Str $.type;
        has Str $.diag;
        has %.index;    # PROJ name => RE value

        # Construct a record directly from one raw, comma-separated line.
        method new (Str $line) {
            self.bless(|self.parse-line($line));
        }

        # Split a line into the named fixed columns and the PROJ/RE map.
        # Returns a hash suitable for flattening into bless(): the eight
        # attribute names plus 'index'.
        method parse-line(Str $line) {
            my @items = $line.split: ',';
            my @title = < id name gender age keshi date type diag >;
            my %hash;

            # PROJ/RE cells come in pairs after the fixed columns. Stop before a
            # dangling last cell so @items[$i + 1] is never read out of range.
            loop (my $i = +@title; $i + 1 < +@items; $i += 2) {
                # Drop pairs where either the PROJ or the RE cell is empty.
                if @items[$i] ne "" && @items[$i + 1] ne "" {
                    # The first value seen for a PROJ name wins.
                    %hash{@items[$i]} //= @items[$i + 1];
                }
            }

            # Pair the fixed column names with their cells; a short line gets ''
            # for the missing cells so the Str-typed attributes still accept it.
            my %ret = @title Z=> @title.keys.map({ @items[$_] // '' });

            %ret<index> := %hash;

            return %ret;
        }

        # Number of distinct PROJ entries held by this record.
        method num-of-proj() {
            %!index.elems;
        }

        # Grouping key: records are merged when both ID and DATE are equal.
        # The separator keeps e.g. id "1" + date "23" distinct from "12" + "3".
        method hash-key() {
            return $!id ~ '|' ~ $!date;
        }

        # Merge another record's PROJ/RE pairs into this one.
        # Entries already present here are kept; only new PROJ names are added.
        method meger(::?CLASS:D: $other) {
            for $other.index.kv -> $key, $value {
                %!index{$key} = $value unless %!index{$key}:exists;
            }
        }

        # Render the record as one CSV line.
        # $max - number of PROJ/RE pairs to emit; pairs are ordered by PROJ name,
        #        missing ones are padded with empty cells, extra ones are dropped.
        method generate($max) {
            my @line = ($!id, $!name, $!gender, $!age, $!keshi, $!date, $!type, $!diag);

            # Assign (not bind) so @keys is a real, indexable Array.
            my @keys = %!index.keys.sort;

            for ^$max -> $i {
                if $i < @keys.elems {
                    @line.append: (~@keys[$i], ~%!index{@keys[$i]});
                } else {
                    @line.append: ('', '');
                }
            }
            return @line.join(',');
        }
    }
    
    # Command-line entry point: read CSV files, merge records that share a
    # StudentInfo hash-key, and write one line per merged record.
    #
    #    -o / --output      write to this file (truncating), with a header line
    #    -a / --append-to   append to this file (takes precedence over -o)
    #    -i / --index-max   number of PROJ/RE pairs per output line (default 8)
    #    -d / --debug       dump every parsed and merged record to stderr
    #
    # Only records holding at least --index-max PROJ entries are written.
    sub MAIN(Str :o(:output($out))?, 
        Str :a(:append-to($append))?, 
        Int :i(:index-max($index)) = 8,
        Bool :d(:debug($debug)) = False, 
        *@files) {
        my %info;
        my @title;

        for @files -> $file {
            my @lines = $file.IO.lines;

            # An empty file has no header row to shift off; skip it.
            next unless @lines;

            @title = @lines.shift.split: ',';

            for @lines -> $line {
                # Blank lines carry no record.
                next unless $line.trim.chars;

                my StudentInfo $si .= new(~$line);

                note $si.perl if $debug;

                if %info{$si.hash-key}:exists {
                    %info{$si.hash-key}.meger($si);
                } else {
                    %info{$si.hash-key} := $si;
                }
            }
        }

        if $debug {
            .note for %info.values;
        }

        # Header: the first eight input column names followed by
        # PROJ_1,RE_1 .. PROJ_n,RE_n for n = $index.
        @title = @title[^8];
        @title.append: (< PROJ_ RE_ > xx $index).flat Z~ ((1 ... $index) xx 2).flat.sort;

        # One output path for both cases: a file we open ourselves, or stdout.
        my $to-file = defined($append) || defined($out);
        my $out-fh  = !$to-file        ?? $*OUT
                   !! defined($append) ?? $append.IO.open(:a)
                   !!                     $out.IO.open(:w);

        # The header is only written when -o was given (never for plain stdout).
        $out-fh.say(@title.join(',')) if defined($out);

        for %info.values -> $value {
            $out-fh.say: $value.generate($index)
                if $value.num-of-proj >= $index;
        }

        # Close only a handle opened here; leave stdout alone.
        $out-fh.close if $to-file;
    }

    How to use

    [root@localhost tmp]# ./meger.p6 --help
    Usage:
      ./meger.p6 [-o|--output=<Str>] [-a|--append-to=<Str>] [-i|--index-max=<Int>] [-d|--debug] [<files> ...]  
    [root@localhost tmp]# ./meger.p6 -o=out.put.log testSheet.csv testSheet.csv 
    [root@localhost tmp]# cat out.put.log 
    ID,NAME,GENDER,AGE,KESHI,DATE,TYPE,DIAG,PROJ_1,RE_1,PROJ_2,RE_2,PROJ_3,RE_3,PROJ_4,RE_4,PROJ_5,RE_5,PROJ_6,RE_6,PROJ_7,RE_7,PROJ_8,RE_8
    179802,彭永彪,男,82,神经内科,2013/1/1,血清,脑梗塞,ALP,88,ALT,8,AST,18,DBIL,3.4,GGT,32,IBIL,8.9,TBIL,12.3,TP,59.4
    179099,王元家,男,39,手足显微外科,2013/1/1,血清,足外伤,ALP,58,ALT,32,AST,19,DBIL,2.1,GGT,44,IBIL,12.2,TBIL,14.3,TP,61.5
    181012,潘国华,男,94,心肺血管科,2013/1/1,血清,高血压,ALP,84,ALT,10,AST,13,DBIL,1.5,GGT,34,IBIL,2.6,TBIL,4.1,TP,52.8
    180813,朱安清,男,40,骨二科,2013/1/1,血清,足外伤,ALP,73,ALT,36,AST,19,DBIL,3.9,GGT,24,IBIL,18.6,TBIL,22.5,TP,59.6
    180188,刘田英,女,80,综合一科,2013/1/1,血清,高血压,ALP,92,ALT,25,AST,24,DBIL,3,GGT,78,IBIL,7.3,TBIL,10.3,TP,64.9
    178748,邓晓运,女,77,肿瘤科,2013/1/1,血清,脑梗塞,ALP,48,ALT,13,AST,16,DBIL,1.8,GGT,48,IBIL,6.1,TBIL,7.9,TP,63.3
    180974,王龙,男,25,骨一科,2013/1/1,血清,肱骨干骨折,ALP,82,ALT,34,AST,36,DBIL,4.1,GGT,38,IBIL,11.6,TBIL,15.7,TP,61.7
    180940,赵金成,男,79,综合一科,2013/1/1,血清,冠心病,ALP,66,ALT,54,AST,26,DBIL,5.5,GGT,30,IBIL,12.1,TBIL,17.6,TP,59
    181168,张永堂,男,63,骨二科,2013/1/1,血清,肱骨干骨折,ALP,59,ALT,25,AST,35,DBIL,5.2,GGT,33,IBIL,14.4,TBIL,19.6,TP,57.1
    [root@localhost tmp]# 

    reply
    0
  • PHP中文网

    PHP中文网2017-04-17 17:37:21

    You can put the records into a dictionary keyed by ID and Date. If the key is not yet in the dictionary, store the row directly; if it already exists, append the row's "PROJ_1 to RE_6" values that are non-empty.

    I’m not sure what you mean by “merging into one line”, but the general code can be similar to the following. If the merging method is wrong, you can modify it yourself:

    import csv

    # Input sheet location (taken from the original answer; adjust locally).
    CSV_PATH = '/Volumes/MacDocuments/Downloads/testSheet.csv'
    ID_COLUMN = 0        # ID
    DATE_COLUMN = 5      # DATE
    FIXED_COLUMNS = 8    # ID .. DIAG; PROJ_n/RE_n pairs start after these


    def merge_rows(rows):
        """Merge rows that share the same ID and DATE into a single row.

        Args:
            rows: iterable of lists of strings, e.g. a ``csv.reader``.

        Returns:
            dict mapping ``'<ID>-<DATE>'`` to the merged row: the fixed columns
            of the first row seen for that key, followed by every non-empty
            PROJ/RE pair from all rows of the group, in input order.
        """
        merged = {}
        for row in rows:
            # Blank or truncated rows have no ID/DATE to key on; skip them.
            if len(row) <= DATE_COLUMN:
                continue
            key = '{}-{}'.format(row[ID_COLUMN], row[DATE_COLUMN])
            record = merged.setdefault(key, row[:FIXED_COLUMNS])
            # Walk the tail pairwise so a PROJ cell stays next to its RE cell;
            # pairs that are entirely empty are dropped.
            for start in range(FIXED_COLUMNS, len(row), 2):
                pair = row[start:start + 2]
                if any(cell.strip() for cell in pair):
                    record.extend(pair)
        return merged


    result = {}
    if __name__ == '__main__':
        # newline='' is what the csv module expects for files it reads.
        with open(CSV_PATH, 'r', newline='') as csvfile:
            result = merge_rows(csv.reader(csvfile))

        print(result)

    reply
    0
  • Cancelreply