Home >Backend Development >Python Tutorial >PythonCookbook - Data Structures and Algorithms

PythonCookbook - Data Structures and Algorithms

巴扎黑
巴扎黑Original
2016-11-26 10:13:261394browse

Chapter 1 Data Structures and Algorithms

1.1 Decompose a sequence into separate variables

p = (4, 5)
x, y = p
print x 
print y 
data = [ 'ACME', 50, 91.1, (2012, 12, 21) ]
name, shares, price, date = data
print name
print shares 
print price 
print date 
name, shares, price, (year, mon, day ) = data
print year 
p = (4, 5)
#x, y, z = p 错误!!!
s = 'hello!'
a, b, c, d, e, f = s
print a
print f
data = [ 'ACME', 50, 91.1, (2012, 12, 21) ]
_, shares, price, _ = data 
print shares
print price
#其他数据可以丢弃了

1.2 Decompose elements from an iterable of arbitrary length

from audioop import avg
def drop_first_last(grades):
    first, *middle, last = grades
    return avg(middle)
record = ('Dave', 'dave@example.com', '777-333-2323', '234-234-2345')
name, email, *phone_numbers = record
print name 
print email
print phone_numbers
*trailing, current = [10, 8, 7, 2, 5]
print trailing  #[10, 8, 7, 2, ]
print current #5
records = [
           ('foo', 1, 2),
           ('bar', 'hello'),
           ('foo', 5, 3)
           ]
def do_foo(x, y):
    print ('foo', x, y)
def do_bar(s):
    print ('bar', s)
for tag, *args in records:
    if tag == 'foo':
        do_foo(*args)
    elif tag == 'bar':
        do_bar(*args)
        
line = 'asdf:fedfr234://wef:678d:asdf'
uname, *fields, homedir, sh = line.split(':')
print uname 
print homedir
record = ('ACME', 50, 123.45, (12, 18, 2012))
name, *_, (*_, year) = record
print name
print year
items = [1, 10, 7, 4, 5, 9]
head, *tail = items
print head #1
print tail #[10, 7, 4, 5, 9]
def sum(items):
    head, *tail = items
    return head + sum(tail) if tail else head
sum(items)

1.3 Save the last N elements

from _collections import deque
def search(lines, pattern, history=5):
    previous_lines = deque(maxlen = history)
    for line in lines:
        if pattern in line:
            yield line, previous_lines
        previous_lines.append(line)
# Example use on a file
if __name__ == '__main__':
    with open('somefile.txt') as f:
        for line, prevlines in search(f, 'python', 5):
            for pline in prevlines:
                print (pline) #print (pline, end='')
            print (line) #print (pline, end='')
            print ('-'*20)
            
q = deque(maxlen=3)
q.append(1)
q.append(2)
q.append(3)
print q
q.append(4)
print q
q = deque()
q.append(1)
q.append(2)
q.append(3)
print q
q.appendleft(4)
print q
q_pop = q.pop()
print q_pop
print q
q_popleft = q.popleft()
print q_popleft
print q

1.4 Find the largest or Minimum N elements

import heapq
nums = [1,30,6,2,36,33,46,3,23,43]
print (heapq.nlargest(3, nums))
print (heapq.nsmallest(3, nums))
portfolio = [
                 {'name':'IBM', 'shares':100, 'price':2.4},
                 {'name':'A', 'shares':1040, 'price':12.4},
                 {'name':'S', 'shares':40, 'price':23.4},
                 {'name':'D', 'shares':1, 'price':2.49},
                 {'name':'F', 'shares':9, 'price':24}
             ]
cheap = heapq.nsmallest(3, portfolio, key=lambda s: s['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda s: s['price'])
print cheap
print expensive
nums = [1,8,2,23,7,-4,18,23,42,37,2]
heap = list(nums)
print heap
heapq.heapify(heap)
print heap
print heapq.heappop(heap)
print heapq.heappop(heap)
print heapq.heappop(heap)

1.5 Implement priority queue

import heapq
class PriorityQueue:
    def __init__(self):
        self._queue = []
        self._index = 0
    def push(self, item, priority):
        heapq.heappush(self._queue, (-priority, self._index, item))
        self._index += 1
    def pop(self):
        return heapq.heappop(self._queue)[-1]
#Example
class Item:
    def __init__(self, name):
        self.name = name
    def __repr__(self):
        return 'Item({!r})'.format(self.name)
q = PriorityQueue()
q.push(Item('foo'), 1)
q.push(Item('spam'), 4)
q.push(Item('bar'), 5)
q.push(Item('grok'), 1)
print q.pop()
print q.pop()
print q.pop()
a = Item('foo')
b = Item('bar')
#a < b    error
a = (1, Item(&#39;foo&#39;))
b = (5, Item(&#39;bar&#39;))
print a < b
c = (1, Item(&#39;grok&#39;))
#a < c  error
a = (1, 0, Item(&#39;foo&#39;))
b = (5, 1, Item(&#39;bar&#39;))
c = (1, 2, Item(&#39;grok&#39;))
print a < b
print a < c

1.6 Map objects to multiple values ​​in the dictionary

d = {  
        &#39;a&#39; : [1, 2, 3],  
        &#39;b&#39; : [4, 5]  
     }  
e = {  
        &#39;a&#39; : {1, 2, 3},  
        &#39;b&#39; : {4, 5}  
     }  
  
from collections import defaultdict  
  
d = defaultdict(list)  
d[&#39;a&#39;].append(1)  
d[&#39;a&#39;].append(2)  
d[&#39;a&#39;].append(3)  
print d  
  
d = defaultdict(set)  
d[&#39;a&#39;].add(1)  
d[&#39;a&#39;].add(2)  
d[&#39;a&#39;].add(3)  
print d  
  
d = {}  
d.setdefault(&#39;a&#39;, []).append(1)  
d.setdefault(&#39;a&#39;, []).append(2)  
d.setdefault(&#39;b&#39;, []).append(3)  
print d   
  
d = {}  
for key, value in d:#pairs:  
    if key not in d:  
        d[key] = []  
    d[key].append(value)  
  
d = defaultdict(list)  
for key, value in d:#pairs:  
    d[key].append(value)

1.7 Keep the dictionary in order

from collections import OrderedDict  
  
d = OrderedDict()  
d[&#39;foo&#39;] = 1  
d[&#39;bar&#39;] = 2  
d[&#39;spam&#39;] = 3  
d[&#39;grol&#39;] = 4  
for key in d:  
    print (key, d[key])  
      
import json  
  
json.dumps(d)

1.8 Computational issues related to dictionary

price = {  
            &#39;ACME&#39;:23.45,  
            &#39;IBM&#39;:25.45,  
            &#39;FB&#39;:13.45,  
            &#39;IO&#39;:4.45,  
            &#39;JAVA&#39;:45.45,  
            &#39;AV&#39;:38.38,  
         }  
  
min_price = min( zip( price.values(), price.keys() ) )  
print min_price  
  
max_price = max( zip( price.values(), price.keys() ) )  
print max_price  
  
price_sorted = sorted( zip( price.values(), price.keys() ) )  
print price_sorted     
  
price_and_names = zip( price.values(), price.keys() )  
print (min(price_and_names))  
#print (max(price_and_names))  error  zip()创建了迭代器,内容只能被消费一次  
  
print min(price)  
print max(price)  
  
print min(price.values())  
print max(price.values())  
  
  
print min(price, key = lambda k : price[k])  
print max(price, key = lambda k : price[k])  
  
min_value = price[ min(price, key = lambda k : price[k]) ]  
print min_value  
  
price = {  
            &#39;AAA&#39;: 23,  
            &#39;ZZZ&#39;: 23,  
         }  
print min( zip( price.values(), price.keys() ) )  
print max( zip( price.values(), price.keys() ) )

1.9 Find similarities in two dictionaries

a = {  
        &#39;x&#39;:1,  
        &#39;y&#39;:2,  
        &#39;z&#39;:3  
     }  
b = {  
        &#39;x&#39;:11,  
        &#39;y&#39;:2,  
        &#39;w&#39;:10  
     }  
  
print a.keys() & b.keys() #{&#39;x&#39;,&#39;y&#39;}  
print a.keys() - b.keys() #{&#39;z&#39;}  
print a.items() & b.items() #{(&#39;y&#39;, 2)}  
  
c = {key: a[key] for key in a.keys() - {&#39;z&#39;, &#39;w&#39;} }  
print c #{&#39;x&#39;:1, &#39;y&#39;:2}

1.10 Remove duplicates from the sequence and keep the order of elements unchanged

def dedupe(items):  
    seen = set()  
    for item in items:  
        if item not in seen:  
            yield item  
            seen.add(item)  
#example  
a = [1,5,2,1,9,1,5,10]  
print list(dedupe(a))  
  
def dedupe2(items, key = None):  
    seen = set()  
    for item in items:  
        val = item if key is None else key(item)  
        if val not in seen:  
            yield item  
            seen.add(val)   
#example  
a = [   
        {&#39;x&#39;:1, &#39;y&#39;:2},   
        {&#39;x&#39;:1, &#39;y&#39;:3},   
        {&#39;x&#39;:1, &#39;y&#39;:2},   
        {&#39;x&#39;:2, &#39;y&#39;:4},   
     ]  
print list( dedupe2(a, key=lambda d : (d[&#39;x&#39;], d[&#39;y&#39;]) ) )  
print list( dedupe2(a, key=lambda d : (d[&#39;x&#39;]) ) )  
  
a = [1,5,2,1,9,1,5,10]  
print set(a)

1.11 Name the slices

items = [0,1,2,3,4,5,6]  
a = slice(2,4)  
print items[2:4]  
print items[a]  
items[a] = [10,11]  
print items  
  
print a.start  
print a.stop  
print a.step

1.12 Find the one that appears the most in the sequence Elements

words = [  
            &#39;look&#39;, &#39;into&#39;, &#39;my&#39;, &#39;eyes&#39;, &#39;look&#39;, &#39;into&#39;, &#39;my&#39;, &#39;eyes&#39;,  
            &#39;the&#39;, &#39;look&#39;  
         ]  
  
from collections import Counter  
  
word_counts = Counter(words)  
top_three = word_counts.most_common(3)  
print top_three  
  
print word_counts[&#39;look&#39;]  
print word_counts[&#39;the&#39;]  
  
morewords = [&#39;why&#39;, &#39;are&#39;, &#39;you&#39;, &#39;not&#39;, &#39;looking&#39;, &#39;in&#39;, &#39;my&#39;, &#39;eyes&#39;]  
for word in morewords:  
    word_counts[word] += 1  
print word_counts[&#39;eyes&#39;]  
print word_counts[&#39;why&#39;]  
  
word_counts.update(morewords)  
print word_counts[&#39;eyes&#39;]  
print word_counts[&#39;why&#39;]  
  
a = Counter(words)  
b = Counter(morewords)  
print a  
print b  
c = a + b  
print c  
d = a - b  
print b

1.13 Sorting dictionary lists by common keys

rows = [  
            {&#39;fname&#39;:&#39;Brian&#39;, &#39;lname&#39;:&#39;Jones&#39;, &#39;uid&#39;:1003},  
            {&#39;fname&#39;:&#39;David&#39;, &#39;lname&#39;:&#39;Beazley&#39;, &#39;uid&#39;:1002},  
            {&#39;fname&#39;:&#39;John&#39;, &#39;lname&#39;:&#39;Cleese&#39;, &#39;uid&#39;:1001},  
            {&#39;fname&#39;:&#39;Big&#39;, &#39;lname&#39;:&#39;Jones&#39;, &#39;uid&#39;:1004}  
        ]  
  
from operator import itemgetter  
  
rows_by_fname = sorted(rows, key=itemgetter(&#39;fname&#39;))  
rows_by_uid = sorted(rows, key=itemgetter(&#39;uid&#39;))  
print rows_by_fname  
print rows_by_uid  
rows_by_lfname = sorted(rows, key=itemgetter(&#39;lname&#39;, &#39;fname&#39;))  
print rows_by_lfname  
  
rows_by_fname = sorted(rows, key=lambda r: r[&#39;fname&#39;])  
rows_by_lfname = sorted(rows, key=lambda r: (r[&#39;fname&#39;], r[&#39;lname&#39;]))  
print rows_by_fname  
print rows_by_lfname  
  
print min(rows, key=itemgetter(&#39;uid&#39;))  
print max(rows, key=itemgetter(&#39;uid&#39;))

1.14 Sorting objects that do not natively support comparison operations

class User:  
    def __init__(self, user_id):  
        self.user_id = user_id  
    def __repr__(self):  
        return &#39;User({})&#39;.format(self.user_id)  
  
users = [User(23), User(3), User(99)]  
print users  
print sorted(users, key = lambda u: u.user_id)  
  
from operator import attrgetter  
print sorted(users, key=attrgetter(&#39;user_id&#39;))  
  
print min(users, key=attrgetter(&#39;user_id&#39;))  
print max(users, key=attrgetter(&#39;user_id&#39;))

1.15 Grouping records based on fields

rows = [  
            {&#39;address&#39;:&#39;5412 N CLARK&#39;, &#39;data&#39;:&#39;07/01/2012&#39;},  
            {&#39;address&#39;:&#39;5232 N CLARK&#39;, &#39;data&#39;:&#39;07/04/2012&#39;},  
            {&#39;address&#39;:&#39;5542 E 58ARK&#39;, &#39;data&#39;:&#39;07/02/2012&#39;},  
            {&#39;address&#39;:&#39;5152 N CLARK&#39;, &#39;data&#39;:&#39;07/03/2012&#39;},  
            {&#39;address&#39;:&#39;7412 N CLARK&#39;, &#39;data&#39;:&#39;07/02/2012&#39;},  
            {&#39;address&#39;:&#39;6789 w CLARK&#39;, &#39;data&#39;:&#39;07/03/2012&#39;},  
            {&#39;address&#39;:&#39;9008 N CLARK&#39;, &#39;data&#39;:&#39;07/01/2012&#39;},  
            {&#39;address&#39;:&#39;2227 W CLARK&#39;, &#39;data&#39;:&#39;07/04/2012&#39;}  
        ]  
  
from operator import itemgetter  
from itertools import groupby  
  
rows.sort(key=itemgetter(&#39;data&#39;))  
for data, items in groupby(rows, key=itemgetter(&#39;data&#39;)):  
    print (data)  
    for i in items:  
        print (&#39; &#39;, i)  
          
from collections import defaultdict  
rows_by_date = defaultdict(list)  
for row in rows:  
    rows_by_date[row[&#39;data&#39;]].append(row)  
for r in rows_by_date[&#39;07/04/2012&#39;]:  
    print(r)

1.16 Filtering elements in a sequence

mylist = [1,4,-5,10,-7,2,3,-1]  
print [n for n in mylist if n > 0]#列表推导式  
print [n for n in mylist if n < 0]  
  
pos = (n for n in mylist if n > 0)#生成器表达式  
print pos  
for x in pos:  
    print(x)  
  
values = [&#39;1&#39;, &#39;2&#39;, &#39;-3&#39;, &#39;-&#39;, &#39;4&#39;, &#39;N/A&#39;, &#39;5&#39;]  
def is_int(val):  
    try:  
        x = int(val)  
        return True  
    except ValueError:  
        return False  
ivals = list(filter(is_int, values))  
print(ivals)  
  
mylist = [1,4,-5,10,-7,2,3,-1]  
import math  
print [math.sqrt(n) for n in mylist if n > 0]  
  
clip_neg = [n if n > 0 else 0 for n in mylist]  
print clip_neg  
  
clip_pos = [n if n < 0 else 0 for n in mylist]  
print clip_pos  
  
addresses = [&#39;a&#39;, &#39;b&#39;, &#39;c&#39;, &#39;d&#39;, &#39;e&#39;, &#39;f&#39;, &#39;g&#39;, &#39;h&#39;]  
counts = [0, 3, 10, 4, 1, 7, 6, 1]  
from itertools import compress  
more5 = [n > 5 for n in counts]  
print more5  
print list(compress(addresses, more5))

1. 17 from the dictionary Extract the subset

prices = {&#39;ACNE&#39;:45.23, &#39;AAPL&#39;:612.78, &#39;IBM&#39;:205.55, &#39;HPQ&#39;:37.20, &#39;FB&#39;:10.75}  
p1 = { key:value for key, value in prices.items() if value > 200 }  
print p1  
  
tech_names = {&#39;AAPL&#39;, &#39;IBM&#39;, &#39;HPQ&#39;}  
p2 = { key:value for key, value in prices.items() if key in tech_names }  
print p2  
  
p3 = dict( (key, value) for key, value in prices.items() if value > 200 ) #慢  
print p3  
  
tech_names = {&#39;AAPL&#39;, &#39;IBM&#39;, &#39;HPQ&#39;}  
p4 = { key:prices[key] for key in prices.keys() if key in tech_names } #慢  
print p4

1.18 Map the name to the elements of the sequence

from collections import namedtuple  
  
Subscriber = namedtuple(&#39;Subscriber&#39;, [&#39;addr&#39;, &#39;joined&#39;])  
sub = Subscriber(&#39;wang@qq.com&#39;, &#39;2020-10-10&#39;)  
print sub  
print sub.joined  
print sub.addr  
  
print len(sub)  
addr, joined = sub  
print addr  
print joined  
  
def compute_cost(records):  
    total = 0.0  
    for rec in records:  
        total += rec[1]*rec[2]  
    return total  
  
Stock = namedtuple(&#39;Stock&#39;, [&#39;name&#39;, &#39;shares&#39;, &#39;price&#39;])  
def compute_cost2(records):  
    total = 0.0  
    for rec in records:  
        s = Stock(*rec)  
        total += s.shares * s.price  
    return total  
  
s = Stock(&#39;ACME&#39;, 100, 123.45)  
print s  
#s.shares = 75    #error  
s = s._replace(shares=75)  
print s  
  
Stock = namedtuple(&#39;Stock&#39;, [&#39;name&#39;, &#39;shares&#39;, &#39;price&#39;, &#39;date&#39;, &#39;time&#39;])  
stock_prototype = Stock(&#39;&#39;,0, 0.0, None, None)  
def dict_to_stock(s):  
    return stock_prototype._replace(**s)  
a = {&#39;name&#39;:&#39;ACME&#39;, &#39;shares&#39;:100, &#39;price&#39;:123.45}  
print dict_to_stock(a)  
b = {&#39;name&#39;:&#39;ACME&#39;, &#39;shares&#39;:100, &#39;price&#39;:123.45, &#39;date&#39;:&#39;12/12/2012&#39;}  
print dict_to_stock(b)

1.19 Convert and convert the data at the same time

nums = [1, 2, 3, 4, 5]  
s = sum( x*x for x in nums )  
print s  
  
import os  
files = os.listdir(&#39;dirname&#39;)  
if any(name.endswith(&#39;.py&#39;) for name in files):  
    print(&#39;There be Python!&#39;)  
else:  
    print(&#39;sorry, no Python!&#39;)  
      
s = (&#39;ACME&#39;, 50, 123.45)  
print(&#39;,&#39;.join(str(x) for x in s))  
  
portfolio = [  
                {&#39;name&#39;:&#39;GOOG&#39;, &#39;shares&#39;:50},  
                {&#39;name&#39;:&#39;YHOO&#39;, &#39;shares&#39;:75},  
                {&#39;name&#39;:&#39;AOL&#39;, &#39;shares&#39;:20},  
                {&#39;name&#39;:&#39;SCOX&#39;, &#39;shares&#39;:65}  
             ]  
min_shares = min(s[&#39;shares&#39;] for s in portfolio)  
print min_shares      
  
min_shares = min(portfolio, key=lambda s: s[&#39;shares&#39;])  
print min_shares  
 1.20    将多个映射合并为单个映射
Java代码  
a = {&#39;x&#39;:1, &#39;z&#39;:3}  
b = {&#39;y&#39;:2, &#39;z&#39;:4}  
  
#from collections import ChainMap  
from pip._vendor.distlib.compat import ChainMap  
  
c = ChainMap(a, b)  
print(c[&#39;x&#39;])  
print(c[&#39;y&#39;])  
print(c[&#39;z&#39;]) #from a    第一个映射中的值  
  
print len(c)  
print list(c.values())  
  
c[&#39;z&#39;] = 10  
c[&#39;w&#39;] = 40  
del c[&#39;x&#39;]  
print a  
#del c[&#39;y&#39;]    #error    修改映射的操作总是会作用在列表的第一个映射结构上  
  
values = ChainMap()  
values[&#39;x&#39;] = 1  
values = values.new_child()#add a new map  
values[&#39;x&#39;] = 2  
values = values.new_child()  
values[&#39;x&#39;] = 3  
#print values  
print values[&#39;x&#39;]  
values = values.parents  
print values[&#39;x&#39;]  
values = values.parents  
print values[&#39;x&#39;]  
  
a = {&#39;x&#39;:1, &#39;z&#39;:3}  
b = {&#39;y&#39;:2, &#39;z&#39;:4}  
merged = dict(b)  
merged.update(a)  
print merged[&#39;x&#39;]  
print merged[&#39;y&#39;]  
print merged[&#39;z&#39;]  
a[&#39;x&#39;] = 13  
print merged[&#39;x&#39;]   #不会反应到合并后的字典中  
  
a = {&#39;x&#39;:1, &#39;z&#39;:3}  
b = {&#39;y&#39;:2, &#39;z&#39;:4}  
merged = ChainMap(a, b)  
print merged[&#39;x&#39;]  
a[&#39;x&#39;] = 42  
print merged[&#39;x&#39;]   #会反应到合并后的字典中



Statement:
The content of this article is voluntarily contributed by netizens, and the copyright belongs to the original author. This site does not assume corresponding legal responsibility. If you find any content suspected of plagiarism or infringement, please contact admin@php.cn
Previous article:python java callNext article:python java call