标签:rows date name C1PythonCookBook key print 2012
# set jupyter notebook theme
# jt -t monokai -f roboto -fs 12 -ofs 10
def SEE(var):
    """Placeholder for a formatted debug-print helper; currently a no-op.

    The author's note on the original stub says the attempt to write a
    formatted print function failed, so the function accepts a value and
    does nothing with it.

    Args:
        var: The value that would be displayed (unused).

    Returns:
        None.
    """
    pass
#use ## (two pound signs for knowledge discription)
#use # (one spaces for common comments such as illustration of program,
# or some error information)
# callers_local_vars = inspect.currentframe().f_back.f_locals.items()
# a = [var_name for var_name, var_val in callers_local_vars if var_val is var]
# print(a,var)
# print(str(x)+":",x)
# import inspect
# def retrieve_name(var):
# callers_local_vars = inspect.currentframe().f_back.f_locals.items()
# return [var_name for var_name, var_val in callers_local_vars if var_val is var]
# A, B, C = [1, 2, 3], [4, 5, 6], [7, 8, 9]
# ls = [A, B, C]
# for i in ls:
# i1 = retrieve_name(i)[0]
# print('%s ='%i1, i)
# words = [
# 'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
# 'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
# 'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
# 'my', 'eyes', "you're", 'under'
# ]
# from collections import Counter
# word_counts = Counter(words)
# SEE(word_counts['eyes'])
# # word_counts['eyes']
1.1. Unpacking a Sequence into Separate Variables
data = ["monday",1,0.5,(1,2,3)]
x, y, z, t = data
print(x,y,z,t)
t1,t2,t3=t
_,middle,_ = t # a throwaway variable name
print(t1,t2,t3)
print(middle)
#x1,x2 = t
#ValueError: too many values to unpack (expected 2)
## unpacking works with any iterable value, such as a string, not just tuples or lists
str1 = "Hello"
a,b,c,d,e = str1
print(c)
monday 1 0.5 (1, 2, 3)
1 2 3
2
l
1.2. Unpacking Elements from Iterables of Arbitrary Length
## Star expression
def drop_first_and_last(grades):
    """Return the average of *grades* with the first and last entries dropped.

    Args:
        grades: A sequence (or other iterable) of numbers.

    Returns:
        The arithmetic mean of the middle entries.

    Raises:
        ValueError: If nothing is left to average once the first and last
            entries are removed. Star-unpacking itself also raises
            ValueError when fewer than two entries are supplied.
    """
    # The star expression collects everything between the two ends into a list.
    _, *middle, _ = grades
    if not middle:
        raise ValueError("need at least three grades to drop the first and last")
    # The original called an undefined avg(); compute the mean directly.
    return sum(middle) / len(middle)
## Star expression
record = ["Crazy Dave",21,'wabbywabbo@PVZ.com','111-222-333','444-555-666',1]
name, age, mailaddress, *telephone = record
print(name,age,mailaddress,telephone)
# *telephone collects every remaining element into a list: the two phone numbers and the trailing 1
## iterating over a sequence of tuples of varying length ,a sequence of tagged tuples
record = [
    ('foo', 1, 2),
    ('kee', 10, 11),
]


def do_foo(x, y):
    """Print the 'foo' tag followed by its two arguments."""
    print('foo', x, y)


def do_kee(x, y):
    """Print the 'kee' tag followed by its two arguments."""
    print('kee', x, y)


# Map each tag to the function that handles it; tuples with an unknown tag
# are skipped, exactly as an if/elif chain without an else would skip them.
handlers = {'foo': do_foo, 'kee': do_kee}

for tag, *args in record:
    handler = handlers.get(tag)
    if handler is not None:
        handler(*args)  # don't forget the asterisk here
## Star unpacking can also be useful when combined with certain kinds of string processing
## operations, such as splitting.
line = 'nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false'
uname, *fields, homedir, sh = line.split(':')
print(uname, homedir, sh)
print(f"fields:{fields}")
## Star unpacking can bind to a throwaway variable name such as _
record = ('ACME',50, 123.45,(12,18,2012))
name, *_, (*_, year) = record
print(name, year)
## split a list into head and tail components
items = [0,1,2,3,4]
head, *tail = items
print(head)
print(f'tail:{tail}')
# print(f'*tail:{*tail}') SyntaxError: f-string: can't use starred expression here
print('*tail:',*tail)
Crazy Dave 21 wabbywabbo@PVZ.com ['111-222-333', '444-555-666', 1]
foo 1 2
kee 10 11
nobody /var/empty /usr/bin/false
fields:['*', '-2', '-2', 'Unprivileged User']
ACME 2012
0
tail:[1, 2, 3, 4]
*tail: 1 2 3 4
?1.3. Keeping the Last N Items
## what is "yields"? Page 24
## deque()
from collections import deque # without this import: NameError: name 'deque' is not defined
q = deque(maxlen=2) # deque() with maxlen set: the length is bounded at 2
q.append(1)
q.append(2)
print(q)
q.append(3)
print(q)
q.appendleft(-1)
print(q)
q.popleft()
print(q)
q.pop()
print(q)
# if you don't set 'maxlen': there is something different
p=deque()
p.append(1)
p.append(2)
print(p)
p.append(3)
print(p)
p.appendleft(-1)
print(p)
p.pop()
print(p) # pop() removes and returns the rightmost element (here 3), regardless of insertion order
p.popleft()
print(p) # popleft() removes and returns the leftmost element (here -1, which was actually the most recently added)
deque([1, 2], maxlen=2)
deque([2, 3], maxlen=2)
deque([-1, 2], maxlen=2)
deque([2], maxlen=2)
deque([], maxlen=2)
deque([1, 2])
deque([1, 2, 3])
deque([-1, 1, 2, 3])
deque([-1, 1, 2])
deque([1, 2])
1.4. Finding the Largest or Smallest N Items
## heapq module
import heapq
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3,nums)) #print [42,37,23]
print(heapq.nsmallest(3,nums))#print [-4,1,2]
# using a key parameter in complicated data structure
portfolio = [
{'name': 'IBM', 'shares': 100, 'price': 91.1},
{'name': 'AAPL', 'shares': 50, 'price': 543.22},
{'name': 'FB', 'shares': 200, 'price': 21.09},
{'name': 'HPQ', 'shares': 35, 'price': 31.75},
{'name': 'YHOO', 'shares': 45, 'price': 16.35},
{'name': 'ACME', 'shares': 75, 'price': 115.65}
]
cheap = heapq.nsmallest(3,portfolio,key= lambda s:s['price'])
expensive = heapq.nlargest(3,portfolio,key = lambda s:s['price'])
print(cheap)
print(expensive)
# they work by first converting the data into a list where items are ordered as a heap
# The most important feature of a heap is that heap[0] is always the smallest item.
# heapq.heappop() method, which pops off the first item and replaces it with the next smallest item
# If you are simply trying to find the single smallest or largest item (N=1), it is faster to use min() and max().
# Similarly, if N is about the same size as the collection itself, it is usually faster to sort it first and take a slice (i.e., use sorted(items)[:N] or sorted(items)[-N:]).
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(nums)
heap = list(nums)
print("before heapify:",heap)
heapq.heapify(heap)
print("after heapify: ",heap)
print("pop:",heapq.heappop(heap))
print(heap)
print("pop:",heapq.heappop(heap))
print(heap)
print("pop:",heapq.heappop(heap))
print(heap)
[42, 37, 23]
[-4, 1, 2]
[{'name': 'YHOO', 'shares': 45, 'price': 16.35}, {'name': 'FB', 'shares': 200, 'price': 21.09}, {'name': 'HPQ', 'shares': 35, 'price': 31.75}]
[{'name': 'AAPL', 'shares': 50, 'price': 543.22}, {'name': 'ACME', 'shares': 75, 'price': 115.65}, {'name': 'IBM', 'shares': 100, 'price': 91.1}]
[1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
before heapify: [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
after heapify: [-4, 2, 1, 23, 7, 2, 18, 23, 42, 37, 8]
pop: -4
[1, 2, 2, 23, 7, 8, 18, 23, 42, 37]
pop: 1
[2, 2, 8, 23, 7, 37, 18, 23, 42]
pop: 2
[2, 7, 8, 23, 42, 37, 18, 23]
?1.5. Implementing a Priority Queue
## some problem in self: push(self, item, priority) ,q.push(Item('foo'),1)
1.6. Mapping Keys to Multiple Values in a Dictionary
## defaultdict
from collections import defaultdict
d = defaultdict(list) # list use .append() method to add elements
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)
print(d)
d = defaultdict(set) # set use .add() method to add elements
d['a'].add(1)
d['a'].add(2)
d['a'].add(1)
d['b'].add(4)
print(d)
d = {} # a regular dictionary use setdefault() method to add elements
# setdefault(key, []) creates the empty list the first time a key is seen, so no explicit d[key] = [] initialization is needed
d.setdefault('a',[]).append(1)
d.setdefault('a',[]).append(2)
d.setdefault('b',[]).append(3)
print(d)
defaultdict(<class 'list'>, {'a': [1, 2], 'b': [4]})
defaultdict(<class 'set'>, {'a': {1, 2}, 'b': {4}})
{'a': [1, 2], 'b': [3]}
1.7. Keeping Dictionaries in Order
## OrderedDict
from collections import OrderedDict
d = OrderedDict()
# It exactly preserves the original insertion order of data when iterating
d['foo'] = 1
d['Crazy'] = 2
d['Dave'] = 3
for key in d:
print(key, d[key])
import json # json.dumps()
json.dumps(d)
foo 1
Crazy 2
Dave 3
'{"foo": 1, "Crazy": 2, "Dave": 3}'
1.8. Calculating with Dictionaries
## sorted(zip())
prices = {
'ACME': 45.23,
'AAPL': 612.78,
'IBM': 205.55,
'HPQ': 37.20,
'FB': 10.75
}
max_price = max(zip(prices.values(),prices.keys()))
min_price = min(zip(prices.values(),prices.keys()))
print("max price:",max_price)
print("min price:",min_price)
sorted_prices = sorted(zip(prices.values(),prices.keys()))
print(sorted_prices)
# zip() creates an iterator that can only be consumed once.
price_and_names = zip(prices.values(),prices.keys())
print("price_and_names:",price_and_names)
print(min(price_and_names))
#print(max(price_and_names)) # ValueError: max() arg is an empty sequence
print(max(prices)) # max of key
print(max(prices.values())) # use values() to find max or min
print(min(prices, key=lambda k:prices[k])) # use lambda to find key
print(prices[min(prices, key=lambda k:prices[k])]) # use lambda and key to find value
values_equal_key_differ = {'AAA':12,'ZZZ':12}
print("when values are equal, compare to keys, max:",\
max(zip(values_equal_key_differ.values(),values_equal_key_differ.keys())))
max price: (612.78, 'AAPL')
min price: (10.75, 'FB')
[(10.75, 'FB'), (37.2, 'HPQ'), (45.23, 'ACME'), (205.55, 'IBM'), (612.78, 'AAPL')]
price_and_names: <zip object at 0x0000025DC6C204C0>
(10.75, 'FB')
IBM
612.78
FB
10.75
when values are equal, compare to keys, max: (12, 'ZZZ')
1.9. Finding Commonalities in Two Dictionaries
## & - operations on dictionaries
a = {
'x':1,
'y':2,
'z':3
}
b = {
'w':1,
'y':2,
'z':-3
}
print("a:",a)
print("b:",b)
print("a.keys() & b.keys()",a.keys() & b.keys())
print("a.keys() - b.keys()",a.keys() - b.keys())
print("a.items() & b.items()",a.items() & b.items())
# make a new dictionary with certain keys removed
c = {key:a[key] for key in a.keys() - {'w','z'}}
print("c:",c)
a: {'x': 1, 'y': 2, 'z': 3}
b: {'w': 1, 'y': 2, 'z': -3}
a.keys() & b.keys() {'z', 'y'}
a.keys() - b.keys() {'x'}
a.items() & b.items() {('y', 2)}
c: {'x': 1, 'y': 2}
?1.10. Removing Duplicates from a Sequence while Maintaining Order
## I don't know what is a generator and what's its function here
1.11. Naming a Slice
## Nameing a slice
###### 0123456789012345678901234567890123456789012345678901234567890'
record = '....................100 .......513.25 ..........'
# readability and maintenance mess
cost = int(record[20:32]) * float(record[40:50])
print(cost)
# naming a slice
SHARE = slice(20,32)
PRICE = slice(40,50)
print("SHARE=record[20:32] :",SHARE)
SHARE_RECORD = slice(20,32,record)
print("SHARE_RECORD = slice(20,32,record) :",SHARE_RECORD)
#print(int(record[SHARE_RECORD])) //TypeError: slice indices must be integers or None or have an __index__ method
print(int(record[SHARE]))
print(float(record[PRICE]))
result = int(record[SHARE]) * float(record[PRICE])
print(result)
# more about slice()
items = [0,1,2,3,4,5,6]
a = slice(2,4) # elements on index 4 is not included in a
print(items[2:4],items[a])
items[a] = [-2,-3]
print(items)
del items[a]
print(items)
# s.start, s.stop and s.step are plain attributes (no call parentheses); s.indices(size) is a method
b = slice(1, 5, 2)
print("b:",b)
print("b.start:",b.start)
print("b.stop:",b.stop)
print("b.step:",b.step)
print("b.indices:",b.indices)
items = [0,1,2,3,4,5,6]
print("items:",items)
print("items[b]:",items[b])
# map a slice onto a sequence of a specific size by using its indices(size) method
# suitably limited to fit within bounds (as to avoid IndexError exceptions when indexing).
c = slice(10,50,2)
# Named `text` rather than `str`: rebinding `str` shadows the built-in type
# and makes any later call such as str(x) fail with
# "TypeError: 'str' object is not callable".
text = 'HelloWorld'
print("c:",c)
# The printed labels are kept unchanged so the output matches the recorded run.
print("c.indices(len(str)):",c.indices(len(text)))
c = slice(20,50,2)
print("c:",c)
print("c.indices(len(str)):",c.indices(len(text)))
c = slice(1,20,2)
print("c:",c)
print("c.indices(len(str)):",c.indices(len(text)))
print("HelloWorld\n"+
"0123456789")
# indices() clips the slice to the sequence length, so range() never
# produces an out-of-bounds index.
for i in range(*c.indices(len(text))):
    print(text[i])
51325.0
SHARE=record[20:32] : slice(20, 32, None)
SHARE_RECORD = slice(20,32,record) : slice(20, 32, '....................100 .......513.25 ..........')
100
513.25
51325.0
[2, 3] [2, 3]
[0, 1, -2, -3, 4, 5, 6]
[0, 1, 4, 5, 6]
b: slice(1, 5, 2)
b.start: 1
b.stop: 5
b.step: 2
b.indices: <built-in method indices of slice object at 0x00000249C116C9C0>
items: [0, 1, 2, 3, 4, 5, 6]
items[b]: [1, 3]
c: slice(10, 50, 2)
c.indices(len(str)): (10, 10, 2)
c: slice(20, 50, 2)
c.indices(len(str)): (10, 10, 2)
c: slice(1, 20, 2)
c.indices(len(str)): (1, 10, 2)
HelloWorld
0123456789
e
l
W
r
d
1.12. Determining the Most Frequently Occurring Items in a Sequence
## collections.Counter.most_common()
words = [
'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
'my', 'eyes', "you're", 'under'
]
from collections import Counter
word_counts = Counter(words)
top_three = Counter.most_common(word_counts,3)
# top_three = word_counts.most_common(3) //can replace
print(top_three)
# actually Counter is a dictionary that maps the items to the number of occurrences.
print("word_counts['eyes']:",word_counts['eyes'])
# increment the count manually, simply use addition:
print("before add morewords:",word_counts)
morewords = ['why','are','you','not','looking','in','my','eyes']
for word in morewords:
word_counts[word] += 1
print("after add morewords:",word_counts)
# word_counts.update(morewords) // alternatively use .update() method
# use various mathematical operations on Counter instances
a = Counter(words)
b = Counter(morewords)
print("\n",a + b)
print(a - b)
[('eyes', 8), ('the', 5), ('look', 4)]
word_counts['eyes']: 8
before add morewords: Counter({'eyes': 8, 'the': 5, 'look': 4, 'into': 3, 'my': 3, 'around': 2, 'not': 1, "don't": 1, "you're": 1, 'under': 1})
after add morewords: Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1, 'why': 1, 'are': 1, 'you': 1, 'looking': 1, 'in': 1})
Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1, 'why': 1, 'are': 1, 'you': 1, 'looking': 1, 'in': 1})
Counter({'eyes': 7, 'the': 5, 'look': 4, 'into': 3, 'my': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1})
1.13. Sorting a List of Dictionaries by a Common Key Problem
## operator.itemgetter()
rows = [
{'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
{'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]
from operator import itemgetter
# sortBy_fname = sorted(rows,key='fname') //TypeError: 'str' object is not callable
rows_by_fname = sorted(rows,key=itemgetter('fname'))
rows_by_uid_reverse = sorted(rows,key=itemgetter('uid'),reverse=True)
# multiple keys
rows_by_lfnames = sorted(rows,key=itemgetter('lname','fname'))
print("rows_by_fname:\n",rows_by_fname)
print("rows_by_uid_reverse:\n",rows_by_uid_reverse)
print("rows_by_lfnames:\n",rows_by_lfnames)
# replaced by lambda expressions
rows_by_fname_lambda = sorted(rows,key= lambda k:k['fname'])
# rows_by_lfname_lambda = sorted(rows,key= lambda k:k['lname','fname']) //WRONG
# rows_by_lfname_lambda = sorted(rows,key= lambda k:k['lname'],['fname']) //WRONG
# //SyntaxError: positional argument follows keyword argument
# rows_by_lfname_lambda = sorted(rows,key= lambda k:k['lname'],k['fname']) //WRONG
# //SyntaxError: positional argument follows keyword argument
# rows_by_lfname_lambda = sorted(rows,key= lambda k:k(['lname'],['fname'])) //WRONG
# //TypeError: 'dict' object is not callable
rows_by_lfname_lambda = sorted(rows,key= lambda k:(k['lname'],k['fname']))
print("rows_by_fname_lambda:\n",rows_by_fname_lambda)
print("rows_by_lfname_lambda:\n",rows_by_lfname_lambda)
# max() min() method
print("\n",min(rows, key=itemgetter('uid')))
rows_by_fname:
[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
rows_by_uid_reverse:
[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
rows_by_lfnames:
[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]
rows_by_fname_lambda:
[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
rows_by_lfname_lambda:
[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}
1.14. Sorting Objects Without Native Comparison Support
## use lambda or operator.attrgetter() to sort objects
class User:
    """Minimal record type used to demonstrate sorting objects by attribute."""

    def __init__(self, user_id):
        """Store *user_id* on the instance."""
        self.user_id = user_id

    def __repr__(self):
        """Return a string of the form ``User(<user_id>)``."""
        # The attribute must be read through self; a bare `user_id` is not in
        # scope here and raises NameError: name 'user_id' is not defined.
        return 'User({})'.format(self.user_id)
users = [User(3), User(250), User(-1)]
print("before sorting, users:",users)
# sorted(users, key=lambda u: u.user_id) returns a new list and leaves users unchanged, so calling it on its own line has no effect on the next line
print("after sorting, users:",sorted(users, key= lambda u:u.user_id))
# an alternative approach is to use operator.attrgetter()
from operator import attrgetter
print("use attrgetter():",sorted(users,key=attrgetter('user_id')))
# attrgetter() is analogous to itemgetter(), they are both a tad bit faster than lambda
# expression, and they both support multiple fields when sorting, for example, you can run
# by_name = sorted(users, key=attrgetter('last_name', 'first_name'))
# Also, max() min() methods are also OK
print("min user_id:",min(users, key=attrgetter('user_id')))
before sorting, users: [User(3), User(250), User(-1)]
after sorting, users: [User(-1), User(3), User(250)]
use attrgetter(): [User(-1), User(3), User(250)]
min user_id: User(-1)
1.15. Grouping Records Together Based on a Field
## Group a list of dictionaries by 'date'
rows = [
{'address': '5412 N CLARK', 'date': '07/01/2012'},
{'address': '5148 N CLARK', 'date': '07/04/2012'},
{'address': '5800 E 58TH', 'date': '07/02/2012'},
{'address': '2122 N CLARK', 'date': '07/03/2012'},
{'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
{'address': '1060 W ADDISON', 'date': '07/02/2012'},
{'address': '4801 N BROADWAY', 'date': '07/01/2012'},
{'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]
# first sort by 'date'
# Since groupby() only examines consecutive items, failing to sort first won’t group
# the records as you want.
from operator import itemgetter
rows.sort(key=itemgetter('date')) # use itemgetter()
print(rows)
# then group by 'date', use itertools.groupby()
from itertools import groupby
print("groupby(rows,itemgetter('date')): \n",groupby(rows,itemgetter('date')))
for date,items in groupby(rows,itemgetter('date')):
print(date)
print(items)
for item in items:
print("\t",item)
print("\n")
# group the data together by dates into a large data structure
# by this way, you don't need to sort the records first
from collections import defaultdict
rows_by_date = defaultdict(list)
rows = [
{'address': '5412 N CLARK', 'date': '07/01/2012'},
{'address': '5148 N CLARK', 'date': '07/04/2012'},
{'address': '5800 E 58TH', 'date': '07/02/2012'},
{'address': '2122 N CLARK', 'date': '07/03/2012'},
{'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
{'address': '1060 W ADDISON', 'date': '07/02/2012'},
{'address': '4801 N BROADWAY', 'date': '07/01/2012'},
{'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]
# group items by 'date' in a multidict
for row in rows:
rows_by_date[row['date']].append(row)
# print rows_by_date in a beautiful way
for date in rows_by_date:
print(rows_by_date[date])
[{'address': '5412 N CLARK', 'date': '07/01/2012'}, {'address': '4801 N BROADWAY', 'date': '07/01/2012'}, {'address': '5800 E 58TH', 'date': '07/02/2012'}, {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}, {'address': '1060 W ADDISON', 'date': '07/02/2012'}, {'address': '2122 N CLARK', 'date': '07/03/2012'}, {'address': '5148 N CLARK', 'date': '07/04/2012'}, {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}]
groupby(rows,itemgetter('date')):
<itertools.groupby object at 0x00000249A87B23B0>
07/01/2012
<itertools._grouper object at 0x00000249C162A670>
{'address': '5412 N CLARK', 'date': '07/01/2012'}
{'address': '4801 N BROADWAY', 'date': '07/01/2012'}
07/02/2012
<itertools._grouper object at 0x00000249C1429370>
{'address': '5800 E 58TH', 'date': '07/02/2012'}
{'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
{'address': '1060 W ADDISON', 'date': '07/02/2012'}
07/03/2012
<itertools._grouper object at 0x00000249C0FD54C0>
{'address': '2122 N CLARK', 'date': '07/03/2012'}
07/04/2012
<itertools._grouper object at 0x00000249C0FE3940>
{'address': '5148 N CLARK', 'date': '07/04/2012'}
{'address': '1039 W GRANVILLE', 'date': '07/04/2012'}
[{'address': '5412 N CLARK', 'date': '07/01/2012'}, {'address': '4801 N BROADWAY', 'date': '07/01/2012'}]
[{'address': '5148 N CLARK', 'date': '07/04/2012'}, {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}]
[{'address': '5800 E 58TH', 'date': '07/02/2012'}, {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}, {'address': '1060 W ADDISON', 'date': '07/02/2012'}]
[{'address': '2122 N CLARK', 'date': '07/03/2012'}]
1.16. Filtering Sequence Elements
# filter sequence data use a list comprehension
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
print("mylist:",mylist)
print("filter sequence data use a list comprehension:")
print([n for n in mylist if n > 0 ])
print([n for n in mylist if n < 0 ])
# use generator expressions
pos = (n for n in mylist if n > 0)
print("generator expressions:",pos)
for x in pos:
print(x)
# use filter when criteria is complicated or involves exception handling
values = ['1', '2', '-3', '-', '4', 'N/A', '5']
def is_int(val):
    """Report whether ``int(val)`` succeeds.

    Args:
        val: The value to test, e.g. a string such as '1' or 'N/A'.

    Returns:
        True if ``int(val)`` completes, False if it raises ValueError.
    """
    try:  # pay attention to how to write try-except clauses in Python
        int(val)  # only the success or failure matters; the result is discarded
    except ValueError:
        return False
    return True
int_eles = list(filter(is_int,values))
print("values:",values)
print("use filter function to get int elements:",int_eles)
# transform the data when using a list comprehension
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
import math
print("filter all positive elements and do sqrt() on them at the same time:")
print([math.sqrt(n) for n in mylist if n > 0])
# use if-else clause to replace values that don't meet the criteria
print("use if-else clause to replace values that don't meet the criteria:")
# print([n for n in mylist if n > 0 else 0]) WRONG
print([n if n > 0 else 0 for n in mylist])# note that if-else clause should be placed ahead
# use itertools.compress
addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    # The comma after the next entry is essential: without it Python joins the
    # two adjacent string literals into one, leaving 7 addresses for 8 counts
    # and shifting every later address against its count.
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]
counts = [ 0, 3, 10, 4, 1, 7, 6, 1]
from itertools import compress
# compress() pairs each address with the flag at the same position, so the
# two lists must line up one-to-one.
more5 = [n > 5 for n in counts]
print("more5:",more5)
selected_addresses = list(compress(addresses, more5))
# With all eight addresses present the selection is
# ['5800 E 58TH', '1060 W ADDISON', '4801 N BROADWAY']; a run made with the
# two strings fused reports different addresses.
print("use compress:\n",selected_addresses)
mylist: [1, 4, -5, 10, -7, 2, 3, -1]
filter sequence data use a list comprehension:
[1, 4, 10, 2, 3]
[-5, -7, -1]
generator expressions: <generator object <genexpr> at 0x00000249C2D83C80>
1
4
10
2
3
values: ['1', '2', '-3', '-', '4', 'N/A', '5']
use filter function to get int elements: ['1', '2', '-3', '4', '5']
filter all positive elements and do sqrt() on them at the same time:
[1.0, 2.0, 3.1622776601683795, 1.4142135623730951, 1.7320508075688772]
use if-else clause to replace values that don't meet the criteria:
[1, 4, 0, 10, 0, 2, 3, 0]
more5: [False, False, True, False, False, True, True, False]
use compress:
['5800 E 58TH', '4801 N BROADWAY', '1039 W GRANVILLE']
1.17. Extracting a Subset of a Dictionary
# use dictionary comprehension
prices = {
'ACME': 45.23,
'AAPL': 612.78,
'IBM': 205.55,
'HPQ': 37.20,
'FB': 10.75
}
print("all stocks:",prices)
# pMore200 = {key:value for key:value in prices if value > 200}WRONG
# ^ SyntaxError: invalid syntax
# pMore200 = {key:value for value in prices.values() if value > 200}
# ^ NameError: name 'key' is not defined
# pMore200 = {key:value for key,value in prices.values() if value > 200}
# TypeError: cannot unpack non-iterable float object
pMore200 = {key:value for key,value in prices.items() if value > 200}
print("price more than 200:",pMore200)
# Make a dictionary of tech stocks
tech_names = { 'AAPL', 'IBM', 'HPQ', 'MSFT' }
tech_stocks = {key:value for key,value in prices.items() if key in tech_names}
print("tech_stocks:",tech_stocks)
# some other slower way
pMore200_dict = dict((key,value) for key,value in prices.items() if value > 200)
print(pMore200_dict)
tech_stocks_only_key = {key:prices[key] for key in prices.keys() & tech_names}
print(tech_stocks_only_key)
all stocks: {'ACME': 45.23, 'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2, 'FB': 10.75}
price more than 200: {'AAPL': 612.78, 'IBM': 205.55}
tech_stocks: {'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}
{'AAPL': 612.78, 'IBM': 205.55}
{'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}
1.18. Mapping Names to Sequence Elements
# collections.namedtuple()
from collections import namedtuple
Subscriber = namedtuple('Subscriber',['addr','joined'])
sub = Subscriber('jonesy@sample.com','2012-10-18')
print("sub:",sub)
print("sub.addr:",sub.addr)
print("sub.joined:",sub.joined)
# namedtuple supports operations on tuple
print("len(sub):",len(sub))
addr, joined = sub
print(addr,joined)
# use name to access a namedtuple
Stock = namedtuple('Stock',['name','shares','price'])


def compute_cost_by_position(records):
    """Return the total of shares * price, reading the fields by index.

    Args:
        records: An iterable of sequences where index 1 holds the share
            count and index 2 holds the price.

    Returns:
        The sum as a float; 0.0 for an empty iterable.
    """
    # Starting from 0.0 keeps the result a float even when records is empty.
    return sum((rec[1] * rec[2] for rec in records), 0.0)


def compute_cost_by_name(records):
    """Return the total of shares * price, reading the fields by name.

    Each record is first converted to a Stock, so it must have exactly the
    three fields (name, shares, price).

    Args:
        records: An iterable of 3-item sequences.

    Returns:
        The sum as a float; 0.0 for an empty iterable.
    """
    stocks = (Stock(*rec) for rec in records)
    return sum((s.shares * s.price for s in stocks), 0.0)
# namedtuple as a replacement of a dictionary, namedtuple is immutable and saves memory
s = Stock('ALIBABA',100,123.45)
print("s:",s)
# s.shares = 75 //AttributeError: can't set attribute
# use ._replace() to creat a new instances with values replaced
s = s._replace(shares = 75)
print("s:",s)
NewStock = namedtuple('NewStock',['name','shares','price','date','time'])
# create a prototype instance holding the default value of every field
newstock_prototype = NewStock('',0,0.0,None,None)


# Function to convert a dictionary to a NewStock
def dict_to_newstock(s):
    """Build a NewStock from the dictionary *s*.

    Fields missing from *s* keep the values of ``newstock_prototype``.

    Args:
        s: A dict whose keys are NewStock field names.

    Returns:
        A new NewStock instance; the prototype itself is not modified.
    """
    return newstock_prototype._replace(**s)
a = {'name': 'ACME', 'shares': 100, 'price': 123.45}
print("a:",a)
print("dict_to_newstock(a):",dict_to_newstock(a))
b = {'name': 'ACME', 'shares': 100, 'price': 123.45, 'date': '12/17/2012'}
print("b:",b)
print("dict_to_newstock(b):",dict_to_newstock(b))
sub: Subscriber(addr='jonesy@sample.com', joined='2012-10-18')
sub.addr: jonesy@sample.com
sub.joined: 2012-10-18
len(sub): 2
jonesy@sample.com 2012-10-18
s: Stock(name='ALIBABA', shares=100, price=123.45)
s: Stock(name='ALIBABA', shares=75, price=123.45)
a: {'name': 'ACME', 'shares': 100, 'price': 123.45}
dict_to_newstock(a): NewStock(name='ACME', shares=100, price=123.45, date=None, time=None)
b: {'name': 'ACME', 'shares': 100, 'price': 123.45, 'date': '12/17/2012'}
dict_to_newstock(b): NewStock(name='ACME', shares=100, price=123.45, date='12/17/2012', time=None)
1.19. Transforming and Reducing Data at the Same Time
# some examples of generator-expression agument
# Calculate the sum of squares
nums = [1,2,3,4]
s = sum(x*x for x in nums)
print(s)
# Determine if any .py files exist in a directory
import os
# Raw string: in a normal literal the backslashes in this Windows path form
# the invalid escape sequences '\C' and '\L', which newer Python versions
# warn about. The resulting path is the same.
files = os.listdir(r'D:\CODE\LearnPythonHardWay')
if any(name.endswith('.py') for name in files):
    print('There be python!')
else:
    print("Sorry, no python!")
# # Output a tuple as CSV
s = ('ACME', 50, 123.45)
print(s)
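# print(','.join(x if type(x)==type("a") else str(x) for x in s))
# print(','.join(str(x) for x in s)) // the book's statement is valid Python
# It fails with TypeError: 'str' object is not callable only when the name `str`
# has been rebound to a string earlier in the same session (for example by an
# assignment like str = 'HelloWorld'); after `del str` or a kernel restart it
# prints ACME,50,123.45
# Too difficult to write by myself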
# Data reduction across fields of a data structure
portfolio = [
{'name':'GOOG', 'shares': 50},
{'name':'YHOO', 'shares': 75},
{'name':'AOL', 'shares': 20},
{'name':'SCOX', 'shares': 65}
]
min_shares = min(s['shares'] for s in portfolio)
print(min_shares)
## when a generator expression is the single argument to a function,
## you don’t need repeated parentheses
# round brackets
s = sum((x * x for x in nums)) # Pass generator-expr as argument
s = sum(x * x for x in nums) # More elegant syntax
## create an extra temporary list, waste extra memory
#square brackets
s = sum([x * x for x in nums])
## max()\min() accept a key argument you can use a generator
# Original: Returns 20
min_shares = min(s['shares'] for s in portfolio)
print("not use key argument in a min generator:",min_shares)
# Alternative: Returns {'name': 'AOL', 'shares': 20}
min_shares = min(portfolio, key=lambda s: s['shares'])
print("use key argument in a min generator:",min_shares)
30
There be python!
('ACME', 50, 123.45)
20
not use key argument in a min generator: 20
use key argument in a min generator: {'name': 'AOL', 'shares': 20}
1.20. Combining Multiple Mappings into a Single Mapping
# collections.ChainMap
from collections import ChainMap
a = {'x':1,'y':2}
b = {'y':3,'z':4}
c = ChainMap(a,b)
print(c['x'])
print(c['y'])
print(c['z'])
## a ChainMap simply keeps a list of the underlying mappings and redefines common
## dictionary operations to scan the list.
print("c:",c)
print("len(c):",len(c))
print("list(c.keys()):",list(c.keys()))
print("list(c.values()):",list(c.values()))
## Operations that mutate the mapping always affect the first mapping listed.
print("before change c, a:",a)
print("c:",c)
c['y'] = -2
c['z'] = -4
c['k'] = 100
print("after change c, a:",a)
print("c:",c)
# .new_child() and .parents
d = ChainMap()
d['x'] = 1
print(d)
print("d['x']:",d['x'])
d = d.new_child()
d['x'] = 2
print(d)
print("d['x']:",d['x'])
d = d.new_child()
d['y'] = 100
print(d)
print("d['x']:",d['x'])
# d = d.parents() // TypeError: 'ChainMap' object is not callable (parents is a property, not a method)
d = d.parents
print(d)
print("d['x']:",d['x'])
# merge a dict
a = {'x':1,'y':2}
b = {'y':3,'z':4}
merge_dict = dict(b)
merge_dict.update(a)# the same key, keep value in a
print("merge_dict:",merge_dict)
# don’t get reflected in the merged dictionary
a['x'] = 999
print(a['x'],merge_dict['x'])
a = {'x':1,'y':2}
b = {'y':3,'z':4}
# Notice change to merged dicts
c = ChainMap(a,b)
print("c:",c)
a['x'] = 999
print(a['x'],c['x'])
c['x'] = -999
print(a['x'],c['x'])
1
2
4
c: ChainMap({'x': 1, 'y': 2}, {'y': 3, 'z': 4})
len(c): 3
list(c.keys()): ['y', 'z', 'x']
list(c.values()): [2, 4, 1]
before change c, a: {'x': 1, 'y': 2}
c: ChainMap({'x': 1, 'y': 2}, {'y': 3, 'z': 4})
after change c, a: {'x': 1, 'y': -2, 'z': -4, 'k': 100}
c: ChainMap({'x': 1, 'y': -2, 'z': -4, 'k': 100}, {'y': 3, 'z': 4})
ChainMap({'x': 1})
d['x']: 1
ChainMap({'x': 2}, {'x': 1})
d['x']: 2
ChainMap({'y': 100}, {'x': 2}, {'x': 1})
d['x']: 2
ChainMap({'x': 2}, {'x': 1})
d['x']: 2
merge_dict: {'y': 2, 'z': 4, 'x': 1}
999 1
c: ChainMap({'x': 1, 'y': 2}, {'y': 3, 'z': 4})
999 999
-999 -999
标签:rows,date,name,C1PythonCookBook,key,print,2012 来源: https://www.cnblogs.com/Matrix-250/p/16441073.html
本站声明: 1. iCode9 技术分享网(下文简称本站)提供的所有内容,仅供技术学习、探讨和分享; 2. 关于本站的所有留言、评论、转载及引用,纯属内容发起人的个人观点,与本站观点和立场无关; 3. 关于本站的所有言论和文字,纯属内容发起人的个人观点,与本站观点和立场无关; 4. 本站文章均是网友提供,不完全保证技术分享内容的完整性、准确性、时效性、风险性和版权归属;如您发现该文章侵犯了您的权益,可联系我们第一时间进行删除; 5. 本站为非盈利性的个人网站,所有内容不会用来进行牟利,也不会利用任何形式的广告来间接获益,纯粹是为了广大技术爱好者提供技术内容和技术思想的分享性交流网站。