Memo - Python
This is one of my memos about the Python language.
Memos are used for recording the problems I encountered while programming and their solutions. I also write down things that are hard for me to remember.
Links
Basic
# Get all attributes of an object
dir(object_name)
# Show the type of an object
type(object_name)
# Check the type
isinstance(obj_name, class_name)
# Main
if __name__ == "__main__":
pass
Flow of Control
# Loop with index: enumerate() yields (position, element) pairs
for index, item in enumerate(items):
    print(index, item)
List
# Flatten list
[item for sublist in target for item in sublist]
# Another, more elegant way to flatten: http://stackoverflow.com/a/40857703/6243174
from collections.abc import Iterable


def flatten(items):
    """Yield the leaf items of an arbitrarily nested iterable, depth-first.

    Any nested iterable (list, tuple, set, generator, ...) is descended
    into. Strings and bytes are yielded whole rather than split into
    characters: a one-character string is itself iterable, so recursing
    into it would never terminate.
    """
    for x in items:
        if isinstance(x, Iterable) and not isinstance(x, (str, bytes)):
            yield from flatten(x)
        else:
            yield x
list(flatten(l)) # list of lists
#[1, 2, 3, 4, 5, 6, 7, 8, 9]
items = [[1, [2]], (3, 4, {5, 6}, 7), 8, 9] # numbers & mixed containers
list(flatten(items))
#[1, 2, 3, 4, 5, 6, 7, 8, 9]
# Filter empty strings: http://stackoverflow.com/a/3845453
# NOTE: in Python 3 filter() returns a lazy iterator; wrap the call in list() when a list is needed.
# NOTE: filter(None, ...) and filter(bool, ...) drop every falsy item, not only empty strings.
str_list = filter(None, str_list) # fastest
str_list = filter(bool, str_list) # fastest
str_list = filter(len, str_list) # a bit slower
str_list = filter(lambda item: item, str_list) # slower than list comprehension
Dict
# OrderedDict http://stackoverflow.com/questions/10844064
from collections import OrderedDict
json.dumps(OrderedDict([("a", 1), ("b", 2)])) # '{"a": 1, "b": 2}'
# OrderedDict Since Python 3.6
json.dumps(OrderedDict(a=1, b=2)) # '{"a": 1, "b": 2}'
String
Regular expression
re.findall(r'\d+\.?\d+', "116°23'56.97\"E") # ['116', '23', '56.97']
Class
Improve Your Python: Python Classes and Object Oriented Programming
File operations
Basic
with open(path, 'w') as f:
pass
# https://docs.python.org/2/library/shutil.html#shutil.move
import shutil
shutil.move(src, dst)
Read Excel
Python Excel Tutorial: The Definitive Guide
openpyxl - A Python library to read/write Excel 2010 xlsx/xlsm files.
from openpyxl import load_workbook
# Load xlsx file
wb = load_workbook(filename='tower.xlsx', read_only=True)
# Get the first sheet, ws:<ReadOnlyWorksheet>
# (wb.sheetnames replaces the deprecated wb.get_sheet_names())
ws = wb[wb.sheetnames[0]]
Write csv & xlsx
import csv
import os

from openpyxl import Workbook
from openpyxl.styles import Alignment
from openpyxl.writer.write_only import WriteOnlyCell
class Catloger(object):
    """Export the catalogue rows held in ``self.cata_list`` to CSV or XLSX.

    ``cata_list`` is not created here; it must be assigned before either
    exporter is called. Each row is read as a dict keyed by the entries
    of ``FIELDNAMES``.
    """

    # Column headers shared by both exporters; also the keys read from each row.
    FIELDNAMES = ('序号', '起始页', 'Topic', '内容摘录')

    def save2csv(self, path, encoding=None):
        """Write the catalogue to ``catalog.csv`` inside directory ``path``.

        Args:
            path: Directory in which the file is created.
            encoding: Text encoding passed to ``open``; ``None`` keeps the
                platform default, which is what this method used before
                the parameter existed.
        """
        filename = 'catalog.csv'
        path = os.path.join(path, filename)
        # newline='' lets the csv module emit its own line terminators;
        # without it Windows text mode turns every "\r\n" into "\r\r\n".
        with open(path, 'w', newline='', encoding=encoding) as csvfile:
            writer = csv.DictWriter(csvfile, self.FIELDNAMES)
            writer.writeheader()
            writer.writerows(self.cata_list)
        print('Save csv file to:', path)

    def save2xlsx(self, path):
        """Write the catalogue to ``catalog.xlsx`` inside directory ``path``.

        The '内容摘录' column is written with text wrapping enabled.
        """
        filename = 'catalog.xlsx'
        path = os.path.join(path, filename)
        wb = Workbook(write_only=True)
        ws = wb.create_sheet()
        ws.append(self.FIELDNAMES)
        for cata in self.cata_list:
            content_cell = WriteOnlyCell(ws, value=cata['内容摘录'])
            # openpyxl style objects are shared and immutable: assign a new
            # Alignment instead of mutating attributes of the current style.
            content_cell.alignment = Alignment(wrap_text=True)
            ws.append([cata['序号'], cata['起始页'], cata['Topic'], content_cell])
        wb.save(path)
Format
Basic Data Structure
# Trim whitespace
s.strip()
# Extract number from string
import re
# Raw string: "\d" and "\." are invalid escapes in a normal string literal
re.findall(r"[-+]?\d+[\.]?\d*[eE]?[-+]?\d*", '15.1米') # ['15.1']
# Decimal points
round(num, 2)
# JSON, dict or array to JSON string,
# ensure_ascii=False make JSON string encoding in UTF-8 rather than ASCII
json.dumps(obj, indent=4, ensure_ascii=False)
Geographic coordinates
import re
def dms2dd(degrees, minutes, seconds, direction):
    """Convert degrees/minutes/seconds plus a hemisphere letter to decimal degrees.

    Args:
        degrees, minutes, seconds: Numbers or numeric strings.
        direction: Hemisphere letter; 'S' and 'W' (either case) give a
            negative result, anything else leaves it positive.

    Returns:
        The coordinate as a float in decimal degrees.
    """
    dd = float(degrees) + float(minutes) / 60 + float(seconds) / (60 * 60)
    if direction in ('S', 'W', 's', 'w'):
        dd = -dd
    return dd


def parse_dms(dms):
    """Parse a coordinate given as a plain number or a DMS string.

    Input that ``float`` accepts is returned unchanged as a float.
    Otherwise the value is treated as a string such as ``116°23'56.97"E``
    and split into degrees, minutes, seconds and hemisphere letter.

    Raises:
        IndexError: If the string yields fewer than four components.
    """
    try:
        return float(dms)
    except ValueError:
        # Not a plain number: fall through to DMS parsing.
        pass
    # Keep '.' out of the separator set so decimal seconds such as "56.97"
    # stay in one piece; otherwise the fraction takes the direction slot.
    parts = re.split(r'[^\w.]+', dms.strip())
    return dms2dd(parts[0], parts[1], parts[2], parts[3])
lag = '116°23\'56.97"E'
parse_dms(lag) # 116.3992
Datetime
import datetime
object.strftime('%Y-%m-%d %H:%M:%S') # 2017-05-01 08:18:05
Numpy
import numpy as np
# ReLU fastest way
np.maximum(x, 0, x)
# won't modify x, the fastest way
x * (x > 0)
# dReLU, set 0 for x = 0
1. * (x > 0)
Forward learning
I figured out that grouping these memos by event is more efficient: I can recall the story behind the code and remember it faster. After all, I’m writing memos rather than tutorials.
2017.10
Array slice
- Array slice ( when see sample of filename ).
a[start:end] # items start through end-1
a[start:] # items start through the rest of the array
a[:end] # items from the beginning through end-1
a[:] # a copy of the whole array
a[start:end:step] # start through not past end, by step
a[-1] # last item in the array
a[-2:] # last two items in the array
a[:-2] # everything except the last two items
Image processing
import matplotlib.pyplot as plt
from scipy import misc
# NOTE: scipy.misc.imread was deprecated in SciPy 1.0 and removed in 1.2;
# on newer SciPy read the file with imageio.imread(file_path) or
# plt.imread(file_path) instead (dtype/scaling may differ - verify).
img = misc.imread(file_path)
plt.imshow(img)
# Quick sanity check of what was loaded: container type, dimensions, pixel dtype
print(type(img), img.shape, img.dtype)
result = [item.operation() for item in items]
Exceptions and assert
Asserts should be used to test conditions that should never happen. The purpose is to crash early in the case of a corrupt program state.
Exceptions should be used for errors that can conceivably happen, and you should almost always create your own Exception classes.
Count duplicate items
from collections import Counter

# Counter tallies every item in a single pass; calling .count() once per
# item rescans the whole sequence each time (quadratic in its length).
class_summary = dict(Counter(img_classes))
Computing
Packages
- numpy
- scipy
- matplotlib
- scikit-learn
- pandas
- numexpr
- bottleneck
- Pillow
- ipykernel
- tensorflow-gpu
Jupyter Notebook
- Jupyter (IPython) notebooks features
- 28 Jupyter Notebook tips, tricks and shortcuts
- Node kernel - IJavascript
npm install -g ijavascript
ijinstall
jupyter notebook
# Get the content of *.py into jupyter notebook
load *.py
# Get the content of a function
function_name??
Leave a Comment
Your email address will not be published. Required fields are marked *