Memo - Python

This is one of my memos about the Python language.

I use memos to record the problems I encounter while programming, along with their solutions. I also write down things that I find hard to remember.


Basic

# Get all attributes of an object
dir(object_name)
# Show the type of an object
type(object_name)
# Check whether an object is an instance of a class
isinstance(obj_name, class_name)
# Main: this branch runs only when __name__ is "__main__"
if __name__ == "__main__":
    pass

Flow of Control

# Loop with index: enumerate() yields (index, item) pairs
for index, item in enumerate(items):
    print(index, item)

List

# Flatten a list of lists (one level deep)
[item for sublist in target for item in sublist]

# Another, more elegant way that recurses into nested iterables: http://stackoverflow.com/a/40857703/6243174
from collections.abc import Iterable


def flatten(items):
    """Yield the leaf items of an arbitrarily nested iterable, depth first.

    Args:
        items: An iterable whose elements may themselves be iterables
            (lists, tuples, sets, generators, ...), nested to any depth.

    Yields:
        Every non-iterable element, in iteration order. ``str`` and ``bytes``
        values are yielded whole instead of being split into characters.
    """
    for x in items:
        # A one-character string is itself an iterable of strings, so recursing
        # into str/bytes would never terminate; treat them as leaves.
        if isinstance(x, Iterable) and not isinstance(x, (str, bytes)):
            yield from flatten(x)
        else:
            yield x

nested = [[1, 2, 3], [4, 5, 6], [7], [8, 9]]   # list of lists
list(flatten(nested))
#[1, 2, 3, 4, 5, 6, 7, 8, 9]

items = [[1, [2]], (3, 4, {5, 6}, 7), 8, 9]    # numbers & mixed containers
list(flatten(items))
#[1, 2, 3, 4, 5, 6, 7, 8, 9]

# Filter empty strings: http://stackoverflow.com/a/3845453
# In Python 3 filter() returns a lazy iterator, so wrap it in list() to get a list back.
str_list = list(filter(None, str_list))  # fastest
str_list = list(filter(bool, str_list))  # fastest
str_list = list(filter(len, str_list))   # a bit slower
str_list = list(filter(lambda item: item, str_list))  # slower than a list comprehension

Dict

# OrderedDict http://stackoverflow.com/questions/10844064
import json
from collections import OrderedDict

# A list of pairs fixes the key order explicitly
json.dumps(OrderedDict([("a", 1), ("b", 2)]))   # '{"a": 1, "b": 2}'
# Since Python 3.6 keyword arguments keep their order, so this works too
json.dumps(OrderedDict(a=1, b=2))   # '{"a": 1, "b": 2}'

String

Regular expression

Regular expression operations

import re

# The fractional part is optional; unlike r'\d+\.?\d+' this also matches single-digit numbers
re.findall(r'\d+(?:\.\d+)?', "116°23'56.97\"E") # ['116', '23', '56.97']

Class

Improve Your Python: Python Classes and Object Oriented Programming

File operations

Basic

# Open a file for writing; the with-block closes it on exit
with open(path, 'w') as f:
    pass
# Move src to dst: https://docs.python.org/3/library/shutil.html#shutil.move
import shutil
shutil.move(src, dst)

Read Excel

Python Excel Tutorial: The Definitive Guide

openpyxl - A Python library to read/write Excel 2010 xlsx/xlsm files.

from openpyxl import load_workbook
# Load xlsx file
wb = load_workbook(filename='tower.xlsx', read_only=True)
# Get the first sheet, ws:<ReadOnlyWorksheet>
# (Workbook.get_sheet_names() is deprecated; the sheetnames property replaces it)
ws = wb[wb.sheetnames[0]]

Write csv & xlsx

import csv
from openpyxl import Workbook
from openpyxl.writer.write_only import WriteOnlyCell
class Catloger(object):
    """Export a catalog to CSV and XLSX files.

    Both methods read ``self.cata_list``, which is not assigned anywhere in
    this class. Each entry is indexed by the column names in ``FIELDNAMES``.
    """

    # Column headers, shared by both export formats.
    FIELDNAMES = ['序号', '起始页', 'Topic', '内容摘录']

    def save2csv(self, path):
        """Write ``self.cata_list`` to ``catalog.csv`` inside directory ``path``.

        Args:
            path: Directory in which the file is created.
        """
        import os

        filename = 'catalog.csv'
        path = os.path.join(path, filename)
        # The csv module requires newline='' so that it controls line endings
        # itself; without it Windows output gets an extra blank line per row.
        with open(path, 'w', newline='') as csvfile:
            writer = csv.DictWriter(csvfile, self.FIELDNAMES)
            writer.writeheader()
            writer.writerows(self.cata_list)
        print('Save csv file to:', path)

    def save2xlsx(self, path):
        """Write ``self.cata_list`` to ``catalog.xlsx`` inside directory ``path``.

        The '内容摘录' column is written through a ``WriteOnlyCell`` so that
        text wrapping can be enabled on it.

        Args:
            path: Directory in which the file is created.
        """
        import os
        from openpyxl.styles import Alignment

        filename = 'catalog.xlsx'
        path = os.path.join(path, filename)
        wb = Workbook(write_only=True)
        ws = wb.create_sheet()
        ws.append(self.FIELDNAMES)
        for cata in self.cata_list:
            print(cata['内容摘录'])
            content_cell = WriteOnlyCell(ws, value=cata['内容摘录'])
            # Style objects are immutable in openpyxl 2.x+: assign a new
            # Alignment to the cell instead of mutating cell.style in place.
            content_cell.alignment = Alignment(wrap_text=True)
            ws.append([cata['序号'], cata['起始页'], cata['Topic'], content_cell])
        wb.save(path)
view raw catalog.py hosted with ❤ by GitHub

Format

Basic Data Structure

# Trim whitespace
s.strip()
# Extract number from string (raw string, so the backslashes reach the regex engine untouched)
import re
re.findall(r"[-+]?\d+[\.]?\d*[eE]?[-+]?\d*", '15.1米')    # ['15.1']
# Round to two decimal places
round(num, 2)
# JSON: serialize a dict or list to a JSON string;
# ensure_ascii=False keeps non-ASCII characters as they are instead of \uXXXX escapes
import json
json.dumps(obj, indent=4, ensure_ascii=False)

Geographic coordinates

import re

def dms2dd(degrees, minutes, seconds, direction):
    """Convert degrees/minutes/seconds plus a hemisphere letter to decimal degrees.

    Args:
        degrees: Degrees, as a number or numeric string.
        minutes: Arc minutes, as a number or numeric string.
        seconds: Arc seconds, as a number or numeric string.
        direction: Hemisphere letter; 'S' and 'W' (any case) give a negative
            result, anything else leaves the value positive.

    Returns:
        The coordinate in decimal degrees as a float.

    Raises:
        ValueError: If degrees, minutes or seconds cannot be converted to float.
    """
    dd = float(degrees) + float(minutes) / 60 + float(seconds) / (60 * 60)
    # South and west are negative; normalise case and surrounding whitespace
    # so that 's' / 'w' are not silently treated as positive.
    if str(direction).strip().upper() in ('S', 'W'):
        dd *= -1
    return dd

def parse_dms(dms):
    """Parse a coordinate given as a plain number or as a D°M'S"H string.

    Args:
        dms: Either something ``float()`` accepts (returned as a float), or a
            string such as ``116°23'56.97"E`` with degrees, minutes, seconds
            and a hemisphere letter separated by non-word characters.

    Returns:
        The coordinate in decimal degrees as a float.

    Raises:
        IndexError: If the string has fewer than four components.
    """
    try:
        return float(dms)
    except ValueError:
        # Not a plain number. Split on anything that is neither a word
        # character nor a dot, so the decimal point in the seconds ("56.97")
        # stays intact instead of being split into "56" and "97".
        parts = re.split(r'[^\w.]+', dms)
        return dms2dd(parts[0], parts[1], parts[2], parts[3])

lng = '116°23\'56.97"E'
parse_dms(lng)  # 116.3992

Datetime

Python’s strftime directives

import datetime
# strftime is a method of datetime/date/time instances, not of the builtin `object`
datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')    # e.g. 2017-05-01 08:18:05

Numpy

import numpy as np
# ReLU, fastest way; x is also passed as the third (output) argument, so it is overwritten in place
# (assumes x is a NumPy array)
np.maximum(x, 0, x)
# ReLU that won't modify x, the fastest way
x * (x > 0)
# dReLU (derivative of ReLU); (x > 0) is False at x = 0, so the result there is 0
1. * (x > 0)

Forward learning

I figured out that grouping these memos by event is more efficient: I can recall the story behind the code and so remember it faster. After all, I'm writing a memo rather than a tutorial.
2017.10

Array slice

  • Array slice ( when see sample of filename ).
a[start:end] # items start through end-1
a[start:]    # items start through the rest of the array
a[:end]      # items from the beginning through end-1
a[:]         # a copy of the whole array
a[start:end:step] # start through not past end, by step
a[-1]    # last item in the array
a[-2:]   # last two items in the array
a[:-2]   # everything except the last two items

Image processing

import matplotlib.pyplot as plt
from scipy import misc
# scipy.misc.imread has been removed from SciPy; matplotlib's reader is already imported.
# NOTE(review): plt.imread may return a different dtype than misc.imread did - confirm.
img = plt.imread(file_path)
plt.imshow(img)
print(type(img), img.shape, img.dtype)
result = [item.operation() for item in items]

Exceptions and assert

Asserts should be used to test conditions that should never happen. The purpose is to crash early in the case of a corrupt program state.
Exceptions should be used for errors that can conceivably happen, and you should almost always create your own Exception classes.

Best practice for Python assert - Deestan’s answer

Exceptions

Count duplicate items

using count

# Map each distinct value in img_classes to its number of occurrences.
# NOTE(review): assuming img_classes is a list, count() rescans it for every element
# (quadratic); collections.Counter(img_classes) gives the same counts in a single pass.
class_summary = {key:img_classes.count(key) for key in img_classes}

Computing

Cython 基本用法

Packages

  • numpy
  • scipy
  • matplotlib
  • scikit-learn
  • pandas
    • numexpr
    • bottleneck
  • Pillow
  • ipykernel
  • tensorflow-gpu

Jupyter Notebook

npm install -g ijavascript
ijinstall
jupyter notebook
# Get the content of *.py into jupyter notebook
load *.py
# Get the content of a function
function_name??

Categories: ,

Updated:

Leave a Comment

Your email address will not be published. Required fields are marked *

Loading...