gdb pyrasite guppy meliae

查看 进程的线程数量

ps -o nlwp pid
top -H -p pid
pstree -p pid

pyrasite查看内存

pyrasite-memory-viewer pid

pyrasite 进入进程交互

pyrasite-shell pid

guppy

from guppy import hpy

hp = hpy()
# Take a snapshot of the heap and print its summary table.
h = hp.heap()
print(h)
# Row 0 is the entry that uses the most memory; break it down by the type
# of the objects it contains.
h[0].bytype
# One-liner for pasting into an attached shell: partition the heap with
# byrcs and list the objects of the first partition one by one.
from guppy import hpy; hxx = hpy(); byrcs = hxx.heap().byrcs; byrcs[0].byid

gc

import gc
# Inspect the collector's list of uncollectable objects.
gc.garbage
# Trigger a collection manually to release garbage.
gc.collect()

根据对象的id/address动态获取对象

import ctypes
# Replace <addr_or_id> with the integer id/address of the wanted object.
# Nothing here checks that the address still refers to a live object.
obj = ctypes.cast(<addr_or_id>, ctypes.py_object).value

dump出所有对象地址

from meliae import loader, scanner

path = '/tmp/pyrasite.json'
# Only ``loader`` and ``scanner`` are imported, so the function has to be
# reached through ``scanner``; ``meliae.scanner`` would raise NameError.
scanner.dump_all_objects(path)


# Load the dump back for analysis.
om = loader.load(path)

# Compute the reference relationships (parents) between the objects.
om.compute_parents()

# Fold the __dict__ attribute of each instance into the instance itself.
om.collapse_instance_dicts()

#分析内存占用情况
 Index : 行索引号
  Count : 该类型的对象总数
  %(Count) : 该类型的对象总数 占 所有类型的对象总数 的百分比
  Size : 该类型的对象总字节数
  %(Size) : 该类型的对象总字节数 占 所有类型的对象总字节数 的百分比
  Cum : 累积行索引后的%(Size)
  Max : 该类型的对象中,最大者的字节数
  Kind : 类型
print(om.summarize())

# Fetch every POP3ClientProtocol object.
p = om.get_all('POP3ClientProtocol')
# Look at the first one.
p[0]
# The output shows that the object lives at address 2803894924, uses 1.7K of
# memory, references 51 objects and is referenced by 1 object.
# List everything this object references.
p[0].c
# List the objects that reference this object.
p[0].p

gdb

#接入gdb
gdb python pid
#查看线程
info threads

#coredump 如果要进行比较长时间的跟踪, 最好将python程序的进程信息全部coredump出来, 之后对core文件进行分析, 避免影响正在运行的程序.
generate-core-file

进程内查看内存

pyrasite-shell 11122
>>> import psutil, os
>>> psutil.Process(os.getpid()).memory_info().rss
29095232

查看python实际内存占用,非sys.getsizeof

import sys
import inspect

def get_size(obj, seen=None):
    """Recursively compute the memory footprint of ``obj`` in bytes.

    ``sys.getsizeof`` reports only an object's own (shallow) size. This
    function additionally follows the object's ``__dict__``, dict keys and
    values, the items of other iterable containers and ``__slots__``
    attributes, counting every distinct object once.

    Args:
        obj: The object to measure.
        seen: Set of ``id()`` values that were already counted. It is shared
            by the recursive calls; external callers normally leave it as
            ``None``.

    Returns:
        The accumulated size in bytes, or 0 if ``obj`` was counted before.
    """
    if seen is None:
        seen = set()
    obj_id = id(obj)
    if obj_id in seen:
        return 0
    # Important: mark as seen *before* entering recursion to gracefully
    # handle self-referential objects.
    seen.add(obj_id)

    size = sys.getsizeof(obj)

    if hasattr(obj, '__dict__'):
        # Add the attribute dictionary exactly once.
        size += get_size(obj.__dict__, seen)

    # Special methods are looked up on the type, so the type is what gets
    # tested: asking the object itself reports a class such as ``list`` as
    # iterable, and ``for item in list`` raises TypeError.
    obj_type = type(obj)
    if isinstance(obj, dict):
        size += sum(get_size(v, seen) for v in obj.values())
        size += sum(get_size(k, seen) for k in obj.keys())
    elif (hasattr(obj_type, '__iter__')
          and not hasattr(obj_type, '__next__')
          and not isinstance(obj, (str, bytes, bytearray))):
        # Iterators (generators, open files, ...) are skipped on purpose:
        # walking them would consume them, and whatever they are going to
        # yield is not memory they currently hold.
        size += sum(get_size(item, seen) for item in obj)

    if hasattr(obj, '__slots__'):  # can have __slots__ with __dict__
        slots = obj.__slots__
        if isinstance(slots, str):
            # ``__slots__ = 'name'`` declares one slot, not one per character.
            slots = (slots,)
        size += sum(get_size(getattr(obj, s), seen) for s in slots if hasattr(obj, s))
    return size


# Measure every object returned by gc.get_objects().
obj_list = []
for x in gc.get_objects():
    try:
        size = get_size(x)
    except Exception:
        # Best effort: an object that cannot be measured is recorded as 0
        # bytes. Catching Exception instead of using a bare ``except`` keeps
        # Ctrl-C (KeyboardInterrupt) able to abort a long scan.
        size = 0
    obj_list.append((type(x), size, id(x), str(x)[:10]))


# Measure every variable of the local namespace, remembering its name.
obj_list = []
# Iterate over a snapshot: at top level the loop variables are stored in
# this very namespace, and inserting them while iterating the live dict
# raises "RuntimeError: dictionary changed size during iteration".
for k, x in list(locals().items()):
    try:
        size = get_size(x)
    except Exception:
        # Best effort: an object that cannot be measured is recorded as 0
        # bytes; KeyboardInterrupt is deliberately not swallowed.
        size = 0
    obj_list.append((type(x), size, id(x), str(x)[:10], k))

# Measure every global variable, remembering its name.
obj_list = []
# Iterate over a snapshot: the loop variables are globals themselves, and
# inserting them while iterating the live dict raises
# "RuntimeError: dictionary changed size during iteration".
for k, x in list(globals().items()):
    try:
        size = get_size(x)
    except Exception:
        # Best effort: an object that cannot be measured is recorded as 0
        # bytes; KeyboardInterrupt is deliberately not swallowed.
        size = 0
    obj_list.append((type(x), size, id(x), str(x)[:10], k))

import pandas as pd
import numpy as np
import sys
# Show at most five columns when a frame is displayed.
pd.set_option('display.max_columns', 5)

# One row per measured object. Rows built from gc.get_objects() carry only
# four fields (no variable name), so their 'key' is None instead of raising
# IndexError.
df = pd.DataFrame({
    'type': [row[0] for row in obj_list],
    'size': [row[1] for row in obj_list],
    'id': [row[2] for row in obj_list],
    'str': [row[3] for row in obj_list],
    'key': [row[4] if len(row) > 4 else None for row in obj_list],
})

# Total size per type, ten largest first.
df['size'].groupby(df['type']).sum().sort_values(ascending=False).head(10)

# Individual objects ordered by size, largest first.
df.sort_values(by='size', ascending=False)
sorted(obj_list, key=lambda x: x[1], reverse=True)

解决方案

yum install autoconf automake gnome-common libtool gcc gcc-c++

wget https://github.com/gperftools/gperftools/archive/gperftools-2.6.1.tar.gz
tar -zvxf gperftools-2.6.1.tar.gz
cd gperftools-gperftools-2.6.1
./autogen.sh
./configure
make
make install 
echo '/usr/local/lib' > /etc/ld.so.conf.d/local.conf
ldconfig

LD_PRELOAD="/usr/local/lib/libtcmalloc.so" python

查看执行时间

python -m cProfile -o test1.out server.py
python -c "import pstats; p=pstats.Stats('test1.out'); p.sort_stats('time').print_stats()" >b