A complete example of a perverted use: parsing auth.log

Imagine you are a sysadmin, your boss wants a graph of all the requests you handle, and you don’t like using Excel.

#!/usr/bin/env python
from archery.bow import Hankyu as _dict
from yahi import notch, shoot, ToxicSet
from datetime import datetime
from datetime import date
import locale

import dateutil
import re
import pylab as plt
from collections import OrderedDict
import numpy as np

# Force the "C" locale for every category. Presumably this keeps the %b month
# abbreviation used in date_pattern parsed as English ("May" in the log
# sample) whatever the system locale is -- TODO confirm.
locale.setlocale(locale.LC_ALL,"C")

def ordered_top(a_dict, rank=10):
    """Keep the ``rank`` biggest entries of a counter and lump the rest together.

    Entries are ranked by value, ties being broken by key, both descending.

    :param a_dict: mapping of key -> numeric count.
    :param rank: number of leading entries kept under their own key.
    :return: an ``OrderedDict`` whose first key is ``"other"`` (the sum of
        every value ranked ``rank`` or lower, 0 when there is none), followed
        by the kept entries from biggest to smallest.

    NOTE(review): a key literally named ``"other"`` in ``a_dict`` shares the
    slot of the catch-all bucket.
    """
    # "other" is inserted first so it always leads the resulting order.
    res = OrderedDict([("other", 0)])
    # Index-based key: the tuple-parameter lambda form is a SyntaxError on
    # Python 3, this one runs unchanged on Python 2 and 3.
    ranked = sorted(
        a_dict.items(),
        key=lambda item: (item[1], item[0]),
        reverse=True,
    )
    for i, (k, v) in enumerate(ranked):
        if i < rank:
            res[k] = v
        else:
            res["other"] += v
    return res


######################## Setting UP ##################################
# parsing command line & default settings. Return a not fully qualified object
# NOTE(review): off="user_agent" presumably switches off user-agent handling,
# which an auth.log line does not carry -- confirm against yahi's documentation.
context = notch(
    off="user_agent",
    log_format="custom",
    output_format="json",
    # Layout of the syslog timestamp; the log line carries no year.
    date_pattern="%b %d %H:%M:%S",
    # Raw literal: \s, \d, \. and "\ " are regex escapes, not Python string
    # escapes, so a plain literal triggers invalid-escape warnings on modern
    # Python. The resulting string is character-for-character the same.
    # NOTE(review): the escaped spaces and embedded newlines suggest the
    # pattern is compiled in verbose mode by yahi -- confirm.
    log_pattern=r"""^(?P<datetime>[^\ ]+\s{1,2}\d{1,2}\s\d{2,2}:\d{2,2}:\d{2,2})\s
    (?P<nawak>[^:]+):\s
    Invalid\ user\ (?P<user>.*?)\s
    from\ (?P<ip>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})$""")
# log sample
#May 20 12:14:15 lupin sshd[36291]: Invalid user dave from 69.60.114.57

def date_formater(dt):
    """Render *dt* as "year-month-day" with no zero padding, e.g. "2012-5-20".

    *dt* only needs ``year``, ``month`` and ``day`` attributes.
    """
    return "%s-%s-%s" % (dt.year, dt.month, dt.day)
def _count_invalid_login(data):
    """Build the partial aggregate for one parsed log record.

    ``data`` is indexed by the regex group names (``ip``, ``user``) plus
    ``_country`` and ``_datetime``.
    NOTE(review): the underscore-prefixed keys are presumably added by yahi
    (geo lookup and parsed timestamp) -- confirm.
    """
    return _dict({
        "black_list": ToxicSet([data["ip"]]),
        "by_country": _dict({data["_country"]: 1}),
        "date_s": _dict({date_formater(data["_datetime"]): 1}),
        "by_ip": _dict({data["ip"]: 1}),
        # The year is pinned to 2012; only month and day come from the record.
        "date": _dict({
            date(2012, data["_datetime"].month, data["_datetime"].day): 1,
        }),
        "by_user": _dict({data["user"]: 1}),
        "total": 1,
    })


# NOTE(review): shoot presumably calls the function once per matched line and
# merges the returned mappings -- confirm against yahi's documentation.
res = shoot(context, _count_invalid_login)

# Let's go draw some plot
def labeled_bar(ax, _dict):
    """Draw one bar per entry of a mapping and write its value and key on it.

    :param ax: axes-like object providing ``set_xticks``, ``set_xticklabels``,
        ``bar`` and ``text`` (a matplotlib Axes in this script).
    :param _dict: mapping of label -> bar height; its iteration order is the
        left-to-right order of the bars. The name shadows the module-level
        ``_dict`` alias inside this function only.
    :return: whatever ``ax.bar`` returns.
    """
    # Materialise once: on Python 3 keys()/values() are views that cannot be
    # indexed, and plain lists are what the plotting calls expect.
    labels = list(_dict.keys())
    heights = list(_dict.values())
    pos = np.arange(len(labels)) + .5
    # Ticks and labels are set separately: older matplotlib releases read the
    # second positional argument of set_xticks as the `minor` flag.
    ax.set_xticks(pos)
    ax.set_xticklabels(labels)
    rects = ax.bar(pos, heights, label=labels, align='center')
    for rect, label in zip(rects, labels):
        height = rect.get_height()
        # Caption sits 100 data units below the top of the bar, centred on it.
        ax.text(rect.get_x() + rect.get_width() / 2., height - 100,
                '%.1f\n%s' % (height, label),
                ha='center', va='bottom', color='white', fontsize=8)
    return rects

# Single 18x9 figure holding the three panels drawn below; its headline gives
# the overall attempt count and the number of entries in the black list.
fig = plt.figure(221, figsize=(18, 9))
headline = "%d SSH unauthorized access from %d sources" % (
    res["total"],
    len(res["black_list"]),
)
fig.suptitle(headline, fontsize=16)
# Top-left panel: pie chart of the five most frequent countries, the
# remainder being folded into the "other" slice by ordered_top.
ax = fig.add_subplot(221)
by_country = ordered_top(res["by_country"], 5)
ax.set_title("Top 5 country by sources")
# Explicit lists and a comprehension: the tuple-parameter lambda is a
# SyntaxError on Python 3, where map() and values() are also lazy objects.
ax.pie(
    list(by_country.values()),
    labels=["%s (%d)" % (k, v) for k, v in by_country.items()],
    shadow=True,
)

# Top-right panel: bar chart of the ten most frequently tried user names.
ax = fig.add_subplot(222)
# The figure quoted in the title is the number of distinct keys in by_user.
users_title = "Top 10 tested users (amongst %d trials)" % len(res["by_user"])
ax.set_title(users_title)
by_user = ordered_top(res["by_user"])
# Only the named users are drawn, so the catch-all bucket is removed.
del by_user["other"]
labeled_bar(ax, by_user)

# Bottom panel (full width): number of rejected connections per day.
ax = fig.add_subplot(212)
ax.set_title("Unauthorized connections by dates")
# Re-store the per-day counters in chronological order.
res["date"] = OrderedDict(sorted(res["date"].items()))
# keys()/values() are wrapped in list(): on Python 3 they are views, and
# np.array() on a view yields a 0-d object array instead of an array of dates.
ax.plot_date(
    plt.date2num(np.array(list(res["date"].keys()))),
    list(res["date"].values()),
    linestyle="-",
)

# Let matplotlib tidy up the date tick labels of the figure.
fig.autofmt_xdate()
# Write the figure to "attack.png"; the path is relative, so the file lands in
# the current working directory.
plt.savefig("attack.png")
_images/attack.png