From e2f679b201ca5ee1401e5987816fcdd40881ee44 Mon Sep 17 00:00:00 2001 From: nico Date: Wed, 22 May 2019 22:04:28 +0200 Subject: no more direct system calls * replace system calls to sqlite + add tabulate module to format output * update sql commands with named parameter substitution asd --- .gitignore | 8 +++----- main.py | 33 ++++++++++++++++++++++----------- requirements.txt | 3 ++- 3 files changed, 27 insertions(+), 17 deletions(-) mode change 100755 => 100644 .gitignore diff --git a/.gitignore b/.gitignore old mode 100755 new mode 100644 index c17170a..8d32868 --- a/.gitignore +++ b/.gitignore @@ -126,8 +126,6 @@ dmypy.json pyvenv.cfg pip-selfcheck.json -# do not index databases or logfiles -*.log -*.db -abuse-* -spam* +# project specific files +spam.db +spam-*.txt diff --git a/main.py b/main.py index e3ec788..014468b 100755 --- a/main.py +++ b/main.py @@ -3,8 +3,8 @@ import argparse import re import sqlite3 -import subprocess +import tabulate from defusedxml import ElementTree @@ -42,13 +42,26 @@ class AbuseReport: """ # if a specific domain is supplied return only that set if self.domain is not None: - sql = 'sqlite3 -column -header spam.db "SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain ' \ - 'FROM spam WHERE domain=\'{}\';"'.format(self.domain) + # first and last time seen spam from specified domain + first = self.conn.execute("SELECT ts FROM spam WHERE domain=:domain ORDER BY ts LIMIT 1", + {"domain": self.domain}).fetchone()[0] + last = self.conn.execute("SELECT ts FROM spam WHERE domain=:domain ORDER BY ts DESC LIMIT 1", + {"domain": self.domain}).fetchone()[0] + + print("First seen : {first}\nLast seen : {last}\n".format(first=first, last=last)) + + result = self.conn.execute('SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain FROM spam ' + 'WHERE domain=\'{}\';'.format(self.domain)) else: - sql = 'sqlite3 -column -header spam.db "SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain AS domain ' \ - 'FROM spam GROUP BY domain ORDER BY 1 DESC LIMIT 10;"' - print(subprocess.getoutput(sql)) + result = self.conn.execute('SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain AS domain ' + 'FROM spam GROUP BY domain ORDER BY 1 DESC LIMIT 10;') + + # format data as table + table = tabulate.tabulate(result, headers=["messages", "bots", "domain"], tablefmt="orgtbl") + print(table) + + def ingest(self): """ @@ -93,11 +106,9 @@ class AbuseReport: spam_body = spam_body.text # format sql - sql = 'INSERT INTO spam("user", "domain", "ts", "message") VALUES("{}", "{}", "{}", "{}");'.format( - node, domain, spam_time, spam_body - ) try: - self.conn.execute(sql) + self.conn.execute('INSERT INTO spam VALUES(:user, :domain, :spam_time, :spam_body);', + {"user": node, "domain": domain, "spam_time": spam_time, "spam_body": spam_body}) except sqlite3.IntegrityError: pass finally: @@ -115,7 +126,7 @@ if __name__ == "__main__": """ # Top 10 Domains and their score -SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain AS 'domain' +SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain AS 'domain' FROM spam GROUP BY domain ORDER BY 1 DESC LIMIT 10; diff --git a/requirements.txt b/requirements.txt index 20018c9..785dc6e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ defusedxml -slixmpp \ No newline at end of file +tabulate +dnspython -- cgit v1.2.3-18-g5258