From 142c7db609a2b7e48105e6cbfd28c5385f83c705 Mon Sep 17 00:00:00 2001 From: nico Date: Wed, 17 Jul 2019 18:35:46 +0200 Subject: spam time-border + add time-borders to all queries The JabberSpam manifesto states that the critical period for a server to be approved again is 14 days. Thus I adjusted all queries required for the report function to only count within these borders. Though if a domain is specified without the report argument the time border is increased to 1 month to increase visibility. misc: + add Codefactor badge --- README.md | 1 + main.py | 8 +++++--- report.py | 7 ++++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 5fca75e..c8cb3e2 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ # ejabberd mod_spam_filter ingest +[![CodeFactor](https://www.codefactor.io/repository/github/mightybroccoli/spamfilter-ingest/badge)](https://www.codefactor.io/repository/github/mightybroccoli/spamfilter-ingest) ## installation Python 3 virtual environment diff --git a/main.py b/main.py index d885b7a..2156d6e 100755 --- a/main.py +++ b/main.py @@ -55,7 +55,8 @@ class AbuseReport: for domain in self.domain: sql_query = self.conn.execute('''SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain, MIN(ts) - AS first,MAX(ts) AS last FROM spam WHERE domain = :domain;''',{"domain": domain}).fetchall() + AS first,MAX(ts) AS last FROM spam WHERE ts BETWEEN DATE('now','-1 months') AND DATE('now') + and domain = :domain;''',{"domain": domain}).fetchall() # if specified domain is not listed yet, the resulting table will not show the domain name # this ugly tuple 2 list swap prevents this @@ -76,8 +77,9 @@ class AbuseReport: if self.config.get_at("top10_view"): result = self.conn.execute('''SELECT * FROM "top10"''').fetchall() else: - result = self.conn.execute('''SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain AS domain - FROM spam GROUP BY domain ORDER BY 1 DESC LIMIT 10''').fetchall() + result = self.conn.execute('''SELECT COUNT(*) AS messages, COUNT(DISTINCT user) AS bots, domain AS + domain FROM spam WHERE ts BETWEEN DATE('now','-14 days') AND DATE('now') GROUP BY domain + ORDER BY 1 DESC LIMIT 10;''').fetchall() # tabelize data spam_table = tabulate.tabulate(result, headers=["messages", "bots", "domain", "first seen", "last seen"], diff --git a/report.py b/report.py index 6357db5..e87517c 100644 --- a/report.py +++ b/report.py @@ -38,8 +38,8 @@ class ReportDomain: :return: formatted result string """ - jids = self.conn.execute('''SELECT user || '@' || domain as jid FROM spam WHERE domain=:domain GROUP BY user - ORDER BY 1;''', {"domain": domain}).fetchall() + jids = self.conn.execute('''SELECT user || '@' || domain AS jid FROM spam WHERE ts BETWEEN DATE('now','-14 days') + AND DATE('now') AND domain=:domain GROUP BY user ORDER BY 1;''', {"domain": domain}).fetchall() return tabulate.tabulate(jids, tablefmt="plain") @@ -52,7 +52,8 @@ class ReportDomain: logs = self.conn.execute('''SELECT CHAR(10) || MIN(ts) || ' - ' || MAX(ts) || char(10) || COUNT(*) || 'messages:' || char(10) ||'========================================================================' || char(10) || message || char(10) || '========================================================================' - FROM spam WHERE domain=:domain GROUP BY message ORDER BY COUNT(*) DESC LIMIT 10;''', {"domain": domain}).fetchall() + FROM spam WHERE ts BETWEEN DATE('now','-14 days') AND DATE('now') AND domain=:domain GROUP BY message ORDER + BY COUNT(*) DESC LIMIT 10;''', {"domain": domain}).fetchall() return tabulate.tabulate(logs, tablefmt="plain") -- cgit v1.2.3-18-g5258