aboutsummaryrefslogtreecommitdiffstats
path: root/main.py
diff options
context:
space:
mode:
authornico <nico@magicbroccoli.de>2019-11-04 14:59:58 +0100
committerGitHub <noreply@github.com>2019-11-04 14:59:58 +0100
commitb957dbc82c551c9119e77bcce6fc868ca1d09320 (patch)
treede4044d7a6b5f5f3c82796ad176a5aac7cfafd93 /main.py
parentad43e2117fdf000d599e7653053ef4ee8c67f1c3 (diff)
feature: custom timeperiod (#3)
+ add ability to define a custom time period This feature enables users to further narrow the expected query result by customizing the time period. Every script feature is able to handle the custom timestamps, but beware no further consistency checks are done. Thus if you provide a stop timestamp that is prior to the start timestamp the output will be empty, no error whatsoever. Timestamps must be conformal to the Python [datetime](https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior) module. The easiest way to achieve that is to use the ISO-8601 format from the unix date function: ```bash $date -Ins -d "3 weeks ago" $date --iso-8601 -d "3 weeks ago" ```
Diffstat (limited to 'main.py')
-rwxr-xr-xmain.py55
1 files changed, 40 insertions, 15 deletions
diff --git a/main.py b/main.py
index cb85aad..3cec813 100755
--- a/main.py
+++ b/main.py
@@ -16,12 +16,14 @@ from report import ReportDomain
class AbuseReport:
- """Ingestation script for ejabberd spam logs"""
+ """ingestion script for ejabberd spam logs"""
def __init__(self, arguments):
self.infile = arguments.infile
self.domain = arguments.domain
self.report = arguments.report
+ self.start = arguments.start
+ self.stop = arguments.stop
self.path = os.path.dirname(__file__)
self.config = Config()
@@ -67,39 +69,60 @@ class AbuseReport:
# init result list
result = list()
- # if a domain is specified return only that info
+ # parse time values
+ if self.start is None:
+ # default timeperiod are 31 days calculated via the timedelta
+ default = dt.datetime.combine(dt.date.today(), dt.time()) - dt.timedelta(days=31)
+ self.start = dt.datetime.strftime(default, "%Y-%m-%dT%H:%M:%S")
+
+ if self.stop is None:
+ # set stop value to now
+ self.stop = dt.datetime.strftime(dt.datetime.now(), '%Y-%m-%dT%H:%M:%S')
+
+ # if one or more domains are specified return only their info
if self.domain is not None:
# iterate over all domains supplied
for domain in self.domain:
- sql_query = self.conn.execute('''SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain, MIN(ts)
- AS first,MAX(ts) AS last FROM spam WHERE ts BETWEEN DATE('now','-1 months') AND DATE('now')
- and domain = :domain;''',{"domain": domain}).fetchall()
+ # build and execute
+ sql = '''SELECT COUNT(*) AS messages, COUNT(DISTINCT user) AS bots, domain, MIN(ts) AS first, MAX(ts) AS last \
+ FROM spam \
+ WHERE domain = :domain \
+ AND ts > :start \
+ AND ts < :stop;'''
+ parameter = {
+ "domain": domain,
+ "start": self.start,
+ "stop": self.stop
+ }
+ query = self.conn.execute(sql, parameter).fetchall()
# if specified domain is not listed yet, the resulting table will not show the domain name
# this ugly tuple 2 list swap prevents this
- temp = list(sql_query[0])
+ temp = list(query[0])
if temp[2] is None:
temp[2] = domain
- sql_query[0] = tuple(temp)
+ query[0] = tuple(temp)
# extend result tables
- result.extend(sql_query)
+ result.extend(query)
# generate report if enabled
if self.report:
- self.gen_report(domain, sql_query)
+ self.gen_report(domain, query)
else:
- # in any other case return top 10 view
- result = self.conn.execute('''SELECT COUNT(*) AS messages, COUNT(DISTINCT user) AS bots, domain AS
- domain FROM spam WHERE ts BETWEEN DATE('now','-14 days') AND DATE('now') GROUP BY domain
- ORDER BY 1 DESC LIMIT 10;''').fetchall()
+ # build and execute
+ sql = '''SELECT COUNT(*) AS messages, COUNT(DISTINCT user) AS bots, domain AS domain from spam \
+ WHERE ts > :start \
+ AND ts < :stop \
+ GROUP BY domain ORDER BY 1 DESC LIMIT 10;'''
+ result = self.conn.execute(sql, {"start": self.start, "stop": self.stop}).fetchall()
# tabelize data
spam_table = tabulate.tabulate(result, headers=["messages", "bots", "domain", "first seen", "last seen"],
- tablefmt="github")
+ tablefmt="github")
# output to stdout
output = "\n\n".join([spam_table])
@@ -168,7 +191,7 @@ class AbuseReport:
# format sql
try:
self.conn.execute('''INSERT INTO spam VALUES(:user, :domain, :spam_time, :spam_body);''',
- {"user": node, "domain": domain, "spam_time": spam_time, "spam_body": spam_body})
+ {"user": node, "domain": domain, "spam_time": spam_time, "spam_body": spam_body})
except sqlite3.IntegrityError:
pass
finally:
@@ -219,6 +242,8 @@ if __name__ == "__main__":
parser.add_argument('-in', '--infile', nargs='+', help='set path to input file', dest='infile')
parser.add_argument('-d', '--domain', action='append', help='specify report domain', dest='domain')
parser.add_argument('-r', '--report', action='store_true', help='toggle report output to file', dest='report')
+ parser.add_argument('-f', '--from', help='ISO-8601 timestamp from where to search', dest='start')
+ parser.add_argument('-t', '--to', help='ISO-8601 timestamp up until where to search', dest='stop')
args = parser.parse_args()
# run