1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
|
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import datetime as dt
import os
import sqlite3
import sys
import tabulate
from ingest import IngestLogfile
from report import ReportDomain
class AbuseReport:
"""ingestion script for ejabberd spam logs"""
def __init__(self, arguments):
self.infile = arguments.infile
self.domain = arguments.domain
self.report = arguments.report
self.start = arguments.start
self.stop = arguments.stop
self.path = os.path.dirname(__file__)
self.conn = sqlite3.connect("/".join([self.path, "spam.db"]))
self.Report = ReportDomain(self.conn)
self.Ingest = IngestLogfile(self.conn)
def main(self):
"""main method guiding the actions to take"""
# run check method before each execution
self.check()
if self.infile is None:
# infile unset -> report top10
self.egest()
elif self.infile:
# infile set -> ingest
self.Ingest.read(self.infile)
# close sqlite connection
self.conn.close()
def check(self):
# check if the minimum requirements are met
table = ('table', 'spam')
master = self.conn.execute('''SELECT type, name from sqlite_master;''').fetchall()
# if not run create method
if table not in master:
self.create()
def create(self):
# open and execute base schema file
script = "/".join([self.path, "schema.sql"])
with open(script) as file:
schema = file.read()
self.conn.executescript(schema)
def egest(self):
"""egest method returning the database results"""
# init result list
result = list()
# parse time values
if self.start is None:
# default timeperiod are 31 days calculated via the timedelta
default = dt.datetime.combine(dt.date.today(), dt.time()) - dt.timedelta(days=31)
self.start = dt.datetime.strftime(default, "%Y-%m-%dT%H:%M:%S")
if self.stop is None:
# set stop value to now
self.stop = dt.datetime.strftime(dt.datetime.now(), '%Y-%m-%dT%H:%M:%S')
# add validated timestamps to report class
self.Report.addtime(self.start, self.stop)
# if one or more domains are specified return only their info
if self.domain is not None:
# iterate over all domains supplied
for domain in self.domain:
# build and execute
sql = '''SELECT COUNT(*) AS messages, COUNT(DISTINCT user) AS bots, domain, MIN(ts) AS first, MAX(ts) AS last
FROM spam
WHERE domain = :domain
AND ts > :start AND ts < :stop;'''
parameter = {
"domain": domain,
"start": self.start,
"stop": self.stop
}
query = self.conn.execute(sql, parameter).fetchall()
# if specified domain is not listed yet, the resulting table will not show the domain name
# this ugly tuple 2 list swap prevents this
temp = list(query[0])
if temp[2] is None:
temp[2] = domain
query[0] = tuple(temp)
# extend result tables
result.extend(query)
# generate report if enabled
if self.report:
self.gen_report(domain, query)
else:
# build and execute
sql = '''SELECT COUNT(*) AS messages, COUNT(DISTINCT user) AS bots, domain AS domain from spam
WHERE ts > :start AND ts < :stop
GROUP BY domain ORDER BY 1 DESC LIMIT 10;'''
result = self.conn.execute(sql, {"start": self.start, "stop": self.stop}).fetchall()
# tabelize data
spam_table = tabulate.tabulate(result, tablefmt="psql", headers=["messages", "bots", "domain","first seen",
"last seen"])
# output to stdout
output = "\n\n".join([spam_table])
print(output, file=sys.stdout)
def gen_report(self, domain: str, query: list):
"""
method generating the report files
:param domain: string containing a domain name
:param query: list of tuples containing the query results for the specified domain/s
"""
try:
# open abuse report template file
with open("/".join([self.path, "template/abuse-template.txt"]), "r", encoding="utf-8") as template:
report_template = template.read()
except FileNotFoundError as err:
print(err, file=sys.stderr)
exit(1)
# current date
now = dt.datetime.strftime(dt.datetime.now(), "%Y-%m-%d")
# output to report directory
report_filename = "abuse-{domain}-{date}.txt".format(date=now, domain=domain)
jids_filename = "abuse-{domain}-{date}-jids.txt".format(date=now, domain=domain)
logs_filename = "abuse-{domain}-{date}-logs.txt".format(date=now, domain=domain)
# write report files
with open("/".join([self.path, "report", report_filename]), "w", encoding="utf-8") as report_out:
content = self.Report.template(report_template, domain, query)
report_out.write(content)
with open("/".join([self.path, "report", jids_filename]), "w", encoding="utf-8") as report_out:
content = self.Report.jids(domain)
report_out.write(content)
with open("/".join([self.path, "report", logs_filename]), "w", encoding="utf-8") as report_out:
content = self.Report.logs(domain)
report_out.write(content)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-in', '--infile', nargs='+', help='set path to input file', dest='infile')
parser.add_argument('-d', '--domain', action='append', help='specify report domain', dest='domain')
parser.add_argument('-r', '--report', action='store_true', help='toggle report output to file', dest='report')
parser.add_argument('-f', '--from', help='ISO-8601 timestamp from where to search', dest='start')
parser.add_argument('-t', '--to', help='ISO-8601 timestamp up until where to search', dest='stop')
args = parser.parse_args()
# run
AbuseReport(args).main()
|