aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--README.md9
-rwxr-xr-xmain.py55
2 files changed, 48 insertions, 16 deletions
diff --git a/README.md b/README.md
index c8cb3e2..016aba7 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ $ cat config.json
## usage main.py
```
-usage: main.py [-h] [-in INFILE [INFILE ...]] [-d DOMAIN] [-r]
+usage: main.py [-h] [-in INFILE [INFILE ...]] [-d DOMAIN] [-r] [-f A] [-t B]
optional arguments:
-h, --help show this help message and exit
@@ -43,6 +43,8 @@ optional arguments:
-d DOMAIN, --domain DOMAIN
specify report domain
-r, --report toggle report output to file
+ -f A, --from A ISO-8601 timestamp where to start the search
+ -t B, --to B ISO-8601 timestamp up until where to start the search
```
#### run with no argument
@@ -95,3 +97,8 @@ $ ./main.py --d example.tld -d example.com
#### -r / --report
This flag will only take effect if the `-d` or `--domain` argument is used. If that is the case, the script will
automatically gather information about the specified domain and write them to the `report` directory.
+
+#### -f / --from and -t / --to
+With this flag it is possible to provide an [ISO-8601](https://www.w3.org/TR/NOTE-datetime) timestamp with the specified
+query, to further narrow the expected result.
+All outputting querys support the custom time period flags.
diff --git a/main.py b/main.py
index cb85aad..3cec813 100755
--- a/main.py
+++ b/main.py
@@ -16,12 +16,14 @@ from report import ReportDomain
class AbuseReport:
- """Ingestation script for ejabberd spam logs"""
+ """ingestion script for ejabberd spam logs"""
def __init__(self, arguments):
self.infile = arguments.infile
self.domain = arguments.domain
self.report = arguments.report
+ self.start = arguments.start
+ self.stop = arguments.stop
self.path = os.path.dirname(__file__)
self.config = Config()
@@ -67,39 +69,60 @@ class AbuseReport:
# init result list
result = list()
- # if a domain is specified return only that info
+ # parse time values
+ if self.start is None:
+ # default timeperiod are 31 days calculated via the timedelta
+ default = dt.datetime.combine(dt.date.today(), dt.time()) - dt.timedelta(days=31)
+ self.start = dt.datetime.strftime(default, "%Y-%m-%dT%H:%M:%S")
+
+ if self.stop is None:
+ # set stop value to now
+ self.stop = dt.datetime.strftime(dt.datetime.now(), '%Y-%m-%dT%H:%M:%S')
+
+ # if one or more domains are specified return only their info
if self.domain is not None:
# iterate over all domains supplied
for domain in self.domain:
- sql_query = self.conn.execute('''SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain, MIN(ts)
- AS first,MAX(ts) AS last FROM spam WHERE ts BETWEEN DATE('now','-1 months') AND DATE('now')
- and domain = :domain;''',{"domain": domain}).fetchall()
+ # build and execute
+ sql = '''SELECT COUNT(*) AS messages, COUNT(DISTINCT user) AS bots, domain, MIN(ts) AS first, MAX(ts) AS last \
+ FROM spam \
+ WHERE domain = :domain \
+ AND ts > :start \
+ AND ts < :stop;'''
+ parameter = {
+ "domain": domain,
+ "start": self.start,
+ "stop": self.stop
+ }
+ query = self.conn.execute(sql, parameter).fetchall()
# if specified domain is not listed yet, the resulting table will not show the domain name
# this ugly tuple 2 list swap prevents this
- temp = list(sql_query[0])
+ temp = list(query[0])
if temp[2] is None:
temp[2] = domain
- sql_query[0] = tuple(temp)
+ query[0] = tuple(temp)
# extend result tables
- result.extend(sql_query)
+ result.extend(query)
# generate report if enabled
if self.report:
- self.gen_report(domain, sql_query)
+ self.gen_report(domain, query)
else:
- # in any other case return top 10 view
- result = self.conn.execute('''SELECT COUNT(*) AS messages, COUNT(DISTINCT user) AS bots, domain AS
- domain FROM spam WHERE ts BETWEEN DATE('now','-14 days') AND DATE('now') GROUP BY domain
- ORDER BY 1 DESC LIMIT 10;''').fetchall()
+ # build and execute
+ sql = '''SELECT COUNT(*) AS messages, COUNT(DISTINCT user) AS bots, domain AS domain from spam \
+ WHERE ts > :start \
+ AND ts < :stop \
+ GROUP BY domain ORDER BY 1 DESC LIMIT 10;'''
+ result = self.conn.execute(sql, {"start": self.start, "stop": self.stop}).fetchall()
# tabelize data
spam_table = tabulate.tabulate(result, headers=["messages", "bots", "domain", "first seen", "last seen"],
- tablefmt="github")
+ tablefmt="github")
# output to stdout
output = "\n\n".join([spam_table])
@@ -168,7 +191,7 @@ class AbuseReport:
# format sql
try:
self.conn.execute('''INSERT INTO spam VALUES(:user, :domain, :spam_time, :spam_body);''',
- {"user": node, "domain": domain, "spam_time": spam_time, "spam_body": spam_body})
+ {"user": node, "domain": domain, "spam_time": spam_time, "spam_body": spam_body})
except sqlite3.IntegrityError:
pass
finally:
@@ -219,6 +242,8 @@ if __name__ == "__main__":
parser.add_argument('-in', '--infile', nargs='+', help='set path to input file', dest='infile')
parser.add_argument('-d', '--domain', action='append', help='specify report domain', dest='domain')
parser.add_argument('-r', '--report', action='store_true', help='toggle report output to file', dest='report')
+ parser.add_argument('-f', '--from', help='ISO-8601 timestamp from where to search', dest='start')
+ parser.add_argument('-t', '--to', help='ISO-8601 timestamp up until where to search', dest='stop')
args = parser.parse_args()
# run