aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authornico <nico@magicbroccoli.de>2019-11-04 14:59:58 +0100
committerGitHub <noreply@github.com>2019-11-04 14:59:58 +0100
commitb957dbc82c551c9119e77bcce6fc868ca1d09320 (patch)
treede4044d7a6b5f5f3c82796ad176a5aac7cfafd93
parentad43e2117fdf000d599e7653053ef4ee8c67f1c3 (diff)
feature: custom timeperiod (#3)
+ add ability to define a custom time period This feature enables users to further narrow the expected query result by customizing the time period. Every script feature is able to handle the custom timestamps, but beware no further consistency checks are done. Thus if you provide a stop timestamp that is prior to the start timestamp the output will be empty, no error whatsoever. Timestamps must be conformal to the Python [datetime](https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior) module. The easiest way to achieve that is to use the ISO-8601 format from the unix date function: ```bash $date -Ins -d "3 weeks ago" $date --iso-8601 -d "3 weeks ago" ```
-rw-r--r--README.md9
-rwxr-xr-xmain.py55
2 files changed, 48 insertions, 16 deletions
diff --git a/README.md b/README.md
index c8cb3e2..016aba7 100644
--- a/README.md
+++ b/README.md
@@ -34,7 +34,7 @@ $ cat config.json
## usage main.py
```
-usage: main.py [-h] [-in INFILE [INFILE ...]] [-d DOMAIN] [-r]
+usage: main.py [-h] [-in INFILE [INFILE ...]] [-d DOMAIN] [-r] [-f A] [-t B]
optional arguments:
-h, --help show this help message and exit
@@ -43,6 +43,8 @@ optional arguments:
-d DOMAIN, --domain DOMAIN
specify report domain
-r, --report toggle report output to file
+ -f A, --from A ISO-8601 timestamp where to start the search
+ -t B, --to B ISO-8601 timestamp up until where to start the search
```
#### run with no argument
@@ -95,3 +97,8 @@ $ ./main.py --d example.tld -d example.com
#### -r / --report
This flag will only take effect if the `-d` or `--domain` argument is used. If that is the case, the script will
automatically gather information about the specified domain and write them to the `report` directory.
+
+#### -f / --from and -t / --to
+With this flag it is possible to provide an [ISO-8601](https://www.w3.org/TR/NOTE-datetime) timestamp with the specified
+query, to further narrow the expected result.
+All outputting querys support the custom time period flags.
diff --git a/main.py b/main.py
index cb85aad..3cec813 100755
--- a/main.py
+++ b/main.py
@@ -16,12 +16,14 @@ from report import ReportDomain
class AbuseReport:
- """Ingestation script for ejabberd spam logs"""
+ """ingestion script for ejabberd spam logs"""
def __init__(self, arguments):
self.infile = arguments.infile
self.domain = arguments.domain
self.report = arguments.report
+ self.start = arguments.start
+ self.stop = arguments.stop
self.path = os.path.dirname(__file__)
self.config = Config()
@@ -67,39 +69,60 @@ class AbuseReport:
# init result list
result = list()
- # if a domain is specified return only that info
+ # parse time values
+ if self.start is None:
+ # default timeperiod are 31 days calculated via the timedelta
+ default = dt.datetime.combine(dt.date.today(), dt.time()) - dt.timedelta(days=31)
+ self.start = dt.datetime.strftime(default, "%Y-%m-%dT%H:%M:%S")
+
+ if self.stop is None:
+ # set stop value to now
+ self.stop = dt.datetime.strftime(dt.datetime.now(), '%Y-%m-%dT%H:%M:%S')
+
+ # if one or more domains are specified return only their info
if self.domain is not None:
# iterate over all domains supplied
for domain in self.domain:
- sql_query = self.conn.execute('''SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain, MIN(ts)
- AS first,MAX(ts) AS last FROM spam WHERE ts BETWEEN DATE('now','-1 months') AND DATE('now')
- and domain = :domain;''',{"domain": domain}).fetchall()
+ # build and execute
+ sql = '''SELECT COUNT(*) AS messages, COUNT(DISTINCT user) AS bots, domain, MIN(ts) AS first, MAX(ts) AS last \
+ FROM spam \
+ WHERE domain = :domain \
+ AND ts > :start \
+ AND ts < :stop;'''
+ parameter = {
+ "domain": domain,
+ "start": self.start,
+ "stop": self.stop
+ }
+ query = self.conn.execute(sql, parameter).fetchall()
# if specified domain is not listed yet, the resulting table will not show the domain name
# this ugly tuple 2 list swap prevents this
- temp = list(sql_query[0])
+ temp = list(query[0])
if temp[2] is None:
temp[2] = domain
- sql_query[0] = tuple(temp)
+ query[0] = tuple(temp)
# extend result tables
- result.extend(sql_query)
+ result.extend(query)
# generate report if enabled
if self.report:
- self.gen_report(domain, sql_query)
+ self.gen_report(domain, query)
else:
- # in any other case return top 10 view
- result = self.conn.execute('''SELECT COUNT(*) AS messages, COUNT(DISTINCT user) AS bots, domain AS
- domain FROM spam WHERE ts BETWEEN DATE('now','-14 days') AND DATE('now') GROUP BY domain
- ORDER BY 1 DESC LIMIT 10;''').fetchall()
+ # build and execute
+ sql = '''SELECT COUNT(*) AS messages, COUNT(DISTINCT user) AS bots, domain AS domain from spam \
+ WHERE ts > :start \
+ AND ts < :stop \
+ GROUP BY domain ORDER BY 1 DESC LIMIT 10;'''
+ result = self.conn.execute(sql, {"start": self.start, "stop": self.stop}).fetchall()
# tabelize data
spam_table = tabulate.tabulate(result, headers=["messages", "bots", "domain", "first seen", "last seen"],
- tablefmt="github")
+ tablefmt="github")
# output to stdout
output = "\n\n".join([spam_table])
@@ -168,7 +191,7 @@ class AbuseReport:
# format sql
try:
self.conn.execute('''INSERT INTO spam VALUES(:user, :domain, :spam_time, :spam_body);''',
- {"user": node, "domain": domain, "spam_time": spam_time, "spam_body": spam_body})
+ {"user": node, "domain": domain, "spam_time": spam_time, "spam_body": spam_body})
except sqlite3.IntegrityError:
pass
finally:
@@ -219,6 +242,8 @@ if __name__ == "__main__":
parser.add_argument('-in', '--infile', nargs='+', help='set path to input file', dest='infile')
parser.add_argument('-d', '--domain', action='append', help='specify report domain', dest='domain')
parser.add_argument('-r', '--report', action='store_true', help='toggle report output to file', dest='report')
+ parser.add_argument('-f', '--from', help='ISO-8601 timestamp from where to search', dest='start')
+ parser.add_argument('-t', '--to', help='ISO-8601 timestamp up until where to search', dest='stop')
args = parser.parse_args()
# run