about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 2
-rw-r--r-- config.py 19
-rwxr-xr-x main.py 196
-rw-r--r-- report/.gitkeep 0
-rw-r--r-- template/abuse-template.txt 28
5 files changed, 201 insertions, 44 deletions
diff --git a/.gitignore b/.gitignore
index cce7ba1..bc8cbd5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -129,4 +129,4 @@ pip-selfcheck.json
# project specific files
spam.db
config.json
-spam-*.txt*
+/report/*.txt
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..12a41a4
--- /dev/null
+++ b/config.py
@@ -0,0 +1,19 @@
+# -*- coding: utf-8 -*-
+import json
+
+# Load config.json; if it does not exist yet, create it and populate it with
+# empty defaults so there is a file to fill in.
+try:
+    with open("config.json", "r", encoding="utf-8") as handle:
+        config = json.load(handle)
+
+except FileNotFoundError:
+    config = {
+        "name": "",
+    }
+    with open("config.json", "w", encoding="utf-8") as handle:
+        json.dump(config, handle)
+
+
+class Config(object):
+    """Expose the settings loaded from config.json as class attributes."""
+
+    # Value of the "name" key in config.json; falls back to an empty string
+    # when an already existing config.json does not define the key.
+    name = config.get("name", "")
diff --git a/main.py b/main.py
index 250cebb..d6ed2d3 100755
--- a/main.py
+++ b/main.py
@@ -1,13 +1,17 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
+import datetime as dt
+import gzip
+import os
import re
import sqlite3
+import dns.resolver as dns
import tabulate
from defusedxml import ElementTree
-import os
-import gzip
+
+from config import Config
class AbuseReport:
@@ -16,9 +20,10 @@ class AbuseReport:
def __init__(self, arguments):
self.infile = arguments.infile
self.domain = arguments.domain
+ self.report = arguments.report
self.path = os.path.dirname(__file__)
- self.conn = sqlite3.connect("".join([self.path, "/spam.db"]))
+ self.conn = sqlite3.connect("/".join([self.path, "spam.db"]))
self.jid_pattern = re.compile("^(?:([^\"&'/:<>@]{1,1023})@)?([^/@]{1,1023})(?:/(.{1,1023}))?$")
self.message_pattern = re.compile(r'<message.*?</message>', re.DOTALL)
@@ -26,10 +31,9 @@ class AbuseReport:
"""
method deciding over which action to take
"""
-
if self.infile is None:
# infile unset -> report top10
- self.report()
+ self.egest()
elif self.infile:
# infile set -> ingest
@@ -38,33 +42,45 @@ class AbuseReport:
# close sqlite connection
self.conn.close()
- def report(self):
+ def egest(self):
"""
report method
:return: top10 score or domain specific data
"""
- # if a specific domain is supplied return only that set
+ result = list()
+
+ # if domain is specified return info for that domain
if self.domain is not None:
- # first and last time seen spam from specified domain
- first = self.conn.execute("SELECT ts FROM spam WHERE domain=:domain ORDER BY ts LIMIT 1",
- {"domain": self.domain}).fetchone()[0]
- last = self.conn.execute("SELECT ts FROM spam WHERE domain=:domain ORDER BY ts DESC LIMIT 1",
- {"domain": self.domain}).fetchone()[0]
+ result = list()
- print("First seen : {first}\nLast seen : {last}\n".format(first=first, last=last))
+ # iterate over all domains supplied
+ for domain in self.domain:
- result = self.conn.execute('SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain FROM spam '
- 'WHERE domain=\'{}\';'.format(self.domain))
- else:
+ query = self.conn.execute('''SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain,
+ MIN(ts) AS first,MAX(ts) AS last FROM spam WHERE domain = :domain;''',
+ {"domain": domain}).fetchall()
- result = self.conn.execute('SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain AS domain '
- 'FROM spam GROUP BY domain ORDER BY 1 DESC LIMIT 10;')
+ # ugly tuple list swapping for nicer formatting
+ temp = list(query[0])
+ if temp[2] is None:
+ temp[2] = domain
+ query[0] = tuple(temp)
- # format data as table
- table = tabulate.tabulate(result, headers=["messages", "bots", "domain"], tablefmt="orgtbl")
- print(table)
+ # extend result table
+ result.extend(query)
+ # generate report if enabled
+ if self.report:
+ self.gen_report(domain, query)
+ else:
+ # in any other case return top 10
+ result = self.conn.execute('''SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain AS domain
+ FROM spam GROUP BY domain ORDER BY 1 DESC LIMIT 10;''')
+ # format data as table
+ table = tabulate.tabulate(result, headers=["messages", "bots", "domain","first seen", "last seen"],
+ tablefmt="orgtbl")
+ print(table)
def ingest(self):
"""
@@ -73,7 +89,7 @@ class AbuseReport:
"""
magic_number = b"\x1f\x8b\x08"
- # split up list
+ # iterate over all infile elements
for element in self.infile:
try:
@@ -82,9 +98,9 @@ class AbuseReport:
content = infile.read()
except FileNotFoundError as err:
+ content = ""
print(err)
- # check file for gzip magic number
# if magic number is present decompress and decode file
if content.startswith(magic_number):
content = gzip.decompress(content).decode("utf-8")
@@ -92,6 +108,7 @@ class AbuseReport:
else:
content = content.decode("utf-8")
+ # automated run None catch
if content is not None:
self.parse(content)
@@ -143,31 +160,124 @@ class AbuseReport:
finally:
self.conn.commit()
+    def gen_report(self, domain, query):
+        """
+        write the abuse report, the jid list and the log excerpt for one domain into the report directory
+        :param domain: domain the report files are generated for
+        :param query: summary rows of the domain, rendered as table inside the report
+        """
+        # os.path.join copes with an empty self.path (dirname of a bare script name),
+        # "/".join would turn that into an absolute "/template/..." path
+        template_path = os.path.join(self.path, "template", "abuse-template.txt")
+
+        try:
+            # open abuse report template file
+            with open(template_path, "r", encoding="utf-8") as template:
+                report_template = template.read()
+
+        except FileNotFoundError as err:
+            print(err)
+            # exit() is only injected by the site module, SystemExit is always available
+            raise SystemExit(1)
+
+        # current date
+        now = dt.datetime.now().strftime("%Y-%m-%d")
+
+        # output to report directory, create it if it is missing
+        report_dir = os.path.join(self.path, "report")
+        os.makedirs(report_dir, exist_ok=True)
+
+        # render everything before opening any file so that a failing lookup or query
+        # does not leave empty report files behind
+        outputs = (
+            ("", self.report_template(report_template, domain, query)),
+            ("-jids", self.report_jids(domain)),
+            ("-logs", self.report_logs(domain)),
+        )
+
+        # write report files
+        for suffix, content in outputs:
+            filename = "abuse-{domain}-{date}{suffix}.txt".format(domain=domain, date=now, suffix=suffix)
+            with open(os.path.join(report_dir, filename), "w", encoding="utf-8") as report_out:
+                report_out.write(content)
+
+    def report_template(self, template, domain, query):
+        """
+        fill the abuse report template with the data gathered for a domain
+        :param template: template text with the name, domain, srv, ips and summary placeholders
+        :param domain: domain the report is about
+        :param query: summary rows of the domain, rendered as orgtbl table
+        :return: the formatted report text
+        """
+        # lookup srv and domain info, only the first entry of the lookup result is reported
+        info = self.srvlookup(domain)
+        srv = info[0]["host"]
+
+        # a host may resolve to several addresses, separate them instead of gluing them together
+        ips = ", ".join(info[0]["ip"])
+
+        summary = tabulate.tabulate(query, headers=["messages", "bots", "domain", "first seen", "last seen"],
+                                    tablefmt="orgtbl")
+
+        return template.format(name=Config.name, domain=domain, srv=srv, ips=ips, summary=summary)
+
+    def report_jids(self, domain):
+        """
+        collect the accounts stored in the spam table for a domain
+        :param domain: domain to list the jids for
+        :return: plain formatted table of the user@domain jids
+        """
+        sql = '''SELECT user || '@' || domain as jid FROM spam WHERE domain=:domain GROUP BY user
+        ORDER BY 1;'''
+        rows = self.conn.execute(sql, {"domain": domain}).fetchall()
+
+        return tabulate.tabulate(rows, tablefmt="plain")
+
+    def report_logs(self, domain):
+        """
+        build a log excerpt of the ten most frequent messages stored for a domain
+        every entry holds the first and last timestamp, the message count and the message text
+        framed by separator lines
+        :param domain: domain to gather the messages for
+        :return: plain formatted table containing the excerpt
+        """
+        sql = '''SELECT char(10)||MIN(ts)||' - '||MAX(ts)||char(10)||COUNT(*)||' messages:'||char(10)
+        ||'========================================================================'||char(10)||message||char(10)||
+        '========================================================================' FROM spam WHERE domain=:domain
+        GROUP BY message ORDER BY COUNT(*) DESC LIMIT 10;'''
+        rows = self.conn.execute(sql, {"domain": domain}).fetchall()
+
+        return tabulate.tabulate(rows, tablefmt="plain")
+
+    def srvlookup(self, domain):
+        """
+        srv lookup method for the domain provided, if no srv record is found the base domain is used
+        :type domain: str
+        :param domain: provided domain to query srv records for
+        :return: list of dictionaries containing host and ip info, srv hosts are sorted by priority and weight
+        """
+        def lookup_ips(host):
+            # a host without A record must not abort the whole report, report no address instead
+            try:
+                return [ip.address for ip in dns.query(host, "A")]
+            except (dns.NXDOMAIN, dns.NoAnswer):
+                return []
+
+        # srv
+        query = '_xmpp-client._tcp.{}'.format(domain)
+
+        try:
+            srv_records = dns.query(query, 'SRV')
+
+        except (dns.NXDOMAIN, dns.NoAnswer):
+            # catch NXDOMAIN and NoAnswer tracebacks
+            srv_records = ()
+
+        # gather necessary info from srv records
+        results = [
+            {
+                "host": str(record.target).rstrip('.'),
+                "weight": record.weight,
+                "priority": record.priority,
+            }
+            for record in srv_records
+        ]
+
+        # sort by priority and weight
+        results.sort(key=lambda i: (i["priority"], i["weight"]))
+
+        if not results:
+            # prevent empty info when srv records are not present
+            results.append({"host": domain})
+
+        # resolve the addresses of every host found
+        for info in results:
+            info["ip"] = lookup_ips(info["host"])
+
+        return results
+
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-in', '--infile', nargs='+', help='set path to input file', dest='infile')
- parser.add_argument('-d', '--domain', help='specify report domain', dest='domain')
+ parser.add_argument('-d', '--domain', action='append', help='specify report domain', dest='domain')
+ parser.add_argument('-r', '--report', action='store_true', help='toggle report output to file', dest='report')
args = parser.parse_args()
# run
AbuseReport(args).main()
-
-"""
-# Top 10 Domains and their score
-SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain AS 'domain'
-FROM spam
-GROUP BY domain
-ORDER BY 1 DESC LIMIT 10;
-
-# Most frequent messages
-SELECT COUNT(*) as count, COUNT(distinct user||domain) as bots,message
-FROM spam
-GROUP BY message HAVING bots > 1
-ORDER BY 1 DESC LIMIT 5;
-
-# report sql
-SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain
-FROM spam
-WHERE domain="default.rs";
-"""
diff --git a/report/.gitkeep b/report/.gitkeep
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/report/.gitkeep
diff --git a/template/abuse-template.txt b/template/abuse-template.txt
new file mode 100644
index 0000000..fe729bb
--- /dev/null
+++ b/template/abuse-template.txt
@@ -0,0 +1,28 @@
+Subject: XMPP spam report for {domain} / {ips}
+
+XMPP domain: {domain}
+Server: {srv}
+Jabber IP: {ips}
+
+Hi,
+
+the above-mentioned server is used as an open relay to send vast amounts
+of XMPP spam to different unrelated servers, such as the server I
+administer.
+
+Spammers are using the In-Band-Registration feature on that server to
+create a large number of accounts, and to send mass messages to my
+users.
+
+Please contact the server owner to disable In-Band-Registration, to take
+measures against spam relaying or to shut down the XMPP service.
+
+Also please find attached a list of the bot accounts and an excerpt of
+the spam messages sent to my service.
+
+{summary}
+
+
+Kind regards,
+
+{name}