diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | config.py | 19 | ||||
-rwxr-xr-x | main.py | 196 | ||||
-rw-r--r-- | report/.gitkeep | 0 | ||||
-rw-r--r-- | template/abuse-template.txt | 28 |
5 files changed, 201 insertions, 44 deletions
@@ -129,4 +129,4 @@ pip-selfcheck.json # project specific files spam.db config.json -spam-*.txt* +/report/*.txt diff --git a/config.py b/config.py new file mode 100644 index 0000000..12a41a4 --- /dev/null +++ b/config.py @@ -0,0 +1,19 @@ +# -*- coding: utf-8 -*- +import json + +# try to read config.json if nonexistent create config.json an populate it +try: + with open("config.json", "r", encoding="utf-8") as f: + config = json.load(f) + +except FileNotFoundError: + with open("config.json", "w", encoding="utf-8") as f: + config = { + "name": "", + } + f.write(json.dumps(config)) + + +class Config(object): + """extract secret key to use for the webserver""" + name = config["name"] @@ -1,13 +1,17 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- import argparse +import datetime as dt +import gzip +import os import re import sqlite3 +import dns.resolver as dns import tabulate from defusedxml import ElementTree -import os -import gzip + +from config import Config class AbuseReport: @@ -16,9 +20,10 @@ class AbuseReport: def __init__(self, arguments): self.infile = arguments.infile self.domain = arguments.domain + self.report = arguments.report self.path = os.path.dirname(__file__) - self.conn = sqlite3.connect("".join([self.path, "/spam.db"])) + self.conn = sqlite3.connect("/".join([self.path, "spam.db"])) self.jid_pattern = re.compile("^(?:([^\"&'/:<>@]{1,1023})@)?([^/@]{1,1023})(?:/(.{1,1023}))?$") self.message_pattern = re.compile(r'<message.*?</message>', re.DOTALL) @@ -26,10 +31,9 @@ class AbuseReport: """ method deciding over which action to take """ - if self.infile is None: # infile unset -> report top10 - self.report() + self.egest() elif self.infile: # infile set -> ingest @@ -38,33 +42,45 @@ class AbuseReport: # close sqlite connection self.conn.close() - def report(self): + def egest(self): """ report method :return: top10 score or domain specific data """ - # if a specific domain is supplied return only that set + result = list() + + # if domain is specified return info for that domain if self.domain is not None: - # first and last time seen spam from specified domain - first = self.conn.execute("SELECT ts FROM spam WHERE domain=:domain ORDER BY ts LIMIT 1", - {"domain": self.domain}).fetchone()[0] - last = self.conn.execute("SELECT ts FROM spam WHERE domain=:domain ORDER BY ts DESC LIMIT 1", - {"domain": self.domain}).fetchone()[0] + result = list() - print("First seen : {first}\nLast seen : {last}\n".format(first=first, last=last)) + # iterate over all domains supplied + for domain in self.domain: - result = self.conn.execute('SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain FROM spam ' - 'WHERE domain=\'{}\';'.format(self.domain)) - else: + query = self.conn.execute('''SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain, + MIN(ts) AS first,MAX(ts) AS last FROM spam WHERE domain = :domain;''', + {"domain": domain}).fetchall() - result = self.conn.execute('SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain AS domain ' - 'FROM spam GROUP BY domain ORDER BY 1 DESC LIMIT 10;') + # ugly tuple list swapping for nicer formatting + temp = list(query[0]) + if temp[2] is None: + temp[2] = domain + query[0] = tuple(temp) - # format data as table - table = tabulate.tabulate(result, headers=["messages", "bots", "domain"], tablefmt="orgtbl") - print(table) + # extend result table + result.extend(query) + # generate report if enabled + if self.report: + self.gen_report(domain, query) + else: + # in any other case return top 10 + result = self.conn.execute('''SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain AS domain + FROM spam GROUP BY domain ORDER BY 1 DESC LIMIT 10;''') + # format data as table + table = tabulate.tabulate(result, headers=["messages", "bots", "domain","first seen", "last seen"], + tablefmt="orgtbl") + print(table) def ingest(self): """ @@ -73,7 +89,7 @@ class AbuseReport: """ magic_number = b"\x1f\x8b\x08" - # split up list + # iterate over all infile elements for element in self.infile: try: @@ -82,9 +98,9 @@ class AbuseReport: content = infile.read() except FileNotFoundError as err: + content = "" print(err) - # check file for gzip magic number # if magic number is present decompress and decode file if content.startswith(magic_number): content = gzip.decompress(content).decode("utf-8") @@ -92,6 +108,7 @@ class AbuseReport: else: content = content.decode("utf-8") + # automated run None catch if content is not None: self.parse(content) @@ -143,31 +160,124 @@ class AbuseReport: finally: self.conn.commit() + def gen_report(self, domain, query): + try: + # open abuse report template file + with open("/".join([self.path, "template/abuse-template.txt"]), "r", encoding="utf-8") as template: + report_template = template.read() + + except FileNotFoundError as err: + print(err) + exit(1) + + # current date + now = dt.datetime.strftime(dt.datetime.now(), "%Y-%m-%d") + + # output to report directory + report_filename = "abuse-{domain}-{date}.txt".format(date=now, domain=domain) + jids_filename = "abuse-{domain}-{date}-jids.txt".format(date=now, domain=domain) + logs_filename = "abuse-{domain}-{date}-logs.txt".format(date=now, domain=domain) + + # write report files + with open("/".join([self.path, "report", report_filename]), "w", encoding="utf-8") as report_out: + content = self.report_template(report_template, domain, query) + report_out.write(content) + + with open("/".join([self.path, "report", jids_filename]), "w", encoding="utf-8") as report_out: + content = self.report_jids(domain) + report_out.write(content) + + with open("/".join([self.path, "report", logs_filename]), "w", encoding="utf-8") as report_out: + content = self.report_logs(domain) + report_out.write(content) + + def report_template(self, template, domain, query): + name = Config.name + + # lookup srv and domain info + info = self.srvlookup(domain) + srv = info[0]["host"] + ips = "".join(info[0]["ip"]) + summary = tabulate.tabulate(query, headers=["messages", "bots", "domain","first seen", "last seen"], + tablefmt="orgtbl") + + report_out= template.format(name=name, domain=domain, srv=srv, ips=ips, summary=summary) + + return report_out + + def report_jids(self, domain): + + jids = self.conn.execute('''SELECT user || '@' || domain as jid FROM spam WHERE domain=:domain GROUP BY user + ORDER BY 1;''', {"domain": domain}).fetchall() + + return tabulate.tabulate(jids, tablefmt="plain") + + def report_logs(self, domain): + """ + + :param domain: + :return: + """ + logs = self.conn.execute('''SELECT char(10)||MIN(ts)||' - '||MAX(ts)||char(10)||COUNT(*)||' messages:'||char(10) + ||'========================================================================'||char(10)||message||char(10)|| + '========================================================================' FROM spam WHERE domain=:domain + GROUP BY message ORDER BY COUNT(*) DESC LIMIT 10;''', {"domain": domain}).fetchall() + + return tabulate.tabulate(logs, tablefmt="plain") + + def srvlookup(self, domain): + """ + srv lookup method for the domain provided, if no srv record is found the base domain is used + :type domain: str + :param domain: provided domain to query srv records for + :return: sorted list of dictionaries containing host and ip info + """ + # srv + query = '_xmpp-client._tcp.{}'.format(domain) + + try: + srv_records = dns.query(query, 'SRV') + + except (dns.NXDOMAIN, dns.NoAnswer): + # catch NXDOMAIN and NoAnswer tracebacks + srv_records = None + + # extract record + results = list() + + if srv_records is not None: + # extract all available records + for record in srv_records: + info = dict() + + # gather necessary info from srv records + info["host"] = str(record.target).rstrip('.') + info["weight"] = record.weight + info["priority"] = record.priority + info["ip"] = [ip.address for ip in dns.query(info["host"], "A")] + results.append(info) + + # return list sorted by priority and weight + return sorted(results, key=lambda i: (i['priority'], i["weight"])) + + else: + # prevent empty info when srv records are not present + info = dict() + + # gather necessary info from srv records + info["host"] = domain + info["ip"] = [ip.address for ip in dns.query(info["host"], "A")] + results.append(info) + + return results + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('-in', '--infile', nargs='+', help='set path to input file', dest='infile') - parser.add_argument('-d', '--domain', help='specify report domain', dest='domain') + parser.add_argument('-d', '--domain', action='append', help='specify report domain', dest='domain') + parser.add_argument('-r', '--report', action='store_true', help='toggle report output to file', dest='report') args = parser.parse_args() # run AbuseReport(args).main() - -""" -# Top 10 Domains and their score -SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain AS 'domain' -FROM spam -GROUP BY domain -ORDER BY 1 DESC LIMIT 10; - -# Most frequent messages -SELECT COUNT(*) as count, COUNT(distinct user||domain) as bots,message -FROM spam -GROUP BY message HAVING bots > 1 -ORDER BY 1 DESC LIMIT 5; - -# report sql -SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain -FROM spam -WHERE domain="default.rs"; -""" diff --git a/report/.gitkeep b/report/.gitkeep new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/report/.gitkeep diff --git a/template/abuse-template.txt b/template/abuse-template.txt new file mode 100644 index 0000000..fe729bb --- /dev/null +++ b/template/abuse-template.txt @@ -0,0 +1,28 @@ +Subject: XMPP spam report for {domain} / {ips} + +XMPP domain: {domain} +Server: {srv} +Jabber IP: {ips} + +Hi, + +the above mentioned server is used as an open relay to send vast amounts +of XMPP spam to different unrelated servers, such as the server I +administer. + +Spammers are using the In-Band-Registration feature on that server to +create a large number of accounts, and to send mass messages to my +users. + +Please contact the server owner to disable In-Band-Registration, to take +measures against spam relaying or to shut down the XMPP service. + +Also please find attached a list of the bot accounts and an excerpt of +the spam messages sent to my service. + +{summary} + + +Kind regards, + +{name} |