1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
|
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import argparse
import re
import sqlite3
import subprocess
from defusedxml import ElementTree
class AbuseReport:
"""Ingestation script for ejabberd spam logs"""
def __init__(self, arguments):
self.infile = arguments.infile
self.domain = arguments.domain
self.conn = sqlite3.connect('spam.db')
self.jid_pattern = re.compile("^(?:([^\"&'/:<>@]{1,1023})@)?([^/@]{1,1023})(?:/(.{1,1023}))?$")
self.message_pattern = re.compile(r'<message.*?</message>', re.DOTALL)
def main(self):
"""
method deciding over which action to take
"""
if self.infile is None:
# infile unset -> report top10
self.report()
elif self.infile:
# infile set -> ingest
self.ingest()
# close sqlite connection
self.conn.close()
def report(self):
"""
report method
:return: top10 score or domain specific data
"""
# if a specific domain is supplied return only that set
if self.domain is not None:
sql = 'sqlite3 -column -header spam.db "SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain ' \
'FROM spam WHERE domain=\'{}\';"'.format(self.domain)
else:
sql = 'sqlite3 -column -header spam.db "SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain AS domain ' \
'FROM spam GROUP BY domain ORDER BY 1 DESC LIMIT 10;"'
print(subprocess.getoutput(sql))
def ingest(self):
"""
method to ingest xml messages into sqlite database
"""
try:
with open(self.infile, "r", encoding="utf-8") as spam:
log = re.findall(self.message_pattern, spam.read())
self.db_import(log)
except FileNotFoundError as err:
print(err)
exit(1)
def db_import(self, message_log):
"""
import xml stanzas into database
:param message_log: xml messages
"""
self.conn.execute('''CREATE TABLE IF NOT EXISTS "spam" ("user" TEXT, "domain" TEXT, "ts" TEXT, "message" TEXT,
PRIMARY KEY("domain","ts"));''')
for message in message_log:
message_parsed = ElementTree.fromstring(message)
# parse from tag
spam_from = message_parsed.get('from')
match = self.jid_pattern.match(spam_from)
(node, domain, resource) = match.groups()
# stamp
spam_time = message_parsed.find('.//{urn:xmpp:delay}delay').get('stamp')
# body
spam_body = message_parsed.find('{jabber:client}body')
if spam_body is not None:
spam_body = spam_body.text
# format sql
sql = 'INSERT INTO spam("user", "domain", "ts", "message") VALUES("{}", "{}", "{}", "{}");'.format(
node, domain, spam_time, spam_body
)
try:
self.conn.execute(sql)
except sqlite3.IntegrityError:
pass
finally:
self.conn.commit()
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('-in', '--infile', help='set path to input file', dest='infile')
parser.add_argument('-d', '--domain', help='specify report domain', dest='domain')
args = parser.parse_args()
# run
AbuseReport(args).main()
"""
# Top 10 Domains and their score
SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain AS 'domain'
FROM spam
GROUP BY domain
ORDER BY 1 DESC LIMIT 10;
# Most frequent messages
SELECT COUNT(*) as count, COUNT(distinct user||domain) as bots,message
FROM spam
GROUP BY message HAVING bots > 1
ORDER BY 1 DESC LIMIT 5;
# report sql
SELECT COUNT(*) AS messages,COUNT(DISTINCT user) AS bots,domain
FROM spam
WHERE domain="default.rs";
"""
|