-
Notifications
You must be signed in to change notification settings - Fork 0
/
classes.py
executable file
·66 lines (60 loc) · 3.36 KB
/
classes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
class Candidate:
"""
class containing information about an entity candidate
"""
def __init__(self, subject, predicate="", string="", the_type=None,
lotus_score=0.0, tp_score=0.0, pr_score=0.0, ss_score=0.0, score=0.0):
self.subject = subject # e.g. John_Smith
self.predicate = predicate # e.g. dbo:label
self.string = string # e.g. "John Smith" -> the mention of an entity as found in text
self.the_type = the_type # e.g. "Person" | "http://dbpedia.org/ontology/Person"
self.lotus_score = lotus_score # rank according to lotus (e.g. 3)
self.tp_score = tp_score # score according on temporal popularity
self.pr_score = pr_score # score according to pagerank
self.ss_score = ss_score # string similarity to the entity mention
self.score = score # overall score
class EntityMention:
"""
class containing information about an entity mention
"""
def __init__(self, mention,
begin_index, end_index,
gold_link=None,
the_type=None, sentence=None, sys_link=None, score=0.0, anchor_mention=None,
total_pr=0.0, total_lotus=0.0, total_tp=0.0, gold_pr=0.0): #, exact_match=False):
self.sentence = sentence # e.g. 4 -> which sentence is the entity mentioned in
self.mention = mention # e.g. "John Smith" -> the mention of an entity as found in text
self.the_type = the_type # e.g. "Person" | "http://dbpedia.org/ontology/Person"
self.begin_index = begin_index # e.g. 15 -> begin offset
self.end_index = end_index # e.g. 25 -> end offset
self.gold_link = gold_link # gold link if existing
self.candidates = set() # candidates from LOTUS
self.sys_link = sys_link # final system link
self.anchor_mention = anchor_mention # previous coreferential mentions of the same entity
self.gold_pr = gold_pr # pagerank of the gold instance
self.total_pr = total_pr # total PageRank of all candidates
self.total_lotus = total_lotus # total LOTUS score of all candidates
self.total_tp = total_tp # total TP of all candidates
class NewsItem:
"""
class containing information about a news item
"""
def __init__(self, identifier, content="", collection=None,
dct=None, publisher=None,
domain=None):
self.identifier = identifier # string, the original document name in the dataset
self.collection = collection # which collection does it come from (one of ECB+, SignalMedia, or some WSD dataset)
self.dct = dct # e.g. "2005-05-14T02:00:00.000+02:00" -> document creation time
self.publisher = publisher # e.g. "Reuters" -> Who is the publisher of the document (e.g. Reuters)
self.domain = domain # e.g. "crime" -> one of the topics/domains (we could also use URI identifiers for these if we want to keep more information)
self.content = content # the text of the news article
self.entity_mentions = [] # set of instances of EntityMention class
class Publisher:
"""
class containing information about a publisher
"""
def __init__(self, name, dbpedia_uri, homepage, location_dbpedia_uri):
self.name = name
self.dbpedia_uri = dbpedia_uri
self.homepage = homepage
self.location_dbpedia_uri = location_dbpedia_uri