From 799c8a0e426f2f30e5f98b05446690528dac7f99 Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Thu, 24 Aug 2017 18:12:11 +0000 Subject: [PATCH 01/17] some code for experiments --- embeddings/build_vocab.py | 77 +++++++++++++++++ embeddings/glove.py | 77 +++++++++++++++++ embeddings/map_reduce.py | 76 +++++++++++++++++ embeddings/move_prox.py | 18 ++++ embeddings/node2vec.py | 141 +++++++++++++++++++++++++++++++ embeddings/random_walk.py | 171 ++++++++++++++++++++++++++++++++++++++ embeddings/role_model.py | 114 +++++++++++++++++++++++++ embeddings/role_train.py | 22 +++++ 8 files changed, 696 insertions(+) create mode 100644 embeddings/build_vocab.py create mode 100644 embeddings/glove.py create mode 100644 embeddings/map_reduce.py create mode 100644 embeddings/move_prox.py create mode 100644 embeddings/node2vec.py create mode 100644 embeddings/random_walk.py create mode 100644 embeddings/role_model.py create mode 100644 embeddings/role_train.py diff --git a/embeddings/build_vocab.py b/embeddings/build_vocab.py new file mode 100644 index 0000000..ac3e880 --- /dev/null +++ b/embeddings/build_vocab.py @@ -0,0 +1,77 @@ +import argparse +from collections import Counter +import logging + +from ast2vec.token_parser import TokenParser +from ast2vec.uast import UASTModel +from map_reduce import MapReduce + + +class Vocab(MapReduce): + def __init__(self, log_level, num_processes, vocab_path): + super(Vocab, self).__init__(log_level=log_level, num_processes=num_processes) + self.token_parser = TokenParser() + if vocab_path is None: + self.vocab_path = "vocab.txt" + else: + self.vocab_path = vocab_path + + def create(self, files): + vocab = Counter() + + @MapReduce.wrap_queue_in + def uasts_vocab(self, filename): + uast_model = UASTModel().load(filename) + tokens = Counter() + for uast in uast_model.uasts: + nodes = [uast] + while nodes: + node = nodes.pop() + tokens.update(self._get_tokens(node)) + nodes.extend(node.children) + return tokens + + @MapReduce.wrap_queue_out 
+ def combine_vocab(result): + nonlocal vocab + vocab.update(result) + + self.parallelize(files, uasts_vocab, combine_vocab) + self.save_vocab(self.vocab_path, vocab) + return vocab + + @staticmethod + def read_vocab(vocab_path): + with open(vocab_path) as fin: + words = [line.split(" ")[0] for line in fin] + return words + + @staticmethod + def save_vocab(vocab_path, vocab): + with open(vocab_path, "w") as fout: + fout.write("\n".join(map(lambda x: "%s %d" % x, vocab.most_common()))) + + def _get_log_name(self): + return "Vocab" + + def _get_tokens(self, uast_node): + return ["RoleId_%d" % role for role in uast_node.roles] + \ + list(self.token_parser.process_token(uast_node.token)) + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, + help="Logging verbosity.") + parser.add_argument("input", help="Input file with UASTs.") + parser.add_argument("output", help="Path to store vocabulary.") + parser.add_argument("--processes", type=int, default=2, help="Number of processes.") + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + uasts = open(args.input).read().split("\n") + vocab = Vocab(args.log_level, args.processes, args.output) + vocab.create(uasts) diff --git a/embeddings/glove.py b/embeddings/glove.py new file mode 100644 index 0000000..fdcdef7 --- /dev/null +++ b/embeddings/glove.py @@ -0,0 +1,77 @@ +import argparse +from collections import Counter +import logging +import os +from pathlib import Path +import struct + +from ast2vec.coocc import Cooccurrences +from build_vocab import Vocab +from map_reduce import MapReduce + + +class GloVe(MapReduce): + def __init__(self, log_level, num_processes, vocab_path): + super(GloVe, self).__init__(log_level=log_level, num_processes=num_processes) + self.vocab = {word: i for i, word in enumerate(Vocab.read_vocab(vocab_path))} + + def convert(self, src_dir, output, file_filter): + 
self._log.info("Scanning %s", src_dir) + files = [str(p) for p in Path(src_dir).glob(file_filter)] + self._log.info("Found %d files", len(files)) + if not files: + return 0 + + self._log.info("Combine proximity matrices.") + mat = self.extract(files) + self._log.info("Finished combining.") + + self._log.info("Saving matrix.") + self.save_mat(mat, output) + + def extract(self, files): + counter = Counter() + + @MapReduce.wrap_queue_in + def process_prox(self, filename): + prox = Cooccurrences().load(filename) + return {(prox.tokens[i], prox.tokens[j]): val for + i, j, val in zip(prox.matrix.row, prox.matrix.col, prox.matrix.data)} + + @MapReduce.wrap_queue_out + def combine_prox(result): + nonlocal counter + counter.update( + {(self.vocab[i], self.vocab[j]): val for (i, j), val in result.items() + if i in self.vocab and j in self.vocab}) + + self.parallelize(files, process_prox, combine_prox) + return counter + + @staticmethod + def save_mat(mat, output): + with open(output, "wb") as fout: + for (i, j), val in mat.items(): + fout.write(struct.pack("iid", i, j, int(val))) + + def _get_log_name(self): + return "GloVe" + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, + help="Logging verbosity.") + parser.add_argument("input", help="Input directory with proximity matrices.") + parser.add_argument("output", help="Path to store combined proximity matrix.") + parser.add_argument("--filter", default="**/*.asdf", help="File name glob selector.") + parser.add_argument("--processes", type=int, default=2, help="Number of processes.") + parser.add_argument("--vocabulary", default="vocab.txt", help="File with vocabulary.") + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + glove = GloVe(args.log_level, args.processes, args.vocabulary) + glove.convert(args.input, args.output, args.filter) diff --git a/embeddings/map_reduce.py b/embeddings/map_reduce.py 
new file mode 100644 index 0000000..4293975 --- /dev/null +++ b/embeddings/map_reduce.py @@ -0,0 +1,76 @@ +import multiprocessing +import time + +from ast2vec.pickleable_logger import PickleableLogger +from modelforge.progress_bar import progress_bar + + +class MapReduce(PickleableLogger): + def __init__(self, log_level, num_processes): + super(MapReduce, self).__init__(log_level=log_level) + self.num_processes = num_processes + + def parallelize(self, tasks, process_queue_in, process_queue_out): + queue_in = multiprocessing.Manager().Queue() + queue_out = multiprocessing.Manager().Queue(100) + processes = [multiprocessing.Process(target=process_queue_in, + args=(self, queue_in, queue_out)) + for i in range(self.num_processes)] + for p in processes: + p.start() + for t in tasks: + queue_in.put(t) + for _ in processes: + queue_in.put(None) + failures = process_queue_out(self, len(tasks), queue_out) + for p in processes: + p.join() + self._log.info("Finished, %d failed tasks", failures) + return len(tasks) - failures + + @staticmethod + def read_vocab(vocab_path): + with open(vocab_path) as fin: + words = [line.split(" ")[0] for line in fin] + return words + + @staticmethod + def save_vocab(vocab_path, vocab): + with open(vocab_path, "w") as fout: + fout.write("\n".join( + map(lambda x: "%s %d".join(x), vocab.most_common()))) + + @staticmethod + def wrap_queue_in(func): + def wrapper(self, queue_in, queue_out): + while True: + item = queue_in.get() + if item is None: + break + try: + queue_out.put(func(self, item)) + except: + self._log.exception("%s failed", item) + queue_out.put(None) + return wrapper + + @staticmethod + def wrap_queue_out(func): + def wrapper(self, n_tasks, queue_out): + failures = 0 + start = time.time() + + for i in range(n_tasks): + result = queue_out.get() + if i % 1000 == 0: + print(i, time.time() - start) + if result is None: + failures += 1 + continue + func(result) + + return failures + return wrapper + + def _get_log_name(self): + return 
"MapReduce" diff --git a/embeddings/move_prox.py b/embeddings/move_prox.py new file mode 100644 index 0000000..c95be87 --- /dev/null +++ b/embeddings/move_prox.py @@ -0,0 +1,18 @@ +import os +import shutil + +PROX_DIR = "/storage/timofei/role2vec/libs/ast2vec/uast_prox" +OUT_DIR = "/storage/timofei/role2vec/embeddings" + + +for fname in ["train", "valid", "test"]: + with open("uasts_{}.txt".format(fname)) as fin: + for line in fin: + line = line.strip() + uname = line[line.rfind("/") + 1:] + letter = uname[len("uast_"):len("uast_") + 1] + pname = os.path.join(PROX_DIR, letter, uname) + if os.path.exists(pname): + oname = os.path.join(OUT_DIR, "prox_{}".format(fname), letter) + os.makedirs(oname, exist_ok=True) + shutil.move(pname, oname) diff --git a/embeddings/node2vec.py b/embeddings/node2vec.py new file mode 100644 index 0000000..49ec265 --- /dev/null +++ b/embeddings/node2vec.py @@ -0,0 +1,141 @@ +import argparse +from collections import Counter, deque +from functools import partial +from itertools import chain, islice, product, tee +import logging +import os +from pathlib import Path +import time + +from gensim.models import Word2Vec +from modelforge.progress_bar import progress_bar + +from ast2vec.uast import UASTModel +from build_vocab import Vocab +from map_reduce import MapReduce +from random_walk import Graph + + +class Node2Vec(MapReduce): + MAX_VOCAB_WORDS = 1000000 + + def __init__(self, log_level, dimensions, num_processes, vocab_path, window, graph): + super(Node2Vec, self).__init__(log_level=log_level, num_processes=num_processes) + self.graph = graph + self.word2vec = Word2Vec(size=dimensions, window=window, workers=8) + self.word2vec.build_vocab(Vocab.read_vocab(vocab_path)[:self.MAX_VOCAB_WORDS]) + + def train(self, fname, output): + # print("\n\n----- KEK -----\n\n") + self._log.info("Scanning %s", fname) + files = [line.strip() for line in open(fname).readlines()] + self._log.info("Found %d files", len(files)) + if not files: + return 0 + + 
self._log.info("Train model.") + self._train(files) + self._log.info("Finished training.") + + self._log.info("Saving model.") + self.word2vec.wv.save_word2vec_format(output) + + def _train(self, files): + @MapReduce.wrap_queue_in + def process_uast(self, filename): + uast = UASTModel().load(filename) + # print("\n\n----- LOL -----\n\n", filename) + return self.graph.simulate_walks(uast) + + def train_walks(self, n_tasks, queue_out): + failures = 0 + + def consume(iterator, n): + """Advance the iterator n-steps ahead. If n is none, consume entirely.""" + # Use functions that consume iterators at C speed. + if n is None: + # feed the entire iterator into a zero-length deque + deque(iterator, maxlen=0) + else: + # advance to the empty slice starting at position n + next(islice(iterator, n, n), None) + + def window(iterable, n=2): + """s -> (s0, ...,s(n-1)), (s1, ...,sn), (s2, ..., s(n+1)), ...""" + iters = tee(iterable, n) + # Could use enumerate(islice(iters, 1, None), 1) to avoid consume(it, 0), but + # that's slower for larger window sizes, while saving only small fixed "noop" cost + for i, it in enumerate(iters): + consume(it, i) + return zip(*iters) + + def batch_stream(): + nonlocal failures + i = 0 + start = time.time() + + for _ in progress_bar(range(n_tasks), self._log, expected_size=n_tasks): + result = queue_out.get() + if result: + for walk in result: + walk = [list(map(str, node.tokens)) for node in walk] + for walk_window in window(walk, n=self.word2vec.window): + yield list(product(*walk_window)) + i += 1 + if i % 10000 == 0: + print(i, time.time() - start) + else: + failures += 1 + + self.word2vec.train( + batch_stream(), + total_examples=1000000, + epochs=self.word2vec.iter) + return failures + + # walks = [] + + # @MapReduce.wrap_queue_out + # def train_walks(res_walks): + # nonlocal walks + # res_walks = list(chain.from_iterable( + # product(*(map(str, node.tokens) for node in walk)) for walk in res_walks)) + # walks.extend(res_walks) + + 
self.parallelize(files, process_uast, train_walks) + # self.word2vec.train(walks, total_examples=len(walks), epochs=self.word2vec.iter) + + def _get_log_name(self): + return "Node2Vec" + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, + help="Logging verbosity.") + parser.add_argument("input", help="Input file with UASTs.") + parser.add_argument("output", help="Path to store the result model.") + parser.add_argument("--dimensions", default=300, help="Dimensionality of embeddings.") + parser.add_argument("--processes", type=int, default=1, help="Number of processes.") + parser.add_argument("--vocabulary", default="vocab.txt", help="File with vocabulary.") + parser.add_argument( + "-n", "--num-walks", type=int, default=1, help="Number of random walks from each node.") + parser.add_argument( + "-l", "--walk-length", type=int, default=80, help="Length of each random walk.") + parser.add_argument( + "-w", "--window", type=int, default=5, help="Window size for node context.") + parser.add_argument( + "-p", type=float, default=1.0, + help="Controls the likelihood of immediately revisiting previous node.") + parser.add_argument( + "-q", type=float, default=1.0, help="Controls the likelihood of exploring outward nodes.") + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + graph = Graph(args.log_level, args.num_walks, args.walk_length, args.p, args.q) + node2vec = Node2Vec(args.log_level, args.dimensions, args.processes, + args.vocabulary, args.window, graph) + node2vec.train(args.input, args.output) diff --git a/embeddings/random_walk.py b/embeddings/random_walk.py new file mode 100644 index 0000000..330d0db --- /dev/null +++ b/embeddings/random_walk.py @@ -0,0 +1,171 @@ +from collections import namedtuple +import random + +import numpy as np + +from ast2vec.pickleable_logger import PickleableLogger +from ast2vec.token_parser import TokenParser + 
+GraphNode = namedtuple("GraphNode", ["id", "neighbors", "tokens"]) + + +class Graph(PickleableLogger): + def __init__(self, log_level, num_walks, walk_length, p, q): + assert walk_length > 1, "Random walks have at least two nodes." + + super(Graph, self).__init__(log_level=log_level) + self.num_walks = num_walks + self.walk_length = walk_length + self.p = 1 / p + self.q = 1 / q + self.token_parser = TokenParser() + + def node2vec_walk(self, start_node, edges, nodes): + """ + Simulate a random walk starting from start node. + """ + walk = [None] * self.walk_length + walk[0] = start_node + walk[1] = nodes[start_node.neighbors[int(np.random.rand() * len(start_node.neighbors))]] + + for i in range(2, self.walk_length): + cur_node = walk[i - 1] + prev_node = walk[i - 2] + walk[i] = nodes[cur_node.neighbors[alias_draw(*edges[(prev_node.id, cur_node.id)])]] + + return walk + + def simulate_walks(self, uasts): + """ + Repeatedly simulate random walks from each node. + """ + all_walks = [] + + for uast, filename in zip(uasts.uasts, uasts.filenames): + nodes, edges = self._preprocess_uast(uast) + n_nodes = len(nodes) + + if n_nodes == 1: + self._log.info( + "Skipping UAST for %s: has a single node." % filename) + continue + + self._preprocess_transition_probs(nodes, edges) + walks = [None] * (n_nodes * self.num_walks) + self._log.info("Walk iteration:") + + for walk_iter in range(self.num_walks): + self._log.info("%d/%d" % (walk_iter + 1, self.num_walks)) + for i, node in enumerate(nodes): + walks[n_nodes * walk_iter + i] = self.node2vec_walk(node, edges, nodes) + + all_walks.extend(walks) + + random.shuffle(all_walks) + return all_walks + + def _get_alias_edge(self, src_id, dst_id, edges, nodes): + """ + Get the alias edge setup lists for a given edge. 
+ """ + unnormalized_probs = [ + self.p if dst_nbr == src_id else + 1 if (dst_nbr, src_id) in edges else + self.q for dst_nbr in nodes[dst_id].neighbors + ] + norm_const = sum(unnormalized_probs) + normalized_probs = [u_prob / norm_const for u_prob in unnormalized_probs] + + return alias_setup(normalized_probs) + + def _get_log_name(self): + return "Graph" + + def _get_tokens(self, uast_node): + return ["RoleId_%d" % role for role in uast_node.roles] + \ + list(self.token_parser.process_token(uast_node.token)) + + def _preprocess_transition_probs(self, nodes, edges): + """ + Preprocessing of transition probabilities for guiding the random walks. + """ + self._log.info("Preprocessing transition probabilities.") + for edge in edges: + edges[edge] = self._get_alias_edge(edge[0], edge[1], edges, nodes) + + def _preprocess_uast(self, root): + """ + Add neighbors information to UAST nodes. + """ + def create_node(node, id): + return GraphNode(id=id, neighbors=[], tokens=self._get_tokens(node)) + + self._log.info("Preprocessing UAST nodes.") + root_node = create_node(root, 0) + edges = {} + queue = [(root, 0)] + nodes = [root_node] + n_nodes = 1 + + while queue: + node, node_idx = queue.pop() + for child in node.children: + nodes.append(create_node(child, n_nodes)) + nodes[n_nodes].neighbors.append(node_idx) + nodes[node_idx].neighbors.append(n_nodes) + edges[(node_idx, n_nodes)] = edges[(n_nodes, node_idx)] = None + queue.append((child, n_nodes)) + n_nodes += 1 + + return nodes, edges + + +def alias_setup(probs): + """ + Compute utility lists for non-uniform sampling from discrete distributions. + Refer to https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/ + for details + """ + K = len(probs) + q = np.zeros(K) + J = np.zeros(K, dtype=np.int) + + # Sort the data into the outcomes with probabilities that are larger and smaller than 1/K. 
+ smaller = [] + larger = [] + for kk, prob in enumerate(probs): + q[kk] = K * prob + if q[kk] < 1.0: + smaller.append(kk) + else: + larger.append(kk) + + # Loop through and create little binary mixtures that appropriately allocate the larger + # outcomes over the overall uniform mixture. + while len(smaller) > 0 and len(larger) > 0: + small = smaller.pop() + large = larger.pop() + + J[small] = large + q[large] = q[large] + q[small] - 1.0 + if q[large] < 1.0: + smaller.append(large) + else: + larger.append(large) + + return J, q + + +def alias_draw(J, q): + """ + Draw sample from a non-uniform discrete distribution using alias sampling. + """ + # Draw from the overall uniform mixture. + kk = int(np.random.rand() * len(J)) + + # Draw from the binary mixture, either keeping the small one, or choosing the associated + # larger one. + if np.random.rand() < q[kk]: + return kk + else: + return J[kk] diff --git a/embeddings/role_model.py b/embeddings/role_model.py new file mode 100644 index 0000000..fe6e223 --- /dev/null +++ b/embeddings/role_model.py @@ -0,0 +1,114 @@ +from collections import namedtuple +from itertools import chain + +import numpy as np +from sklearn.dummy import DummyClassifier +from sklearn.externals import joblib +from sklearn.neural_network import MLPClassifier + +from ast2vec.token_parser import TokenParser +from ast2vec.uast import UASTModel +from map_reduce import MapReduce + +Node = namedtuple("Node", ["id", "parent", "children", "roles", "tokens"]) + + +class RoleModel(MapReduce): + def __init__(self, log_level, num_processes, emb_path, model_path): + super(RoleModel, self).__init__(log_level=log_level, num_processes=num_processes) + self.emb, self.roles = self.load_emb(emb_path) + self.model = None + self.path = model_path + self.token_parser = TokenParser() + + def train(self, fname): + self._log.info("Scanning %s", fname) + files = [line.strip() for line in open(fname).readlines()] + self._log.info("Found %d files", len(files)) + if not 
files: + return 0 + + self._log.info("Train model.") + self.model = self._train(files) + self._log.info("Finished training.") + + self._log.info("Saving model.") + joblib.dump(self.model, self.path) + + def test(self): + self._log.info("Loading model.") + self.model = joblib.load(self.path) + + def _train(self, files): + model = MLPClassifier(random_state=1, verbose=True) + dummies = [DummyClassifier(s, random_state=1) + for s in ["stratified", "most_frequent", "uniform"]] + model.classes_ = sorted(self.roles.values()) + # classes = sorted(self.roles.values()) + + @MapReduce.wrap_queue_in + def process_uast(self, filename): + X, y = [], [] + uast_model = UASTModel().load(filename) + + for uast in uast_model.uasts: + queue = [(uast, 0)] + node_vecs = [self.mean_vec([uast])] + n_nodes = 1 + + while queue: + node, node_idx = queue.pop() + for child in node.children: + child_vec = self.mean_vec([child]) + # add child to dataset + if child.children and child_vec is not None: + labels = np.zeros(len(self.roles), dtype=np.int8) + labels[[self.roles["RoleId_%d" % role] for role in child.roles]] = 1 + X.append(np.concatenate( + (self.mean_vec(child.children), node_vecs[node_idx]))) + y.append(labels) + queue.append((child, n_nodes)) + node_vecs.append(child_vec) + n_nodes += 1 + + return X, y + + data_X, data_y = [], [] + @MapReduce.wrap_queue_out + def train_uast(result): + nonlocal model, data_X, data_y + X, y = result + data_X.extend(X), data_y.extend(y) + # model.partial_fit(X, y, classes) + # print(model.loss_) + + self.parallelize(files, process_uast, train_uast) + np.savetxt("X.txt", data_X) + np.savetxt("y.txt", data_y) + # model.fit(data_X, data_y) + # for d in dummies: + # d.fit(data_X, data_y) + # print(model.score(data_X, data_y), *(d.score(data_X, data_y) for d in dummies)) + return model + + def mean_vec(self, nodes): + vecs = [self.emb[t] for node in nodes for t in chain(node.token, + ["RoleId_%d" % role for role in node.roles]) if t in self.emb] + if vecs: + 
return np.mean(vecs, axis=0) + return None + + @staticmethod + def load_emb(emb_path): + emb = {} + roles = [] + + with open(emb_path) as fin: + for line in fin: + word, *vec = line.split("\t") + emb[word] = np.array(vec, dtype=np.float) + if word.startswith("RoleId_"): + roles.append(word) + + roles = {role: i for i, role in enumerate(roles)} + return emb, roles diff --git a/embeddings/role_train.py b/embeddings/role_train.py new file mode 100644 index 0000000..a5fbc2f --- /dev/null +++ b/embeddings/role_train.py @@ -0,0 +1,22 @@ +import argparse +import logging + +from role_model import RoleModel + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, + help="Logging verbosity.") + parser.add_argument("input", help="Input file with UASTs.") + parser.add_argument("output", help="Path to store trained model.") + parser.add_argument("--processes", type=int, default=2, help="Number of processes.") + parser.add_argument("--embeddings", help="File with roles and tokens embeddings.") + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + rm = RoleModel(args.log_level, args.processes, args.embeddings, args.output) + rm.train(args.input) From b14c663d0dab76a9aef5007af87452d43084682a Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Tue, 5 Sep 2017 18:33:04 +0300 Subject: [PATCH 02/17] add testing pipeline --- embeddings/map_reduce.py | 3 +- embeddings/role_model.py | 159 ++++++++++--------- embeddings/{role_train.py => role_runner.py} | 17 +- 3 files changed, 101 insertions(+), 78 deletions(-) rename embeddings/{role_train.py => role_runner.py} (58%) diff --git a/embeddings/map_reduce.py b/embeddings/map_reduce.py index 4293975..5993af5 100644 --- a/embeddings/map_reduce.py +++ b/embeddings/map_reduce.py @@ -2,7 +2,6 @@ import time from ast2vec.pickleable_logger import PickleableLogger -from modelforge.progress_bar import progress_bar class 
MapReduce(PickleableLogger): @@ -67,7 +66,7 @@ def wrapper(self, n_tasks, queue_out): if result is None: failures += 1 continue - func(result) + func(self, result) return failures return wrapper diff --git a/embeddings/role_model.py b/embeddings/role_model.py index fe6e223..caca22c 100644 --- a/embeddings/role_model.py +++ b/embeddings/role_model.py @@ -1,8 +1,9 @@ from collections import namedtuple from itertools import chain +import os +import time import numpy as np -from sklearn.dummy import DummyClassifier from sklearn.externals import joblib from sklearn.neural_network import MLPClassifier @@ -14,89 +15,60 @@ class RoleModel(MapReduce): - def __init__(self, log_level, num_processes, emb_path, model_path): + def __init__(self, log_level, num_processes, emb_path): super(RoleModel, self).__init__(log_level=log_level, num_processes=num_processes) self.emb, self.roles = self.load_emb(emb_path) self.model = None - self.path = model_path self.token_parser = TokenParser() + def save(self, model_path): + if self.model is None: + raise ValueError("Model is empty.") + self._log.info("Saving model to %s.", model_path) + joblib.dump(self.model, model_path) + + def load(self, model_path): + if not os.path.exists(model_path): + raise ValueError("Provided path to model doesn't exist: %s", model_path) + self.model = joblib.load(model_path) + def train(self, fname): - self._log.info("Scanning %s", fname) - files = [line.strip() for line in open(fname).readlines()] - self._log.info("Found %d files", len(files)) - if not files: - return 0 + files = self._read(fname) self._log.info("Train model.") - self.model = self._train(files) + self.model = MLPClassifier(random_state=1, verbose=True) + self.model.classes_ = sorted(self.roles.values()) + counter = 0 + start = time.time() + + @MapReduce.wrap_queue_out + def train_uast(self, result): + nonlocal counter, start + X, y = result + counter += 1 + self.model.partial_fit(X, y) + print(self.model.loss_, time.time() - start, counter) + 
+ self.parallelize(files, _process_uast, train_uast) self._log.info("Finished training.") - self._log.info("Saving model.") - joblib.dump(self.model, self.path) - - def test(self): - self._log.info("Loading model.") - self.model = joblib.load(self.path) - - def _train(self, files): - model = MLPClassifier(random_state=1, verbose=True) - dummies = [DummyClassifier(s, random_state=1) - for s in ["stratified", "most_frequent", "uniform"]] - model.classes_ = sorted(self.roles.values()) - # classes = sorted(self.roles.values()) - - @MapReduce.wrap_queue_in - def process_uast(self, filename): - X, y = [], [] - uast_model = UASTModel().load(filename) - - for uast in uast_model.uasts: - queue = [(uast, 0)] - node_vecs = [self.mean_vec([uast])] - n_nodes = 1 - - while queue: - node, node_idx = queue.pop() - for child in node.children: - child_vec = self.mean_vec([child]) - # add child to dataset - if child.children and child_vec is not None: - labels = np.zeros(len(self.roles), dtype=np.int8) - labels[[self.roles["RoleId_%d" % role] for role in child.roles]] = 1 - X.append(np.concatenate( - (self.mean_vec(child.children), node_vecs[node_idx]))) - y.append(labels) - queue.append((child, n_nodes)) - node_vecs.append(child_vec) - n_nodes += 1 - - return X, y - - data_X, data_y = [], [] + def test(self, fname): + files = self._read(fname) + + self._log.info("Test model.") + y_real, y_pred = [], [] + @MapReduce.wrap_queue_out - def train_uast(result): - nonlocal model, data_X, data_y + def test_uast(self, result): + nonlocal y_real, y_pred X, y = result - data_X.extend(X), data_y.extend(y) - # model.partial_fit(X, y, classes) - # print(model.loss_) - - self.parallelize(files, process_uast, train_uast) - np.savetxt("X.txt", data_X) - np.savetxt("y.txt", data_y) - # model.fit(data_X, data_y) - # for d in dummies: - # d.fit(data_X, data_y) - # print(model.score(data_X, data_y), *(d.score(data_X, data_y) for d in dummies)) - return model - - def mean_vec(self, nodes): - vecs = 
[self.emb[t] for node in nodes for t in chain(node.token, - ["RoleId_%d" % role for role in node.roles]) if t in self.emb] - if vecs: - return np.mean(vecs, axis=0) - return None + y_real.extend(y) + y_pred.extend(self.model.predict_proba(X)) + + self.parallelize(files, _process_uast, test_uast) + np.save("y_real.npy", y_real) + np.save("y_pred.npy", y_pred) + self._log.info("Finished testing.") @staticmethod def load_emb(emb_path): @@ -112,3 +84,46 @@ def load_emb(emb_path): roles = {role: i for i, role in enumerate(roles)} return emb, roles + + def _mean_vec(self, nodes): + vecs = [self.emb[t] for node in nodes for t in chain(node.token, + ["RoleId_%d" % role for role in node.roles]) if t in self.emb] + if vecs: + return np.mean(vecs, axis=0) + return None + + def _read(self, fname): + self._log.info("Scanning %s", fname) + files = [line.strip() for line in open(fname).readlines()] + self._log.info("Found %d files", len(files)) + if not files: + raise ValueError("Make sure the file is not empty!") + return files + + +@MapReduce.wrap_queue_in +def _process_uast(self, filename): + X, y = [], [] + uast_model = UASTModel().load(filename) + + for uast in uast_model.uasts: + queue = [(uast, 0)] + node_vecs = [self._mean_vec([uast])] + n_nodes = 1 + + while queue: + node, node_idx = queue.pop() + for child in node.children: + child_vec = self._mean_vec([child]) + grandchild_vec = self._mean_vec(child.children) + # add child to dataset + if child.children and child_vec is not None and grandchild_vec is not None: + labels = np.zeros(len(self.roles), dtype=np.int8) + labels[[self.roles["RoleId_%d" % role] for role in child.roles]] = 1 + X.append(np.concatenate((grandchild_vec, node_vecs[node_idx]))) + y.append(labels) + queue.append((child, n_nodes)) + node_vecs.append(child_vec) + n_nodes += 1 + + return X, y diff --git a/embeddings/role_train.py b/embeddings/role_runner.py similarity index 58% rename from embeddings/role_train.py rename to embeddings/role_runner.py index 
a5fbc2f..c36a14f 100644 --- a/embeddings/role_train.py +++ b/embeddings/role_runner.py @@ -8,8 +8,9 @@ def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, help="Logging verbosity.") - parser.add_argument("input", help="Input file with UASTs.") - parser.add_argument("output", help="Path to store trained model.") + parser.add_argument("--train", help="Input file with UASTs for training.") + parser.add_argument("--test", help="Input file with UASTs for testing.") + parser.add_argument("--model", required=True, help="Path to store trained model.") parser.add_argument("--processes", type=int, default=2, help="Number of processes.") parser.add_argument("--embeddings", help="File with roles and tokens embeddings.") return parser.parse_args() @@ -18,5 +19,13 @@ def parse_args(): if __name__ == "__main__": args = parse_args() - rm = RoleModel(args.log_level, args.processes, args.embeddings, args.output) - rm.train(args.input) + rm = RoleModel(args.log_level, args.processes, args.embeddings) + + if args.train: + rm.train(args.train) + rm.save(args.model) + else: + rm.load(args.model) + + if args.test: + rm.test(args.test) From ec3787421a46325d489491fc97709c0e485af51a Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Wed, 6 Sep 2017 01:42:58 +0300 Subject: [PATCH 03/17] add statistics calculator --- embeddings/map_reduce.py | 7 +++++ embeddings/role_model.py | 24 ++++++---------- embeddings/role_statistics.py | 53 +++++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 16 deletions(-) create mode 100644 embeddings/role_statistics.py diff --git a/embeddings/map_reduce.py b/embeddings/map_reduce.py index 5993af5..5ae70ec 100644 --- a/embeddings/map_reduce.py +++ b/embeddings/map_reduce.py @@ -33,6 +33,13 @@ def read_vocab(vocab_path): words = [line.split(" ")[0] for line in fin] return words + @staticmethod + def read_paths(fname): + paths = [line.strip() for line in 
open(fname).readlines()] + if not paths: + raise ValueError("Make sure the file is not empty!") + return paths + @staticmethod def save_vocab(vocab_path, vocab): with open(vocab_path, "w") as fout: diff --git a/embeddings/role_model.py b/embeddings/role_model.py index caca22c..f5c614b 100644 --- a/embeddings/role_model.py +++ b/embeddings/role_model.py @@ -33,7 +33,7 @@ def load(self, model_path): self.model = joblib.load(model_path) def train(self, fname): - files = self._read(fname) + files = self.read_paths(fname) self._log.info("Train model.") self.model = MLPClassifier(random_state=1, verbose=True) @@ -53,7 +53,7 @@ def train_uast(self, result): self._log.info("Finished training.") def test(self, fname): - files = self._read(fname) + files = self.read_paths(fname) self._log.info("Test model.") y_real, y_pred = [], [] @@ -86,19 +86,11 @@ def load_emb(emb_path): return emb, roles def _mean_vec(self, nodes): - vecs = [self.emb[t] for node in nodes for t in chain(node.token, - ["RoleId_%d" % role for role in node.roles]) if t in self.emb] - if vecs: - return np.mean(vecs, axis=0) - return None - - def _read(self, fname): - self._log.info("Scanning %s", fname) - files = [line.strip() for line in open(fname).readlines()] - self._log.info("Found %d files", len(files)) - if not files: - raise ValueError("Make sure the file is not empty!") - return files + tokens = [t for node in nodes for t in chain(node.token, + ["RoleId_%d" % role for role in node.roles]) if t in self.emb] + if not tokens: + return None, 0 + return np.mean([self.emb[t] for t in tokens], axis=0), len(tokens) @MapReduce.wrap_queue_in @@ -117,7 +109,7 @@ def _process_uast(self, filename): child_vec = self._mean_vec([child]) grandchild_vec = self._mean_vec(child.children) # add child to dataset - if child.children and child_vec is not None and grandchild_vec is not None: + if child_vec is not None and grandchild_vec is not None: labels = np.zeros(len(self.roles), dtype=np.int8) 
labels[[self.roles["RoleId_%d" % role] for role in child.roles]] = 1 X.append(np.concatenate((grandchild_vec, node_vecs[node_idx]))) diff --git a/embeddings/role_statistics.py b/embeddings/role_statistics.py new file mode 100644 index 0000000..a1a1ff6 --- /dev/null +++ b/embeddings/role_statistics.py @@ -0,0 +1,53 @@ +import argparse +from collections import Counter +import json +import logging + +from ast2vec.uast import UASTModel +from map_reduce import MapReduce + + +class RoleStat(MapReduce): + def calc(self, fname, output): + paths = self.read_paths(fname) + counter = Counter() + + @MapReduce.wrap_queue_in + def process_uast(self, filename): + counter = Counter() + uast_model = UASTModel().load(filename) + for uast in uast_model.uasts: + queue = [uast] + counter[len(uast.roles)] += 1 + while queue: + node = queue.pop() + counter[len(node.roles)] += 1 + queue.extend(node.children) + return counter + + @MapReduce.wrap_queue_out + def combine_stat(self, result): + nonlocal counter + counter.update(result) + + self.parallelize(paths, process_uast, combine_stat) + with open(output, "w") as fout: + json.dump(counter, fout) + self._log.info("Finished collecting statistics.") + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, + help="Logging verbosity.") + parser.add_argument("input", help="Input file with UASTs.") + parser.add_argument("output", help="Path to store resulting statisics.") + parser.add_argument("--processes", type=int, default=4, help="Number of processes.") + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + role_stat = RoleStat(args.log_level, args.processes) + role_stat.calc(args.input, args.output) From ad68542a06273e8193c101b7662f937891b0495d Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Wed, 6 Sep 2017 01:42:58 +0300 Subject: [PATCH 04/17] optimize mean vectors calculations --- embeddings/role_model.py | 72 
++++++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 21 deletions(-) diff --git a/embeddings/role_model.py b/embeddings/role_model.py index f5c614b..fa584db 100644 --- a/embeddings/role_model.py +++ b/embeddings/role_model.py @@ -85,13 +85,52 @@ def load_emb(emb_path): roles = {role: i for i, role in enumerate(roles)} return emb, roles - def _mean_vec(self, nodes): - tokens = [t for node in nodes for t in chain(node.token, - ["RoleId_%d" % role for role in node.roles]) if t in self.emb] + @staticmethod + def node_iterator(root): + queue = [(root, 0)] + n_nodes = 1 + while queue: + node, node_idx = queue.pop() + yield node, node_idx + for child in node.children: + queue.append((child, n_nodes)) + n_nodes += 1 + + def _mean_vec(self, node): + tokens = [t for t in chain(node.token, ["RoleId_%d" % role for role in node.roles]) + if t in self.emb] if not tokens: return None, 0 return np.mean([self.emb[t] for t in tokens], axis=0), len(tokens) + def _mean_vecs(self, root): + node_vecs = {0: self._mean_vec(root)} + child_vecs = {} + parent_vecs = {0: None} + n_nodes = 1 # incremented in accoradance with self.node_iterator + + for node, node_idx in self.node_iterator(root): + node_child_vecs = [] + node_child_ns = [] + + for child in node.children: + child_vec = self._mean_vec(child) + node_vecs[n_nodes] = child_vec + parent_vecs[n_nodes] = node_vecs[node_idx][0] + node_child_vecs.append(child_vec[0]) + node_child_ns.append(child_vec[1]) + n_nodes += 1 + + node_child_vecs = list(filter(lambda x: x is not None, node_child_vecs)) + node_child_ns = list(filter(lambda x: x != 0, node_child_ns)) + + if node_child_vecs: + child_vecs[node_idx] = np.average(node_child_vecs, axis=0, weights=node_child_ns) + else: + child_vecs[node_idx] = None + + return child_vecs, parent_vecs + @MapReduce.wrap_queue_in def _process_uast(self, filename): @@ -99,23 +138,14 @@ def _process_uast(self, filename): uast_model = UASTModel().load(filename) for uast in 
uast_model.uasts: - queue = [(uast, 0)] - node_vecs = [self._mean_vec([uast])] - n_nodes = 1 - - while queue: - node, node_idx = queue.pop() - for child in node.children: - child_vec = self._mean_vec([child]) - grandchild_vec = self._mean_vec(child.children) - # add child to dataset - if child_vec is not None and grandchild_vec is not None: - labels = np.zeros(len(self.roles), dtype=np.int8) - labels[[self.roles["RoleId_%d" % role] for role in child.roles]] = 1 - X.append(np.concatenate((grandchild_vec, node_vecs[node_idx]))) - y.append(labels) - queue.append((child, n_nodes)) - node_vecs.append(child_vec) - n_nodes += 1 + child_vecs, parent_vecs = self._mean_vecs(uast) + for node, node_idx in self.node_iterator(uast): + child_vec = child_vecs[node_idx] + parent_vec = parent_vecs[node_idx] + if child_vec is not None and parent_vec is not None: + labels = np.zeros(len(self.roles), dtype=np.int8) + labels[[self.roles["RoleId_%d" % role] for role in node.roles]] = 1 + X.append(np.concatenate((child_vec, parent_vec))) + y.append(labels) return X, y From ae24863770e59d3dd73cd2f4912de990cd9889c3 Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Thu, 7 Sep 2017 00:44:14 +0300 Subject: [PATCH 05/17] collect suspicious uasts --- embeddings/role_statistics.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/embeddings/role_statistics.py b/embeddings/role_statistics.py index a1a1ff6..9ad84ba 100644 --- a/embeddings/role_statistics.py +++ b/embeddings/role_statistics.py @@ -8,9 +8,10 @@ class RoleStat(MapReduce): - def calc(self, fname, output): + def calc(self, fname, stat_output, susp_output): paths = self.read_paths(fname) - counter = Counter() + global_counter = Counter() + suspicious = [] @MapReduce.wrap_queue_in def process_uast(self, filename): @@ -23,16 +24,22 @@ def process_uast(self, filename): node = queue.pop() counter[len(node.roles)] += 1 queue.extend(node.children) - return counter + return counter, filename 
@MapReduce.wrap_queue_out def combine_stat(self, result): - nonlocal counter - counter.update(result) + nonlocal global_counter + counter, filename = result + global_counter.update(counter) + if 0 in counter: + suspicious.append((filename, sum(counter.values()), counter[0])) self.parallelize(paths, process_uast, combine_stat) - with open(output, "w") as fout: - json.dump(counter, fout) + with open(stat_output, "w") as fout: + json.dump(global_counter, fout) + with open(susp_output, "w") as fout: + for susp_entry in suspicious: + fout.write(", ".join(map(str, susp_entry)) + "\n") self._log.info("Finished collecting statistics.") @@ -41,7 +48,8 @@ def parse_args(): parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, help="Logging verbosity.") parser.add_argument("input", help="Input file with UASTs.") - parser.add_argument("output", help="Path to store resulting statisics.") + parser.add_argument("--stat", help="Path to store resulting statisics.") + parser.add_argument("--susp", help="Path to store suspicious UASTs.") parser.add_argument("--processes", type=int, default=4, help="Number of processes.") return parser.parse_args() @@ -50,4 +58,4 @@ def parse_args(): args = parse_args() role_stat = RoleStat(args.log_level, args.processes) - role_stat.calc(args.input, args.output) + role_stat.calc(args.input, args.stat, args.susp) From c2b7649fd763944b4122b0b7d5b1f6171e705309 Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Mon, 11 Sep 2017 01:34:48 +0300 Subject: [PATCH 06/17] modify node2vec to save proximity matrices --- embeddings/node2vec.py | 178 +++++++++++++++++++---------------------- 1 file changed, 84 insertions(+), 94 deletions(-) diff --git a/embeddings/node2vec.py b/embeddings/node2vec.py index 49ec265..0cf5605 100644 --- a/embeddings/node2vec.py +++ b/embeddings/node2vec.py @@ -1,122 +1,112 @@ import argparse -from collections import Counter, deque -from functools import partial -from itertools import chain, islice, 
product, tee +from collections import defaultdict, deque +from itertools import combinations, islice, product, tee import logging +import multiprocessing import os -from pathlib import Path import time -from gensim.models import Word2Vec -from modelforge.progress_bar import progress_bar +import numpy +from scipy.sparse import coo_matrix +from ast2vec.coocc import Cooccurrences +from ast2vec.pickleable_logger import PickleableLogger from ast2vec.uast import UASTModel -from build_vocab import Vocab -from map_reduce import MapReduce from random_walk import Graph -class Node2Vec(MapReduce): +class Node2Vec(PickleableLogger): MAX_VOCAB_WORDS = 1000000 - def __init__(self, log_level, dimensions, num_processes, vocab_path, window, graph): + def __init__(self, log_level, num_processes, vocab_path, window, graph): super(Node2Vec, self).__init__(log_level=log_level, num_processes=num_processes) self.graph = graph - self.word2vec = Word2Vec(size=dimensions, window=window, workers=8) - self.word2vec.build_vocab(Vocab.read_vocab(vocab_path)[:self.MAX_VOCAB_WORDS]) + self.num_processes = num_processes + self.vocab = set(self.read_vocab(vocab_path)[:Node2Vec.MAX_VOCAB_WORDS]) + self.window = window - def train(self, fname, output): - # print("\n\n----- KEK -----\n\n") + def process(self, fname, output): self._log.info("Scanning %s", fname) - files = [line.strip() for line in open(fname).readlines()] - self._log.info("Found %d files", len(files)) - if not files: - return 0 - - self._log.info("Train model.") - self._train(files) - self._log.info("Finished training.") - - self._log.info("Saving model.") - self.word2vec.wv.save_word2vec_format(output) - - def _train(self, files): - @MapReduce.wrap_queue_in - def process_uast(self, filename): - uast = UASTModel().load(filename) - # print("\n\n----- LOL -----\n\n", filename) - return self.graph.simulate_walks(uast) - - def train_walks(self, n_tasks, queue_out): - failures = 0 - - def consume(iterator, n): - """Advance the iterator 
n-steps ahead. If n is none, consume entirely.""" - # Use functions that consume iterators at C speed. - if n is None: - # feed the entire iterator into a zero-length deque - deque(iterator, maxlen=0) - else: - # advance to the empty slice starting at position n - next(islice(iterator, n, n), None) - - def window(iterable, n=2): - """s -> (s0, ...,s(n-1)), (s1, ...,sn), (s2, ..., s(n+1)), ...""" - iters = tee(iterable, n) - # Could use enumerate(islice(iters, 1, None), 1) to avoid consume(it, 0), but - # that's slower for larger window sizes, while saving only small fixed "noop" cost - for i, it in enumerate(iters): - consume(it, i) - return zip(*iters) - - def batch_stream(): - nonlocal failures - i = 0 - start = time.time() - - for _ in progress_bar(range(n_tasks), self._log, expected_size=n_tasks): - result = queue_out.get() - if result: - for walk in result: - walk = [list(map(str, node.tokens)) for node in walk] - for walk_window in window(walk, n=self.word2vec.window): - yield list(product(*walk_window)) - i += 1 - if i % 10000 == 0: - print(i, time.time() - start) - else: - failures += 1 - - self.word2vec.train( - batch_stream(), - total_examples=1000000, - epochs=self.word2vec.iter) - return failures - - # walks = [] - - # @MapReduce.wrap_queue_out - # def train_walks(res_walks): - # nonlocal walks - # res_walks = list(chain.from_iterable( - # product(*(map(str, node.tokens) for node in walk)) for walk in res_walks)) - # walks.extend(res_walks) - - self.parallelize(files, process_uast, train_walks) - # self.word2vec.train(walks, total_examples=len(walks), epochs=self.word2vec.iter) + paths = self.read_paths(fname) + self._log.info("Found %d files", len(paths)) + + self._log.info("Processing files.") + paths = self._preprocess_paths(paths, output) + start_time = time.time() + with multiprocessing.Pool(self.num_processes) as pool: + pool.starmap(self.process_uast, paths) + self._log.info("Finished processing in %.2f.", time.time() - start_time) + + def 
process_uast(self, filename, output): + uast = UASTModel().load(filename) + dok_matrix = defaultdict(int) + + for walk in self.graph.simulate_walks(uast): + walk = [[t for t in map(str, node.tokens) if t in self.vocab] for node in walk] + for walk_window_raw in window(walk, n=self.window): + for walk_window in product(*walk_window_raw): + for word1, word2 in combinations(walk_window, 2): + dok_matrix[(word1, word2)] += 1 + dok_matrix[(word2, word1)] += 1 + + del uast + + mat = coo_matrix((Node2Vec.MAX_VOCAB_WORDS, Node2Vec.MAX_VOCAB_WORDS), dtype=numpy.float32) + mat.row = row = numpy.empty(len(dok_matrix), dtype=numpy.int32) + mat.col = col = numpy.empty(len(dok_matrix), dtype=numpy.int32) + mat.data = data = numpy.empty(len(dok_matrix), dtype=numpy.float32) + for i, (coord, val) in enumerate(sorted(dok_matrix.items())): + row[i], col[i] = coord + data[i] = val + + del dok_matrix + + coocc = Cooccurrences() + coocc.construct(tokens=self.vocab, matrix=mat) + coocc.save(output) def _get_log_name(self): return "Node2Vec" + def _preprocess_paths(self, paths, output): + preprocessed_paths = [] + for p in paths: + name = os.path.basename(p) + if name.startswith("uast_"): + name = name[len("uast_"):] + out = os.path.join(output, name[0], name) + preprocessed_paths.append((p, out)) + return preprocessed_paths + + +def consume(iterator, n): + """Advance the iterator n-steps ahead. If n is none, consume entirely.""" + # Use functions that consume iterators at C speed. 
+ if n is None: + # feed the entire iterator into a zero-length deque + deque(iterator, maxlen=0) + else: + # advance to the empty slice starting at position n + next(islice(iterator, n, n), None) + + +def window(iterable, n=2): + """s -> (s0, ...,s(n-1)), (s1, ...,sn), (s2, ..., s(n+1)), ...""" + iters = tee(iterable, n) + # Could use enumerate(islice(iters, 1, None), 1) to avoid consume(it, 0), but + # that's slower for larger window sizes, while saving only small fixed "noop" cost + for i, it in enumerate(iters): + consume(it, i) + return zip(*iters) + def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, help="Logging verbosity.") parser.add_argument("input", help="Input file with UASTs.") - parser.add_argument("output", help="Path to store the result model.") - parser.add_argument("--dimensions", default=300, help="Dimensionality of embeddings.") - parser.add_argument("--processes", type=int, default=1, help="Number of processes.") + parser.add_argument("output", help="Path to store the resulting matrices.") + parser.add_argument("--processes", type=int, default=4, help="Number of processes.") parser.add_argument("--vocabulary", default="vocab.txt", help="File with vocabulary.") parser.add_argument( "-n", "--num-walks", type=int, default=1, help="Number of random walks from each node.") @@ -138,4 +128,4 @@ def parse_args(): graph = Graph(args.log_level, args.num_walks, args.walk_length, args.p, args.q) node2vec = Node2Vec(args.log_level, args.dimensions, args.processes, args.vocabulary, args.window, graph) - node2vec.train(args.input, args.output) + node2vec.process(args.input, args.output) From 46eb9875cccdacca2b92c6b9b0648c2879f13f1d Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Mon, 11 Sep 2017 01:48:12 +0300 Subject: [PATCH 07/17] Move common functions to utils.py --- embeddings/build_vocab.py | 11 --------- embeddings/glove.py | 1 - embeddings/map_reduce.py | 19 
---------------- embeddings/node2vec.py | 32 +++++--------------------- embeddings/role_model.py | 9 ++++---- embeddings/role_statistics.py | 3 ++- embeddings/utils.py | 42 +++++++++++++++++++++++++++++++++++ 7 files changed, 55 insertions(+), 62 deletions(-) create mode 100644 embeddings/utils.py diff --git a/embeddings/build_vocab.py b/embeddings/build_vocab.py index ac3e880..79be37e 100644 --- a/embeddings/build_vocab.py +++ b/embeddings/build_vocab.py @@ -40,17 +40,6 @@ def combine_vocab(result): self.save_vocab(self.vocab_path, vocab) return vocab - @staticmethod - def read_vocab(vocab_path): - with open(vocab_path) as fin: - words = [line.split(" ")[0] for line in fin] - return words - - @staticmethod - def save_vocab(vocab_path, vocab): - with open(vocab_path, "w") as fout: - fout.write("\n".join(map(lambda x: "%s %d" % x, vocab.most_common()))) - def _get_log_name(self): return "Vocab" diff --git a/embeddings/glove.py b/embeddings/glove.py index fdcdef7..d961de5 100644 --- a/embeddings/glove.py +++ b/embeddings/glove.py @@ -1,7 +1,6 @@ import argparse from collections import Counter import logging -import os from pathlib import Path import struct diff --git a/embeddings/map_reduce.py b/embeddings/map_reduce.py index 5ae70ec..6f70c50 100644 --- a/embeddings/map_reduce.py +++ b/embeddings/map_reduce.py @@ -27,25 +27,6 @@ def parallelize(self, tasks, process_queue_in, process_queue_out): self._log.info("Finished, %d failed tasks", failures) return len(tasks) - failures - @staticmethod - def read_vocab(vocab_path): - with open(vocab_path) as fin: - words = [line.split(" ")[0] for line in fin] - return words - - @staticmethod - def read_paths(fname): - paths = [line.strip() for line in open(fname).readlines()] - if not paths: - raise ValueError("Make sure the file is not empty!") - return paths - - @staticmethod - def save_vocab(vocab_path, vocab): - with open(vocab_path, "w") as fout: - fout.write("\n".join( - map(lambda x: "%s %d".join(x), 
vocab.most_common()))) - @staticmethod def wrap_queue_in(func): def wrapper(self, queue_in, queue_out): diff --git a/embeddings/node2vec.py b/embeddings/node2vec.py index 0cf5605..22e492d 100644 --- a/embeddings/node2vec.py +++ b/embeddings/node2vec.py @@ -1,6 +1,6 @@ import argparse -from collections import defaultdict, deque -from itertools import combinations, islice, product, tee +from collections import defaultdict +from itertools import combinations, product import logging import multiprocessing import os @@ -13,16 +13,17 @@ from ast2vec.pickleable_logger import PickleableLogger from ast2vec.uast import UASTModel from random_walk import Graph +from utils import read_vocab, window class Node2Vec(PickleableLogger): MAX_VOCAB_WORDS = 1000000 def __init__(self, log_level, num_processes, vocab_path, window, graph): - super(Node2Vec, self).__init__(log_level=log_level, num_processes=num_processes) + super(Node2Vec, self).__init__(log_level=log_level) self.graph = graph self.num_processes = num_processes - self.vocab = set(self.read_vocab(vocab_path)[:Node2Vec.MAX_VOCAB_WORDS]) + self.vocab = set(read_vocab(vocab_path)[:Node2Vec.MAX_VOCAB_WORDS]) self.window = window def process(self, fname, output): @@ -79,27 +80,6 @@ def _preprocess_paths(self, paths, output): return preprocessed_paths -def consume(iterator, n): - """Advance the iterator n-steps ahead. If n is none, consume entirely.""" - # Use functions that consume iterators at C speed. 
- if n is None: - # feed the entire iterator into a zero-length deque - deque(iterator, maxlen=0) - else: - # advance to the empty slice starting at position n - next(islice(iterator, n, n), None) - - -def window(iterable, n=2): - """s -> (s0, ...,s(n-1)), (s1, ...,sn), (s2, ..., s(n+1)), ...""" - iters = tee(iterable, n) - # Could use enumerate(islice(iters, 1, None), 1) to avoid consume(it, 0), but - # that's slower for larger window sizes, while saving only small fixed "noop" cost - for i, it in enumerate(iters): - consume(it, i) - return zip(*iters) - - def parse_args(): parser = argparse.ArgumentParser() parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, @@ -126,6 +106,6 @@ def parse_args(): args = parse_args() graph = Graph(args.log_level, args.num_walks, args.walk_length, args.p, args.q) - node2vec = Node2Vec(args.log_level, args.dimensions, args.processes, + node2vec = Node2Vec(args.log_level, args.processes, args.vocabulary, args.window, graph) node2vec.process(args.input, args.output) diff --git a/embeddings/role_model.py b/embeddings/role_model.py index fa584db..a5c6f32 100644 --- a/embeddings/role_model.py +++ b/embeddings/role_model.py @@ -10,6 +10,7 @@ from ast2vec.token_parser import TokenParser from ast2vec.uast import UASTModel from map_reduce import MapReduce +from utils import read_paths Node = namedtuple("Node", ["id", "parent", "children", "roles", "tokens"]) @@ -33,7 +34,7 @@ def load(self, model_path): self.model = joblib.load(model_path) def train(self, fname): - files = self.read_paths(fname) + paths = read_paths(fname) self._log.info("Train model.") self.model = MLPClassifier(random_state=1, verbose=True) @@ -49,11 +50,11 @@ def train_uast(self, result): self.model.partial_fit(X, y) print(self.model.loss_, time.time() - start, counter) - self.parallelize(files, _process_uast, train_uast) + self.parallelize(paths, _process_uast, train_uast) self._log.info("Finished training.") def test(self, fname): - files = 
self.read_paths(fname) + paths = read_paths(fname) self._log.info("Test model.") y_real, y_pred = [], [] @@ -65,7 +66,7 @@ def test_uast(self, result): y_real.extend(y) y_pred.extend(self.model.predict_proba(X)) - self.parallelize(files, _process_uast, test_uast) + self.parallelize(paths, _process_uast, test_uast) np.save("y_real.npy", y_real) np.save("y_pred.npy", y_pred) self._log.info("Finished testing.") diff --git a/embeddings/role_statistics.py b/embeddings/role_statistics.py index 9ad84ba..a45c5ab 100644 --- a/embeddings/role_statistics.py +++ b/embeddings/role_statistics.py @@ -5,11 +5,12 @@ from ast2vec.uast import UASTModel from map_reduce import MapReduce +from utils import read_paths class RoleStat(MapReduce): def calc(self, fname, stat_output, susp_output): - paths = self.read_paths(fname) + paths = read_paths(fname) global_counter = Counter() suspicious = [] diff --git a/embeddings/utils.py b/embeddings/utils.py new file mode 100644 index 0000000..649c31d --- /dev/null +++ b/embeddings/utils.py @@ -0,0 +1,42 @@ +from collections import deque +from itertools import islice, tee + + +def consume(iterator, n): + """Advance the iterator n-steps ahead. If n is none, consume entirely.""" + # Use functions that consume iterators at C speed. 
+ if n is None: + # feed the entire iterator into a zero-length deque + deque(iterator, maxlen=0) + else: + # advance to the empty slice starting at position n + next(islice(iterator, n, n), None) + + +def window(iterable, n=2): + """s -> (s0, ...,s(n-1)), (s1, ...,sn), (s2, ..., s(n+1)), ...""" + iters = tee(iterable, n) + # Could use enumerate(islice(iters, 1, None), 1) to avoid consume(it, 0), but + # that's slower for larger window sizes, while saving only small fixed "noop" cost + for i, it in enumerate(iters): + consume(it, i) + return zip(*iters) + + +def read_paths(fname): + paths = [line.strip() for line in open(fname).readlines()] + if not paths: + raise ValueError("Make sure the file is not empty!") + return paths + + +def read_vocab(vocab_path): + with open(vocab_path) as fin: + words = [line.split(" ")[0] for line in fin] + return words + + +def save_vocab(vocab_path, vocab): + with open(vocab_path, "w") as fout: + fout.write("\n".join( + map(lambda x: "%s %d".join(x), vocab.most_common()))) From 0eac04de7e2ae5830cd89f26f85209b5378632cc Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Mon, 11 Sep 2017 17:51:26 +0300 Subject: [PATCH 08/17] Speed up node2vec --- embeddings/node2vec.py | 6 ++- embeddings/random_walk.py | 105 ++++++++++++++++---------------------- embeddings/role_model.py | 48 +++++++++++------ embeddings/role_runner.py | 31 ----------- embeddings/utils.py | 11 ++++ 5 files changed, 92 insertions(+), 109 deletions(-) delete mode 100644 embeddings/role_runner.py diff --git a/embeddings/node2vec.py b/embeddings/node2vec.py index 22e492d..1e232e8 100644 --- a/embeddings/node2vec.py +++ b/embeddings/node2vec.py @@ -13,7 +13,7 @@ from ast2vec.pickleable_logger import PickleableLogger from ast2vec.uast import UASTModel from random_walk import Graph -from utils import read_vocab, window +from utils import read_paths, read_vocab, window class Node2Vec(PickleableLogger): @@ -28,7 +28,7 @@ def __init__(self, log_level, num_processes, vocab_path, 
window, graph): def process(self, fname, output): self._log.info("Scanning %s", fname) - paths = self.read_paths(fname) + paths = read_paths(fname) self._log.info("Found %d files", len(paths)) self._log.info("Processing files.") @@ -39,6 +39,7 @@ def process(self, fname, output): self._log.info("Finished processing in %.2f.", time.time() - start_time) def process_uast(self, filename, output): + self._log.info("Processing %s", filename) uast = UASTModel().load(filename) dok_matrix = defaultdict(int) @@ -65,6 +66,7 @@ def process_uast(self, filename, output): coocc = Cooccurrences() coocc.construct(tokens=self.vocab, matrix=mat) coocc.save(output) + self._log.info("Finished processing %s", filename) def _get_log_name(self): return "Node2Vec" diff --git a/embeddings/random_walk.py b/embeddings/random_walk.py index 330d0db..4be8720 100644 --- a/embeddings/random_walk.py +++ b/embeddings/random_walk.py @@ -5,6 +5,7 @@ from ast2vec.pickleable_logger import PickleableLogger from ast2vec.token_parser import TokenParser +from utils import node_iterator GraphNode = namedtuple("GraphNode", ["id", "neighbors", "tokens"]) @@ -25,13 +26,20 @@ def node2vec_walk(self, start_node, edges, nodes): Simulate a random walk starting from start node. 
""" walk = [None] * self.walk_length - walk[0] = start_node - walk[1] = nodes[start_node.neighbors[int(np.random.rand() * len(start_node.neighbors))]] + prev_node = walk[0] = start_node + cur_node = walk[1] = nodes[random.choice(start_node.neighbors)] for i in range(2, self.walk_length): - cur_node = walk[i - 1] - prev_node = walk[i - 2] - walk[i] = nodes[cur_node.neighbors[alias_draw(*edges[(prev_node.id, cur_node.id)])]] + J, q = edges[(prev_node.id, cur_node.id)] + kk = int(np.random.rand() * len(J)) + + if np.random.rand() < q[kk]: + ind = kk + else: + ind = J[kk] + + prev_node = cur_node + cur_node = walk[i] = nodes[cur_node.neighbors[ind]] return walk @@ -39,7 +47,7 @@ def simulate_walks(self, uasts): """ Repeatedly simulate random walks from each node. """ - all_walks = [] + walks = [] for uast, filename in zip(uasts.uasts, uasts.filenames): nodes, edges = self._preprocess_uast(uast) @@ -51,32 +59,23 @@ def simulate_walks(self, uasts): continue self._preprocess_transition_probs(nodes, edges) - walks = [None] * (n_nodes * self.num_walks) self._log.info("Walk iteration:") for walk_iter in range(self.num_walks): self._log.info("%d/%d" % (walk_iter + 1, self.num_walks)) - for i, node in enumerate(nodes): - walks[n_nodes * walk_iter + i] = self.node2vec_walk(node, edges, nodes) - - all_walks.extend(walks) + iter_nodes = set(node.id for node in nodes) - random.shuffle(all_walks) - return all_walks + while iter_nodes: + node = random.sample(iter_nodes, 1) + walk = self.node2vec_walk(node, edges, nodes) + walks.append(walk) - def _get_alias_edge(self, src_id, dst_id, edges, nodes): - """ - Get the alias edge setup lists for a given edge. 
- """ - unnormalized_probs = [ - self.p if dst_nbr == src_id else - 1 if (dst_nbr, src_id) in edges else - self.q for dst_nbr in nodes[dst_id].neighbors - ] - norm_const = sum(unnormalized_probs) - normalized_probs = [u_prob / norm_const for u_prob in unnormalized_probs] + for walk_node in walk: + if walk_node.id in iter_nodes: + iter_nodes.remove(walk_node.id) - return alias_setup(normalized_probs) + random.shuffle(walks) + return walks def _get_log_name(self): return "Graph" @@ -91,7 +90,12 @@ def _preprocess_transition_probs(self, nodes, edges): """ self._log.info("Preprocessing transition probabilities.") for edge in edges: - edges[edge] = self._get_alias_edge(edge[0], edge[1], edges, nodes) + unnormalized_probs = np.array([ + self.p if dst_nbr == edge[0] else + 1 if (dst_nbr, edge[0]) in edges else + self.q for dst_nbr in nodes[edge[1]].neighbors + ]) + edges[edge] = alias_setup(unnormalized_probs / unnormalized_probs.sum()) def _preprocess_uast(self, root): """ @@ -103,18 +107,15 @@ def create_node(node, id): self._log.info("Preprocessing UAST nodes.") root_node = create_node(root, 0) edges = {} - queue = [(root, 0)] nodes = [root_node] n_nodes = 1 - while queue: - node, node_idx = queue.pop() + for node, node_idx in node_iterator(root): for child in node.children: nodes.append(create_node(child, n_nodes)) nodes[n_nodes].neighbors.append(node_idx) nodes[node_idx].neighbors.append(n_nodes) edges[(node_idx, n_nodes)] = edges[(n_nodes, node_idx)] = None - queue.append((child, n_nodes)) n_nodes += 1 return nodes, edges @@ -127,45 +128,27 @@ def alias_setup(probs): for details """ K = len(probs) - q = np.zeros(K) + q = probs * K J = np.zeros(K, dtype=np.int) # Sort the data into the outcomes with probabilities that are larger and smaller than 1/K. 
- smaller = [] - larger = [] - for kk, prob in enumerate(probs): - q[kk] = K * prob - if q[kk] < 1.0: - smaller.append(kk) - else: - larger.append(kk) + smaller = np.where(q < 1.0)[0] + larger = np.where(q >= 1.0)[0] + s_idx = len(smaller) - 1 + l_idx = len(larger) - 1 # Loop through and create little binary mixtures that appropriately allocate the larger # outcomes over the overall uniform mixture. - while len(smaller) > 0 and len(larger) > 0: - small = smaller.pop() - large = larger.pop() - + while s_idx >= 0 and l_idx >= 0: + small = smaller[s_idx] + large = larger[l_idx] J[small] = large - q[large] = q[large] + q[small] - 1.0 + q[large] += q[small] - 1.0 + if q[large] < 1.0: - smaller.append(large) + smaller[s_idx] = large + l_idx -= 1 else: - larger.append(large) + s_idx -= 1 return J, q - - -def alias_draw(J, q): - """ - Draw sample from a non-uniform discrete distribution using alias sampling. - """ - # Draw from the overall uniform mixture. - kk = int(np.random.rand() * len(J)) - - # Draw from the binary mixture, either keeping the small one, or choosing the associated - # larger one. 
- if np.random.rand() < q[kk]: - return kk - else: - return J[kk] diff --git a/embeddings/role_model.py b/embeddings/role_model.py index a5c6f32..49b49db 100644 --- a/embeddings/role_model.py +++ b/embeddings/role_model.py @@ -1,5 +1,7 @@ +import argparse from collections import namedtuple from itertools import chain +import logging import os import time @@ -10,7 +12,7 @@ from ast2vec.token_parser import TokenParser from ast2vec.uast import UASTModel from map_reduce import MapReduce -from utils import read_paths +from utils import node_iterator, read_paths Node = namedtuple("Node", ["id", "parent", "children", "roles", "tokens"]) @@ -86,17 +88,6 @@ def load_emb(emb_path): roles = {role: i for i, role in enumerate(roles)} return emb, roles - @staticmethod - def node_iterator(root): - queue = [(root, 0)] - n_nodes = 1 - while queue: - node, node_idx = queue.pop() - yield node, node_idx - for child in node.children: - queue.append((child, n_nodes)) - n_nodes += 1 - def _mean_vec(self, node): tokens = [t for t in chain(node.token, ["RoleId_%d" % role for role in node.roles]) if t in self.emb] @@ -108,9 +99,9 @@ def _mean_vecs(self, root): node_vecs = {0: self._mean_vec(root)} child_vecs = {} parent_vecs = {0: None} - n_nodes = 1 # incremented in accoradance with self.node_iterator + n_nodes = 1 # incremented in accoradance with node_iterator - for node, node_idx in self.node_iterator(root): + for node, node_idx in node_iterator(root): node_child_vecs = [] node_child_ns = [] @@ -140,7 +131,7 @@ def _process_uast(self, filename): for uast in uast_model.uasts: child_vecs, parent_vecs = self._mean_vecs(uast) - for node, node_idx in self.node_iterator(uast): + for node, node_idx in node_iterator(uast): child_vec = child_vecs[node_idx] parent_vec = parent_vecs[node_idx] if child_vec is not None and parent_vec is not None: @@ -150,3 +141,30 @@ def _process_uast(self, filename): y.append(labels) return X, y + + +def parse_args(): + parser = argparse.ArgumentParser() + 
parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, + help="Logging verbosity.") + parser.add_argument("--train", help="Input file with UASTs for training.") + parser.add_argument("--test", help="Input file with UASTs for testing.") + parser.add_argument("--model", required=True, help="Path to store trained model.") + parser.add_argument("--processes", type=int, default=2, help="Number of processes.") + parser.add_argument("--embeddings", help="File with roles and tokens embeddings.") + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + rm = RoleModel(args.log_level, args.processes, args.embeddings) + + if args.train: + rm.train(args.train) + rm.save(args.model) + else: + rm.load(args.model) + + if args.test: + rm.test(args.test) diff --git a/embeddings/role_runner.py b/embeddings/role_runner.py deleted file mode 100644 index c36a14f..0000000 --- a/embeddings/role_runner.py +++ /dev/null @@ -1,31 +0,0 @@ -import argparse -import logging - -from role_model import RoleModel - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, - help="Logging verbosity.") - parser.add_argument("--train", help="Input file with UASTs for training.") - parser.add_argument("--test", help="Input file with UASTs for testing.") - parser.add_argument("--model", required=True, help="Path to store trained model.") - parser.add_argument("--processes", type=int, default=2, help="Number of processes.") - parser.add_argument("--embeddings", help="File with roles and tokens embeddings.") - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - rm = RoleModel(args.log_level, args.processes, args.embeddings) - - if args.train: - rm.train(args.train) - rm.save(args.model) - else: - rm.load(args.model) - - if args.test: - rm.test(args.test) diff --git a/embeddings/utils.py b/embeddings/utils.py index 649c31d..257585d 100644 
--- a/embeddings/utils.py +++ b/embeddings/utils.py @@ -2,6 +2,17 @@ from itertools import islice, tee +def node_iterator(root): + queue = [(root, 0)] + n_nodes = 1 + while queue: + node, node_idx = queue.pop() + yield node, node_idx + for child in node.children: + queue.append((child, n_nodes)) + n_nodes += 1 + + def consume(iterator, n): """Advance the iterator n-steps ahead. If n is none, consume entirely.""" # Use functions that consume iterators at C speed. From 93b5efca934186af4539b10576ba5990b1a1130c Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Tue, 12 Sep 2017 01:02:00 +0300 Subject: [PATCH 09/17] Fix algorithmic mistake --- embeddings/node2vec.py | 10 +++++----- embeddings/random_walk.py | 15 ++++----------- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/embeddings/node2vec.py b/embeddings/node2vec.py index 1e232e8..631b4ae 100644 --- a/embeddings/node2vec.py +++ b/embeddings/node2vec.py @@ -1,6 +1,6 @@ import argparse from collections import defaultdict -from itertools import combinations, product +from itertools import product import logging import multiprocessing import os @@ -13,7 +13,7 @@ from ast2vec.pickleable_logger import PickleableLogger from ast2vec.uast import UASTModel from random_walk import Graph -from utils import read_paths, read_vocab, window +from utils import read_paths, read_vocab class Node2Vec(PickleableLogger): @@ -45,9 +45,9 @@ def process_uast(self, filename, output): for walk in self.graph.simulate_walks(uast): walk = [[t for t in map(str, node.tokens) if t in self.vocab] for node in walk] - for walk_window_raw in window(walk, n=self.window): - for walk_window in product(*walk_window_raw): - for word1, word2 in combinations(walk_window, 2): + for i, cur_tokens in enumerate(walk[:-1]): + for next_tokens in walk[(i + 1):(i + self.window)]: + for word1, word2 in product(cur_tokens, next_tokens): dok_matrix[(word1, word2)] += 1 dok_matrix[(word2, word1)] += 1 diff --git a/embeddings/random_walk.py 
b/embeddings/random_walk.py index 4be8720..36eb315 100644 --- a/embeddings/random_walk.py +++ b/embeddings/random_walk.py @@ -31,7 +31,7 @@ def node2vec_walk(self, start_node, edges, nodes): for i in range(2, self.walk_length): J, q = edges[(prev_node.id, cur_node.id)] - kk = int(np.random.rand() * len(J)) + kk = np.random.randint(len(J)) if np.random.rand() < q[kk]: ind = kk @@ -47,15 +47,12 @@ def simulate_walks(self, uasts): """ Repeatedly simulate random walks from each node. """ - walks = [] - for uast, filename in zip(uasts.uasts, uasts.filenames): nodes, edges = self._preprocess_uast(uast) n_nodes = len(nodes) if n_nodes == 1: - self._log.info( - "Skipping UAST for %s: has a single node." % filename) + self._log.info("Skipping UAST for %s: has a single node." % filename) continue self._preprocess_transition_probs(nodes, edges) @@ -68,15 +65,12 @@ def simulate_walks(self, uasts): while iter_nodes: node = random.sample(iter_nodes, 1) walk = self.node2vec_walk(node, edges, nodes) - walks.append(walk) + yield walk for walk_node in walk: if walk_node.id in iter_nodes: iter_nodes.remove(walk_node.id) - random.shuffle(walks) - return walks - def _get_log_name(self): return "Graph" @@ -105,9 +99,8 @@ def create_node(node, id): return GraphNode(id=id, neighbors=[], tokens=self._get_tokens(node)) self._log.info("Preprocessing UAST nodes.") - root_node = create_node(root, 0) edges = {} - nodes = [root_node] + nodes = [create_node(root, 0)] n_nodes = 1 for node, node_idx in node_iterator(root): From 4db1a270f2d6d8f1756b8f3be0ea7fe67d7c0011 Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Wed, 13 Sep 2017 03:23:23 +0300 Subject: [PATCH 10/17] Switch node2vec to MapReduce --- embeddings/node2vec.py | 88 ++++++++++++++++++++++----------------- embeddings/random_walk.py | 2 +- embeddings/role_model.py | 2 +- 3 files changed, 51 insertions(+), 41 deletions(-) diff --git a/embeddings/node2vec.py b/embeddings/node2vec.py index 631b4ae..5f9d9d9 100644 --- 
a/embeddings/node2vec.py +++ b/embeddings/node2vec.py @@ -2,7 +2,6 @@ from collections import defaultdict from itertools import product import logging -import multiprocessing import os import time @@ -10,20 +9,21 @@ from scipy.sparse import coo_matrix from ast2vec.coocc import Cooccurrences -from ast2vec.pickleable_logger import PickleableLogger from ast2vec.uast import UASTModel +from map_reduce import MapReduce from random_walk import Graph from utils import read_paths, read_vocab -class Node2Vec(PickleableLogger): +class Node2Vec(MapReduce): MAX_VOCAB_WORDS = 1000000 def __init__(self, log_level, num_processes, vocab_path, window, graph): - super(Node2Vec, self).__init__(log_level=log_level) + super(Node2Vec, self).__init__(log_level=log_level, num_processes=num_processes) self.graph = graph self.num_processes = num_processes - self.vocab = set(read_vocab(vocab_path)[:Node2Vec.MAX_VOCAB_WORDS]) + self.vocab = {w: i for i, w in enumerate( + read_vocab(vocab_path)[:Node2Vec.MAX_VOCAB_WORDS])} self.window = window def process(self, fname, output): @@ -31,43 +31,51 @@ def process(self, fname, output): paths = read_paths(fname) self._log.info("Found %d files", len(paths)) + @MapReduce.wrap_queue_in + def process_uast(self, obj): + filename, output = obj + self._log.info("Processing %s", filename) + uast = UASTModel().load(filename) + dok_matrix = defaultdict(int) + + for walk in self.graph.simulate_walks(uast): + walk = [[self.vocab[t] for t in map(str, node.tokens) + if t in self.vocab] for node in walk] + for i, cur_tokens in enumerate(walk[:-1]): + for next_tokens in walk[(i + 1):(i + self.window)]: + for word1, word2 in product(cur_tokens, next_tokens): + dok_matrix[(word1, word2)] += 1 + dok_matrix[(word2, word1)] += 1 + + del uast + + mat = coo_matrix( + (Node2Vec.MAX_VOCAB_WORDS, Node2Vec.MAX_VOCAB_WORDS), dtype=numpy.float32) + mat.row = row = numpy.empty(len(dok_matrix), dtype=numpy.int32) + mat.col = col = numpy.empty(len(dok_matrix), dtype=numpy.int32) + 
mat.data = data = numpy.empty(len(dok_matrix), dtype=numpy.float32) + for i, (coord, val) in enumerate(sorted(dok_matrix.items())): + row[i], col[i] = coord + data[i] = val + + del dok_matrix + + coocc = Cooccurrences() + coocc.construct(tokens=sorted(self.vocab, key=self.vocab.get), matrix=mat) + coocc.save(output) + self._log.info("Finished processing %s", filename) + return filename + + @MapReduce.wrap_queue_out + def process_output(self, result): + pass + self._log.info("Processing files.") paths = self._preprocess_paths(paths, output) start_time = time.time() - with multiprocessing.Pool(self.num_processes) as pool: - pool.starmap(self.process_uast, paths) + self.parallelize(paths, process_uast, process_output) self._log.info("Finished processing in %.2f.", time.time() - start_time) - def process_uast(self, filename, output): - self._log.info("Processing %s", filename) - uast = UASTModel().load(filename) - dok_matrix = defaultdict(int) - - for walk in self.graph.simulate_walks(uast): - walk = [[t for t in map(str, node.tokens) if t in self.vocab] for node in walk] - for i, cur_tokens in enumerate(walk[:-1]): - for next_tokens in walk[(i + 1):(i + self.window)]: - for word1, word2 in product(cur_tokens, next_tokens): - dok_matrix[(word1, word2)] += 1 - dok_matrix[(word2, word1)] += 1 - - del uast - - mat = coo_matrix((Node2Vec.MAX_VOCAB_WORDS, Node2Vec.MAX_VOCAB_WORDS), dtype=numpy.float32) - mat.row = row = numpy.empty(len(dok_matrix), dtype=numpy.int32) - mat.col = col = numpy.empty(len(dok_matrix), dtype=numpy.int32) - mat.data = data = numpy.empty(len(dok_matrix), dtype=numpy.float32) - for i, (coord, val) in enumerate(sorted(dok_matrix.items())): - row[i], col[i] = coord - data[i] = val - - del dok_matrix - - coocc = Cooccurrences() - coocc.construct(tokens=self.vocab, matrix=mat) - coocc.save(output) - self._log.info("Finished processing %s", filename) - def _get_log_name(self): return "Node2Vec" @@ -77,8 +85,10 @@ def _preprocess_paths(self, paths, 
output): name = os.path.basename(p) if name.startswith("uast_"): name = name[len("uast_"):] - out = os.path.join(output, name[0], name) - preprocessed_paths.append((p, out)) + out_dir = os.path.join(output, name[0]) + os.makedirs(out_dir, exist_ok=True) + out_fname = os.path.join(out_dir, name) + preprocessed_paths.append((p, out_fname)) return preprocessed_paths diff --git a/embeddings/random_walk.py b/embeddings/random_walk.py index 36eb315..73e69bc 100644 --- a/embeddings/random_walk.py +++ b/embeddings/random_walk.py @@ -63,7 +63,7 @@ def simulate_walks(self, uasts): iter_nodes = set(node.id for node in nodes) while iter_nodes: - node = random.sample(iter_nodes, 1) + node = nodes[random.sample(iter_nodes, 1)[0]] walk = self.node2vec_walk(node, edges, nodes) yield walk diff --git a/embeddings/role_model.py b/embeddings/role_model.py index 49b49db..1d6ff4c 100644 --- a/embeddings/role_model.py +++ b/embeddings/role_model.py @@ -151,7 +151,7 @@ def parse_args(): parser.add_argument("--test", help="Input file with UASTs for testing.") parser.add_argument("--model", required=True, help="Path to store trained model.") parser.add_argument("--processes", type=int, default=2, help="Number of processes.") - parser.add_argument("--embeddings", help="File with roles and tokens embeddings.") + parser.add_argument("--embeddings", required=True, help="File with roles and tokens embeddings.") return parser.parse_args() From 7d7214d53cff15e155f603da87144d1bc95713ea Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Mon, 25 Sep 2017 23:19:01 +0300 Subject: [PATCH 11/17] Refactor code --- .gitignore | 6 - embeddings/map_reduce.py | 63 ---------- embeddings/move_prox.py | 18 --- embeddings/utils.py | 53 -------- role2vec/__init__.py | 0 role2vec/__main__.py | 96 +++++++++++++++ {embeddings => role2vec}/glove.py | 63 ++++++---- role2vec/map_reduce.py | 111 +++++++++++++++++ {embeddings => role2vec}/node2vec.py | 89 +++++++------- {embeddings => role2vec}/random_walk.py | 55 
+++++++-- role2vec/roles/base.py | 92 ++++++++++++++ .../role_model.py => role2vec/roles/mlp.py | 115 +++++++----------- .../role_statistics.py => role2vec/stats.py | 0 role2vec/utils.py | 83 +++++++++++++ .../build_vocab.py => role2vec/vocab.py | 39 ++++-- 15 files changed, 584 insertions(+), 299 deletions(-) delete mode 100644 embeddings/map_reduce.py delete mode 100644 embeddings/move_prox.py delete mode 100644 embeddings/utils.py create mode 100644 role2vec/__init__.py create mode 100644 role2vec/__main__.py rename {embeddings => role2vec}/glove.py (50%) create mode 100644 role2vec/map_reduce.py rename {embeddings => role2vec}/node2vec.py (57%) rename {embeddings => role2vec}/random_walk.py (68%) create mode 100644 role2vec/roles/base.py rename embeddings/role_model.py => role2vec/roles/mlp.py (55%) rename embeddings/role_statistics.py => role2vec/stats.py (100%) create mode 100644 role2vec/utils.py rename embeddings/build_vocab.py => role2vec/vocab.py (63%) diff --git a/.gitignore b/.gitignore index 5c8fe09..491c84c 100644 --- a/.gitignore +++ b/.gitignore @@ -18,9 +18,3 @@ # jupyter notebooks *.ipynb_checkpoints/ -# virtualenv -role2vec/* - -# python libraries -ast2vec/* -vecino/* diff --git a/embeddings/map_reduce.py b/embeddings/map_reduce.py deleted file mode 100644 index 6f70c50..0000000 --- a/embeddings/map_reduce.py +++ /dev/null @@ -1,63 +0,0 @@ -import multiprocessing -import time - -from ast2vec.pickleable_logger import PickleableLogger - - -class MapReduce(PickleableLogger): - def __init__(self, log_level, num_processes): - super(MapReduce, self).__init__(log_level=log_level) - self.num_processes = num_processes - - def parallelize(self, tasks, process_queue_in, process_queue_out): - queue_in = multiprocessing.Manager().Queue() - queue_out = multiprocessing.Manager().Queue(100) - processes = [multiprocessing.Process(target=process_queue_in, - args=(self, queue_in, queue_out)) - for i in range(self.num_processes)] - for p in processes: - p.start() - 
for t in tasks: - queue_in.put(t) - for _ in processes: - queue_in.put(None) - failures = process_queue_out(self, len(tasks), queue_out) - for p in processes: - p.join() - self._log.info("Finished, %d failed tasks", failures) - return len(tasks) - failures - - @staticmethod - def wrap_queue_in(func): - def wrapper(self, queue_in, queue_out): - while True: - item = queue_in.get() - if item is None: - break - try: - queue_out.put(func(self, item)) - except: - self._log.exception("%s failed", item) - queue_out.put(None) - return wrapper - - @staticmethod - def wrap_queue_out(func): - def wrapper(self, n_tasks, queue_out): - failures = 0 - start = time.time() - - for i in range(n_tasks): - result = queue_out.get() - if i % 1000 == 0: - print(i, time.time() - start) - if result is None: - failures += 1 - continue - func(self, result) - - return failures - return wrapper - - def _get_log_name(self): - return "MapReduce" diff --git a/embeddings/move_prox.py b/embeddings/move_prox.py deleted file mode 100644 index c95be87..0000000 --- a/embeddings/move_prox.py +++ /dev/null @@ -1,18 +0,0 @@ -import os -import shutil - -PROX_DIR = "/storage/timofei/role2vec/libs/ast2vec/uast_prox" -OUT_DIR = "/storage/timofei/role2vec/embeddings" - - -for fname in ["train", "valid", "test"]: - with open("uasts_{}.txt".format(fname)) as fin: - for line in fin: - line = line.strip() - uname = line[line.rfind("/") + 1:] - letter = uname[len("uast_"):len("uast_") + 1] - pname = os.path.join(PROX_DIR, letter, uname) - if os.path.exists(pname): - oname = os.path.join(OUT_DIR, "prox_{}".format(fname), letter) - os.makedirs(oname, exist_ok=True) - shutil.move(pname, oname) diff --git a/embeddings/utils.py b/embeddings/utils.py deleted file mode 100644 index 257585d..0000000 --- a/embeddings/utils.py +++ /dev/null @@ -1,53 +0,0 @@ -from collections import deque -from itertools import islice, tee - - -def node_iterator(root): - queue = [(root, 0)] - n_nodes = 1 - while queue: - node, node_idx = 
queue.pop() - yield node, node_idx - for child in node.children: - queue.append((child, n_nodes)) - n_nodes += 1 - - -def consume(iterator, n): - """Advance the iterator n-steps ahead. If n is none, consume entirely.""" - # Use functions that consume iterators at C speed. - if n is None: - # feed the entire iterator into a zero-length deque - deque(iterator, maxlen=0) - else: - # advance to the empty slice starting at position n - next(islice(iterator, n, n), None) - - -def window(iterable, n=2): - """s -> (s0, ...,s(n-1)), (s1, ...,sn), (s2, ..., s(n+1)), ...""" - iters = tee(iterable, n) - # Could use enumerate(islice(iters, 1, None), 1) to avoid consume(it, 0), but - # that's slower for larger window sizes, while saving only small fixed "noop" cost - for i, it in enumerate(iters): - consume(it, i) - return zip(*iters) - - -def read_paths(fname): - paths = [line.strip() for line in open(fname).readlines()] - if not paths: - raise ValueError("Make sure the file is not empty!") - return paths - - -def read_vocab(vocab_path): - with open(vocab_path) as fin: - words = [line.split(" ")[0] for line in fin] - return words - - -def save_vocab(vocab_path, vocab): - with open(vocab_path, "w") as fout: - fout.write("\n".join( - map(lambda x: "%s %d".join(x), vocab.most_common()))) diff --git a/role2vec/__init__.py b/role2vec/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/role2vec/__main__.py b/role2vec/__main__.py new file mode 100644 index 0000000..97442f6 --- /dev/null +++ b/role2vec/__main__.py @@ -0,0 +1,96 @@ +import argparse +import logging +import sys + +from ast2vec.__main__ import ArgumentDefaultsHelpFormatterNoNone, one_arg_parser +from modelforge.logs import setup_logging +from role2vec.glove import glove_entry +from role2vec.node2vec import node2vec_entry +from role2vec.roles.base import roles_entry + + +def get_parser() -> argparse.ArgumentParser: + """ + Create main parser. 
+ + :return: Parser + """ + parser = argparse.ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatterNoNone) + parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, + help="Logging verbosity.") + + # Create all common arguments + + process_arg = one_arg_parser("--processes", type=int, default=2, help="Number of processes.") + vocab_arg = one_arg_parser("--vocabulary", default="vocab.txt", help="File with vocabulary.") + + # Construct subparsers + + subparsers = parser.add_subparsers(help="Commands", dest="command") + + glove_parser = subparsers.add_parser( + "glove", help="Convert proximity matrices into GloVe suitable format. Refer to " + "https://github.com/stanfordnlp/GloVe", + formatter_class=ArgumentDefaultsHelpFormatterNoNone, + parents=[process_arg, vocab_arg]) + glove_parser.set_defaults(handler=glove_entry) + glove_parser.add_argument("input", help="Input directory with proximity matrices.") + glove_parser.add_argument("output", help="Path to store combined proximity matrix.") + glove_parser.add_argument("--filter", default="**/*.asdf", help="File name glob selector.") + + node2vec_parser = subparsers.add_parser( + "node2vec", help="Node2Vec random walk algorithm for assembling proximity matrices from " + "UASTs. 
Refer to https://github.com/aditya-grover/node2vec", + formatter_class=ArgumentDefaultsHelpFormatterNoNone, + parents=[process_arg, vocab_arg]) + node2vec_parser.set_defaults(handler=node2vec_entry) + node2vec_parser.add_argument("input", help="Input file with UASTs.") + node2vec_parser.add_argument("output", help="Path to store the resulting matrices.") + node2vec_parser.add_argument( + "-n", "--num-walks", type=int, default=1, help="Number of random walks from each node.") + node2vec_parser.add_argument( + "-l", "--walk-length", type=int, default=80, help="Length of each random walk.") + node2vec_parser.add_argument( + "-w", "--window", type=int, default=5, help="Window size for node context.") + node2vec_parser.add_argument( + "-p", type=float, default=1.0, + help="Controls the likelihood of immediately revisiting previous node.") + node2vec_parser.add_argument( + "-q", type=float, default=1.0, help="Controls the likelihood of exploring outward nodes.") + + roles_parser = subparsers.add_parser( + "mlp", help="Predict roles using Multi-Layer Perceptron.", + formatter_class=ArgumentDefaultsHelpFormatterNoNone, + parents=[process_arg]) + roles_parser.set_defaults(handler=roles_entry) + roles_parser.add_argument("algorithm", help="Specify training algorithm.") + roles_parser.add_argument("--train", help="Input file with UASTs for training.") + roles_parser.add_argument("--test", help="Input file with UASTs for testing.") + roles_parser.add_argument("--model", required=True, help="Path to store trained model.") + roles_parser.add_argument( + "--embeddings", required=True, help="File with roles and tokens embeddings.") + + return parser + + +def main(): + """ + Create all the argparsers and invoke the function from set_defaults(). + + :return: The result of the function from set_defaults(). 
+ """ + parser = get_parser() + args = parser.parse_args() + args.log_level = logging._nameToLevel[args.log_level] + setup_logging(args.log_level) + try: + handler = args.handler + except AttributeError: + def print_usage(_): + parser.print_usage() + + handler = print_usage + return handler(args) + +if __name__ == "__main__": + sys.exit(main()) diff --git a/embeddings/glove.py b/role2vec/glove.py similarity index 50% rename from embeddings/glove.py rename to role2vec/glove.py index d961de5..cc732d8 100644 --- a/embeddings/glove.py +++ b/role2vec/glove.py @@ -1,20 +1,37 @@ -import argparse from collections import Counter -import logging from pathlib import Path import struct +from typing import List, Tuple from ast2vec.coocc import Cooccurrences -from build_vocab import Vocab from map_reduce import MapReduce +from utils import read_vocab class GloVe(MapReduce): - def __init__(self, log_level, num_processes, vocab_path): + """ + Converts proximity matrices into GloVe suitable format. + Refer to https://github.com/stanfordnlp/GloVe + """ + + def __init__(self, log_level: str, num_processes: int, vocab_path: str): + """ + :param log_level: Log level of GloVe. + :param num_processes: Number of running processes. There's always one additional process + for reducing data. + :param vocab_path: Path to stored vocabulary. + """ super(GloVe, self).__init__(log_level=log_level, num_processes=num_processes) - self.vocab = {word: i for i, word in enumerate(Vocab.read_vocab(vocab_path))} + self.vocab = {word: i for i, word in enumerate(read_vocab(vocab_path))} - def convert(self, src_dir, output, file_filter): + def convert(self, src_dir: str, output: str, file_filter: str) -> None: + """ + Combine all proximity matrices and save them into GloVe suitable format. + + :param src_dir: Path to stored proximity matrices. + :param output: Path for storing the resulting GloVe suitable matrix. + :param file_filter: Pattern for recursively scanning `src_dir`. 
+ """ self._log.info("Scanning %s", src_dir) files = [str(p) for p in Path(src_dir).glob(file_filter)] self._log.info("Found %d files", len(files)) @@ -22,13 +39,19 @@ def convert(self, src_dir, output, file_filter): return 0 self._log.info("Combine proximity matrices.") - mat = self.extract(files) + mat = self.combine_mats(files) self._log.info("Finished combining.") self._log.info("Saving matrix.") self.save_mat(mat, output) - def extract(self, files): + def combine_mats(self, files: List[str]) -> Counter[Tuple[str, str], int]: + """ + Combine proximity matrices. + + :param files: List of filepaths to stored proximity matrices. + :return: Mapping from token pairs to their proximity combined over all matrices. + """ counter = Counter() @MapReduce.wrap_queue_in @@ -48,7 +71,13 @@ def combine_prox(result): return counter @staticmethod - def save_mat(mat, output): + def save_mat(mat: Counter[Tuple[str, str], int], output: str) -> None: + """ + Save matrix in GloVe suitable format. + + :param mat: Counter storing proximities. + :param output: Path for storing the resulting GloVe suitable matrix. 
+ """ with open(output, "wb") as fout: for (i, j), val in mat.items(): fout.write(struct.pack("iid", i, j, int(val))) @@ -57,20 +86,6 @@ def _get_log_name(self): return "GloVe" -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, - help="Logging verbosity.") - parser.add_argument("input", help="Input directory with proximity matrices.") - parser.add_argument("output", help="Path to store combined proximity matrix.") - parser.add_argument("--filter", default="**/*.asdf", help="File name glob selector.") - parser.add_argument("--processes", type=int, default=2, help="Number of processes.") - parser.add_argument("--vocabulary", default="vocab.txt", help="File with vocabulary.") - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - +def glove_entry(args): glove = GloVe(args.log_level, args.processes, args.vocabulary) glove.convert(args.input, args.output, args.filter) diff --git a/role2vec/map_reduce.py b/role2vec/map_reduce.py new file mode 100644 index 0000000..eb9dced --- /dev/null +++ b/role2vec/map_reduce.py @@ -0,0 +1,111 @@ +import multiprocessing +import time +from typing import List + +from ast2vec.pickleable_logger import PickleableLogger + + +class MapReduce(PickleableLogger): + """ + Base class for parallel data processing. Creates a pool of workers for data mangling and + reduces data in the main process. + """ + + def __init__(self, log_level: str, num_processes: int, queue_lim: int=100): + """ + :param log_level: Log level of MapReduce. + :param num_processes: Number of running processes. There's always one additional process + for reducing data. + :param queue_lim: Maximum number of results in queue for reducing. 
+ """ + super(MapReduce, self).__init__(log_level=log_level) + self.num_processes = num_processes + self.queue_lim = queue_lim + + def parallelize(self, tasks: List[str], process_queue_in, process_queue_out) -> int: + """ + Process tasks in parallel. + + :param tasks: List of filenames. + :param process_queue_in: Function for processing items from the task queue. + :param process_queue_out: Function for processing items from the result queue. + :return: Number of successfully processed tasks. + """ + queue_in = multiprocessing.Manager().Queue() + queue_out = multiprocessing.Manager().Queue(self.queue_lim) + processes = [multiprocessing.Process(target=process_queue_in, + args=(self, queue_in, queue_out)) + for i in range(self.num_processes)] + n_tasks = len(tasks) + start_time = time.time() + + self._log.info("Starting tasks.") + for p in processes: + p.start() + for t in tasks: + queue_in.put(t) + for _ in processes: + queue_in.put(None) + + failures = process_queue_out(self, n_tasks, queue_out) + for p in processes: + p.join() + + self._log.info("Finished %d/%d tasks in %.2f" % + (n_tasks - failures, n_tasks, time.time() - start_time)) + return len(tasks) - failures + + @staticmethod + def wrap_queue_in(func): + """ + Wrapper for automatic querying of tasks and storing results in the result queue. + + :param func: Function that can process a single task and accepts `self` as parameter. + """ + def wrapper(self, queue_in, queue_out): + while True: + item = queue_in.get() + if item is None: + break + try: + queue_out.put(func(self, item)) + except: + self._log.exception("%s failed", item) + queue_out.put(None) + return wrapper + + @staticmethod + def wrap_queue_out(freq: int=1000): + """ + Wrapper for allowing parametrization. + + :param freq: Logs information every `freq` iterations. + """ + def outer_wrapper(func): + """ + Wrapper for automatic querying of results and reducing them. + + :param func: Function that can process a result and accepts `self` as parameter. 
+ """ + def wrapper(self, n_tasks, queue_out): + failures = 0 + start = time.time() + + for i in range(n_tasks): + result = queue_out.get() + if (i + 1) % freq == 0: + self._log.info("Processed %d/%d in %.2f" % + (i + 1, n_tasks, time.time() - start)) + if result is None: + failures += 1 + continue + func(self, result) + + self._log.info("Finished %d/%d in %.2f seconds" % + (i + 1, n_tasks, time.time() - start)) + return failures + return wrapper + return outer_wrapper + + def _get_log_name(self): + return "MapReduce" diff --git a/embeddings/node2vec.py b/role2vec/node2vec.py similarity index 57% rename from embeddings/node2vec.py rename to role2vec/node2vec.py index 5f9d9d9..c588bcd 100644 --- a/embeddings/node2vec.py +++ b/role2vec/node2vec.py @@ -1,12 +1,10 @@ -import argparse from collections import defaultdict from itertools import product -import logging import os -import time +from typing import List import numpy -from scipy.sparse import coo_matrix +from scipy.sparse import coo_matrix, diags from ast2vec.coocc import Cooccurrences from ast2vec.uast import UASTModel @@ -16,17 +14,35 @@ class Node2Vec(MapReduce): + """ + Uses Node2Vec random walk algorithm for assembling proximity matrices from UASTs. + Refer to https://github.com/aditya-grover/node2vec + """ + MAX_VOCAB_WORDS = 1000000 - def __init__(self, log_level, num_processes, vocab_path, window, graph): + def __init__(self, log_level: str, num_processes: int, vocab_path: str, window: int, + graph: Graph): + """ + :param log_level: Log level of Node2Vec. + :param num_processes: Number of running processes. There's always one additional process + for reducing data. + :param vocab_path: Path to stored vocabulary. + :param window: Context window size for collecting proximities. + :param graph: Graph object for random walks generation. 
+ """ super(Node2Vec, self).__init__(log_level=log_level, num_processes=num_processes) self.graph = graph - self.num_processes = num_processes - self.vocab = {w: i for i, w in enumerate( - read_vocab(vocab_path)[:Node2Vec.MAX_VOCAB_WORDS])} + self.vocab = {w: i for i, w in enumerate(read_vocab(vocab_path, Node2Vec.MAX_VOCAB_WORDS))} self.window = window - def process(self, fname, output): + def process(self, fname: str, output_dir: str) -> None: + """ + Extract proximity matrices from UASTs. + + :param fname: Path to file with filepaths to stored UASTs. + :param output_dir: Path to directory for storing proximity matrices. + """ self._log.info("Scanning %s", fname) paths = read_paths(fname) self._log.info("Found %d files", len(paths)) @@ -41,24 +57,27 @@ def process_uast(self, obj): for walk in self.graph.simulate_walks(uast): walk = [[self.vocab[t] for t in map(str, node.tokens) if t in self.vocab] for node in walk] + # Connect each token to the next `self.window` tokens. for i, cur_tokens in enumerate(walk[:-1]): for next_tokens in walk[(i + 1):(i + self.window)]: for word1, word2 in product(cur_tokens, next_tokens): + # Symmetry will be accounted for later dok_matrix[(word1, word2)] += 1 - dok_matrix[(word2, word1)] += 1 del uast mat = coo_matrix( - (Node2Vec.MAX_VOCAB_WORDS, Node2Vec.MAX_VOCAB_WORDS), dtype=numpy.float32) + (Node2Vec.MAX_VOCAB_WORDS, Node2Vec.MAX_VOCAB_WORDS), dtype=numpy.int32) mat.row = row = numpy.empty(len(dok_matrix), dtype=numpy.int32) mat.col = col = numpy.empty(len(dok_matrix), dtype=numpy.int32) - mat.data = data = numpy.empty(len(dok_matrix), dtype=numpy.float32) + mat.data = data = numpy.empty(len(dok_matrix), dtype=numpy.int32) for i, (coord, val) in enumerate(sorted(dok_matrix.items())): row[i], col[i] = coord data[i] = val del dok_matrix + # Accounting for symmetry + mat = coo_matrix(mat + mat.T - diags(mat.diagonal())) coocc = Cooccurrences() coocc.construct(tokens=sorted(self.vocab, key=self.vocab.get), matrix=mat) @@ -70,54 
+89,34 @@ def process_uast(self, obj): def process_output(self, result): pass - self._log.info("Processing files.") - paths = self._preprocess_paths(paths, output) - start_time = time.time() + self._log.info("Preprocessing file names.") + paths = self._preprocess_paths(paths, output_dir) self.parallelize(paths, process_uast, process_output) - self._log.info("Finished processing in %.2f.", time.time() - start_time) def _get_log_name(self): return "Node2Vec" - def _preprocess_paths(self, paths, output): + def _preprocess_paths(self, paths: List[str], output_dir: str) -> List[str]: + """ + Prepare paths for storing proximity matrices. + + :param paths: List of filepaths to stored UASTs. + :param output_dir: Path to directory for storing proximity matrices. + :return: List of filepaths for storing proximity matrices. + """ preprocessed_paths = [] for p in paths: name = os.path.basename(p) if name.startswith("uast_"): name = name[len("uast_"):] - out_dir = os.path.join(output, name[0]) + out_dir = os.path.join(output_dir, name[0]) os.makedirs(out_dir, exist_ok=True) out_fname = os.path.join(out_dir, name) preprocessed_paths.append((p, out_fname)) return preprocessed_paths -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, - help="Logging verbosity.") - parser.add_argument("input", help="Input file with UASTs.") - parser.add_argument("output", help="Path to store the resulting matrices.") - parser.add_argument("--processes", type=int, default=4, help="Number of processes.") - parser.add_argument("--vocabulary", default="vocab.txt", help="File with vocabulary.") - parser.add_argument( - "-n", "--num-walks", type=int, default=1, help="Number of random walks from each node.") - parser.add_argument( - "-l", "--walk-length", type=int, default=80, help="Length of each random walk.") - parser.add_argument( - "-w", "--window", type=int, default=5, help="Window size for node context.") - 
parser.add_argument( - "-p", type=float, default=1.0, - help="Controls the likelihood of immediately revisiting previous node.") - parser.add_argument( - "-q", type=float, default=1.0, help="Controls the likelihood of exploring outward nodes.") - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - +def node2vec_entry(args): graph = Graph(args.log_level, args.num_walks, args.walk_length, args.p, args.q) - node2vec = Node2Vec(args.log_level, args.processes, - args.vocabulary, args.window, graph) + node2vec = Node2Vec(args.log_level, args.processes, args.vocabulary, args.window, graph) node2vec.process(args.input, args.output) diff --git a/embeddings/random_walk.py b/role2vec/random_walk.py similarity index 68% rename from embeddings/random_walk.py rename to role2vec/random_walk.py index 73e69bc..387c36d 100644 --- a/embeddings/random_walk.py +++ b/role2vec/random_walk.py @@ -1,5 +1,6 @@ from collections import namedtuple import random +from typing import Dict, Iterator, List, Tuple import numpy as np @@ -11,8 +12,20 @@ class Graph(PickleableLogger): - def __init__(self, log_level, num_walks, walk_length, p, q): - assert walk_length > 1, "Random walks have at least two nodes." + """ + Generates random walks from UASTs. + """ + + def __init__(self, log_level: str, num_walks: int, walk_length: int, p: float, q: float): + """ + :param log_level: Log level of Graph. + :param num_walks: Number of random walks from each node. + :param walk_length: Random walk length. + :param p: Controls the likelihood of immediately revisiting previous node. + :param q: Controls the likelihood of exploring outward nodes. 
+ """ + if walk_length <= 1: + raise ValueError("Random walks have at least two nodes.") super(Graph, self).__init__(log_level=log_level) self.num_walks = num_walks @@ -21,9 +34,15 @@ def __init__(self, log_level, num_walks, walk_length, p, q): self.q = 1 / q self.token_parser = TokenParser() - def node2vec_walk(self, start_node, edges, nodes): + def node2vec_walk(self, start_node: GraphNode, edges: Dict[Tuple[int, int], None], + nodes: List[GraphNode]) -> List[GraphNode]: """ Simulate a random walk starting from start node. + + :param start_node: Starting node for random walk. + :param edges: Dict for storing mapping from node id pairs to transition probabilities. + :param nodes: List of UAST nodes. + :return: List of GraphNodes in random walk. """ walk = [None] * self.walk_length prev_node = walk[0] = start_node @@ -33,6 +52,7 @@ def node2vec_walk(self, start_node, edges, nodes): J, q = edges[(prev_node.id, cur_node.id)] kk = np.random.randint(len(J)) + # Draw a sample from discrete distribution at constant time. if np.random.rand() < q[kk]: ind = kk else: @@ -43,9 +63,12 @@ def node2vec_walk(self, start_node, edges, nodes): return walk - def simulate_walks(self, uasts): + def simulate_walks(self, uasts) -> Iterator[List[GraphNode]]: """ Repeatedly simulate random walks from each node. + + :param uasts: List of UASTs. + :return: Iterator over random walks generated for the input UASTs. """ for uast, filename in zip(uasts.uasts, uasts.filenames): nodes, edges = self._preprocess_uast(uast) @@ -74,13 +97,23 @@ def simulate_walks(self, uasts): def _get_log_name(self): return "Graph" - def _get_tokens(self, uast_node): + def _get_tokens(self, uast_node) -> List[str]: + """ + Return node tokens. + + :param uast_node: UAST node. + :return: List of tokens. 
+ """ return ["RoleId_%d" % role for role in uast_node.roles] + \ list(self.token_parser.process_token(uast_node.token)) - def _preprocess_transition_probs(self, nodes, edges): + def _preprocess_transition_probs(self, nodes: List[GraphNode], + edges: Dict[Tuple[int, int], None]) -> None: """ Preprocessing of transition probabilities for guiding the random walks. + + :param nodes: List of GraphNodes in UAST. + :param edges: Dict for storing mapping from node id pairs to transition probabilities. """ self._log.info("Preprocessing transition probabilities.") for edge in edges: @@ -91,9 +124,12 @@ def _preprocess_transition_probs(self, nodes, edges): ]) edges[edge] = alias_setup(unnormalized_probs / unnormalized_probs.sum()) - def _preprocess_uast(self, root): + def _preprocess_uast(self, root) -> Tuple[List[GraphNode], Dict[Tuple[int, int], None]]: """ Add neighbors information to UAST nodes. + + :param root: Root node in UAST. + :return: Nodes and edges in the UAST. """ def create_node(node, id): return GraphNode(id=id, neighbors=[], tokens=self._get_tokens(node)) @@ -114,11 +150,14 @@ def create_node(node, id): return nodes, edges -def alias_setup(probs): +def alias_setup(probs: np.array) -> Tuple[np.array, np.array]: """ Compute utility lists for non-uniform sampling from discrete distributions. Refer to https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/ for details + + :param probs: Discrete distribution. + :return: Two helper tables. 
""" K = len(probs) q = probs * K diff --git a/role2vec/roles/base.py b/role2vec/roles/base.py new file mode 100644 index 0000000..dbb4552 --- /dev/null +++ b/role2vec/roles/base.py @@ -0,0 +1,92 @@ +import os + +from sklearn.externals import joblib + +from ast2vec.token_parser import TokenParser +from map_reduce import MapReduce +from utils import read_embeddings + +ROLES_MODELS = dict() + + +def register_roles_model(cls): + """ + Check some conventions for class declaration and add it to ROLES_MODELS. + + :param cls: Class for roles prediction. + """ + base = "Roles" + assert issubclass(cls, RolesBase), "Must be a subclass of RolesBase." + assert cls.__name__.startswith(base), "Make sure to start your class name with %s." % (base, ) + ROLES_MODELS[cls.__name__[len(base):].lower()] = cls + + return cls + + +class RolesBase(MapReduce): + """ + Base class for roles prediction. + """ + + def __init__(self, log_level: str, num_processes: int, emb_path: str): + """ + :param log_level: Log level of RolesBase. + :param num_processes: Number of running processes. There's always one additional process + for reducing data. + :param emb_path: Path to stored roles embeddings. + """ + super(RolesBase, self).__init__(log_level=log_level, num_processes=num_processes) + self.emb, self.roles = read_embeddings(emb_path) + self.model = None + self.token_parser = TokenParser() + + def save(self, model_path: str) -> None: + """ + Store trained model on disk. + + :param model_path: Path for storing trained model. + """ + if self.model is None: + raise ValueError("Model is empty.") + self._log.info("Saving model to %s.", model_path) + joblib.dump(self.model, model_path) + + def load(self, model_path: str) -> None: + """ + Load trained model from disk. + + :param model_path: Path to trained model. 
+ """ + if not os.path.exists(model_path): + raise ValueError("Provided path to model doesn't exist: %s", model_path) + self.model = joblib.load(model_path) + + def train(self, fname: str) -> None: + """ + Train model. + + :param fname: Path to train file with filepaths to stored UASTs. + """ + raise NotImplementedError + + def test(self, fname: str) -> None: + """ + Test model. + + :param fname: Path to test file with filepaths to stored UASTs. + """ + raise NotImplementedError + + +def roles_entry(args): + RolesModel = ROLES_MODELS[args.algorithm] + rm = RolesModel(args.log_level, args.processes, args.embeddings) + + if args.train: + rm.train(args.train) + rm.save(args.model) + else: + rm.load(args.model) + + if args.test: + rm.test(args.test) diff --git a/embeddings/role_model.py b/role2vec/roles/mlp.py similarity index 55% rename from embeddings/role_model.py rename to role2vec/roles/mlp.py index 1d6ff4c..42374de 100644 --- a/embeddings/role_model.py +++ b/role2vec/roles/mlp.py @@ -1,41 +1,28 @@ -import argparse -from collections import namedtuple from itertools import chain -import logging -import os import time +from typing import Dict, Tuple import numpy as np -from sklearn.externals import joblib from sklearn.neural_network import MLPClassifier -from ast2vec.token_parser import TokenParser from ast2vec.uast import UASTModel from map_reduce import MapReduce +from roles_base import register_roles_model, RolesBase from utils import node_iterator, read_paths -Node = namedtuple("Node", ["id", "parent", "children", "roles", "tokens"]) +@register_roles_model +class RolesMLP(RolesBase): + """ + Predicts roles using Multi-Layer Perceptron. 
+ """ -class RoleModel(MapReduce): - def __init__(self, log_level, num_processes, emb_path): - super(RoleModel, self).__init__(log_level=log_level, num_processes=num_processes) - self.emb, self.roles = self.load_emb(emb_path) - self.model = None - self.token_parser = TokenParser() + def train(self, fname: str) -> None: + """ + Train model. - def save(self, model_path): - if self.model is None: - raise ValueError("Model is empty.") - self._log.info("Saving model to %s.", model_path) - joblib.dump(self.model, model_path) - - def load(self, model_path): - if not os.path.exists(model_path): - raise ValueError("Provided path to model doesn't exist: %s", model_path) - self.model = joblib.load(model_path) - - def train(self, fname): + :param fname: Path to train file with filepaths to stored UASTs. + """ paths = read_paths(fname) self._log.info("Train model.") @@ -55,7 +42,12 @@ def train_uast(self, result): self.parallelize(paths, _process_uast, train_uast) self._log.info("Finished training.") - def test(self, fname): + def test(self, fname: str) -> None: + """ + Test model. + + :param fname: Path to test file with filepaths to stored UASTs. + """ paths = read_paths(fname) self._log.info("Test model.") @@ -73,29 +65,27 @@ def test_uast(self, result): np.save("y_pred.npy", y_pred) self._log.info("Finished testing.") - @staticmethod - def load_emb(emb_path): - emb = {} - roles = [] - - with open(emb_path) as fin: - for line in fin: - word, *vec = line.split("\t") - emb[word] = np.array(vec, dtype=np.float) - if word.startswith("RoleId_"): - roles.append(word) + def _mean_vec(self, node) -> Tuple[np.array, int]: + """ + Calculate mean of role/token embeddings for a node. - roles = {role: i for i, role in enumerate(roles)} - return emb, roles - - def _mean_vec(self, node): - tokens = [t for t in chain(node.token, ["RoleId_%d" % role for role in node.roles]) + :param node: UAST node. + :return: Mean of role/token embeddings and their total number. 
+ """ + tokens = [t for t in chain(node.token, ("RoleId_%d" % role for role in node.roles)) if t in self.emb] if not tokens: return None, 0 return np.mean([self.emb[t] for t in tokens], axis=0), len(tokens) - def _mean_vecs(self, root): + def _mean_vecs(self, root) -> Tuple[Dict[int, np.array], Dict[int, np.array]]: + """ + Calculate mean of role/token embeddings for nodes and their children in a UAST. + + :param root: UAST root node. + :return: Mappings from node indices to their parent's and their childrens' mean role/token + embeddings. + """ node_vecs = {0: self._mean_vec(root)} child_vecs = {} parent_vecs = {0: None} @@ -125,7 +115,15 @@ def _mean_vecs(self, root): @MapReduce.wrap_queue_in -def _process_uast(self, filename): +def _process_uast(self, filename: str) -> Tuple[np.array, np.array]: + """ + Convert UAST into feature and label arrays. + Had to be defined outside of RolesMLP so that we don't suppply `self` twice. + + :param filename: Path to stored UAST. + :return: Array of concatenated mean parent and children role/token embeddings for each node and + the corresponding array of node roles. 
+ """ X, y = [], [] uast_model = UASTModel().load(filename) @@ -140,31 +138,4 @@ def _process_uast(self, filename): X.append(np.concatenate((child_vec, parent_vec))) y.append(labels) - return X, y - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, - help="Logging verbosity.") - parser.add_argument("--train", help="Input file with UASTs for training.") - parser.add_argument("--test", help="Input file with UASTs for testing.") - parser.add_argument("--model", required=True, help="Path to store trained model.") - parser.add_argument("--processes", type=int, default=2, help="Number of processes.") - parser.add_argument("--embeddings", required=True, help="File with roles and tokens embeddings.") - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - rm = RoleModel(args.log_level, args.processes, args.embeddings) - - if args.train: - rm.train(args.train) - rm.save(args.model) - else: - rm.load(args.model) - - if args.test: - rm.test(args.test) + return np.array(X), np.array(y) diff --git a/embeddings/role_statistics.py b/role2vec/stats.py similarity index 100% rename from embeddings/role_statistics.py rename to role2vec/stats.py diff --git a/role2vec/utils.py b/role2vec/utils.py new file mode 100644 index 0000000..b27f488 --- /dev/null +++ b/role2vec/utils.py @@ -0,0 +1,83 @@ +from collections import Counter, deque +from itertools import islice, tee +from typing import Dict, Iterable, Iterator, List, Tuple + +import numpy as np + + +def node_iterator(root): + """ + Enumerate UAST nodes using depth-first approach. + """ + queue = [(root, 0)] + n_nodes = 1 + while queue: + node, node_idx = queue.pop() + yield node, node_idx + for child in node.children: + queue.append((child, n_nodes)) + n_nodes += 1 + + +def consume(iterator: Iterator, n: int) -> None: + """ + Advance the iterator n-steps ahead. If n is none, consume entirely. 
+ + :param iterator: Input iterator. + :param n: Number of steps. + """ + # Use functions that consume iterators at C speed. + if n is None: + # feed the entire iterator into a zero-length deque + deque(iterator, maxlen=0) + else: + # advance to the empty slice starting at position n + next(islice(iterator, n, n), None) + + +def window(iterable: Iterable, n: int=2) -> Iterator: + """ + Create consecutive windows of elements from iterable. + + :param iterable: Input iterable. + :param n: Window size. + :return: Iterator for windows from the input iterable. + """ + iters = tee(iterable, n) + for i, it in enumerate(iters): + consume(it, i) + return zip(*iters) + + +def read_embeddings(emb_path: str) -> Tuple[Dict[str, np.array], List[str]]: + emb = {} + roles = [] + + with open(emb_path) as fin: + for line in fin: + word, *vec = line.split("\t") + emb[word] = np.array(vec, dtype=np.float) + if word.startswith("RoleId_"): + roles.append(word) + + roles = {role: i for i, role in enumerate(roles)} + return emb, roles + + +def read_paths(fname: str) -> List[str]: + paths = [line.strip() for line in open(fname).readlines()] + if not paths: + raise ValueError("Make sure the file is not empty!") + return paths + + +def read_vocab(vocab_path: str, num_words: int=None) -> List[str]: + with open(vocab_path) as fin: + words = [line.split(" ")[0] for line in islice(fin, num_words)] + return words + + +def save_vocab(vocab_path: str, vocab: Counter[str, int]) -> None: + with open(vocab_path, "w") as fout: + fout.write("\n".join( + map(lambda x: "%s %d".join(x), vocab.most_common()))) diff --git a/embeddings/build_vocab.py b/role2vec/vocab.py similarity index 63% rename from embeddings/build_vocab.py rename to role2vec/vocab.py index 79be37e..1fd6c59 100644 --- a/embeddings/build_vocab.py +++ b/role2vec/vocab.py @@ -1,22 +1,35 @@ import argparse from collections import Counter import logging +from typing import List from ast2vec.token_parser import TokenParser from ast2vec.uast 
import UASTModel from map_reduce import MapReduce +from utils import save_vocab class Vocab(MapReduce): - def __init__(self, log_level, num_processes, vocab_path): + """ + Collects vocabulary from UASTs. + """ + + def __init__(self, log_level: str, num_processes: int): + """ + :param log_level: Log level of Vocab. + :param num_processes: Number of running processes. There's always one additional process + for reducing data. + """ super(Vocab, self).__init__(log_level=log_level, num_processes=num_processes) self.token_parser = TokenParser() - if vocab_path is None: - self.vocab_path = "vocab.txt" - else: - self.vocab_path = vocab_path - def create(self, files): + def create(self, files: List[str]) -> Counter[str, int]: + """ + Create vocabulary by processing supplied UASTs. + + :param files: List of filepaths to stored UASTs. + :return: Dict with tokens and their number of occurrences. + """ vocab = Counter() @MapReduce.wrap_queue_in @@ -37,13 +50,18 @@ def combine_vocab(result): vocab.update(result) self.parallelize(files, uasts_vocab, combine_vocab) - self.save_vocab(self.vocab_path, vocab) return vocab def _get_log_name(self): return "Vocab" - def _get_tokens(self, uast_node): + def _get_tokens(self, uast_node) -> List[str]: + """ + Return node tokens. + + :param uast_node: UAST node. + :return: List of tokens. 
+ """ return ["RoleId_%d" % role for role in uast_node.roles] + \ list(self.token_parser.process_token(uast_node.token)) @@ -53,7 +71,7 @@ def parse_args(): parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, help="Logging verbosity.") parser.add_argument("input", help="Input file with UASTs.") - parser.add_argument("output", help="Path to store vocabulary.") + parser.add_argument("output", default="vocab.txt", help="Path to store vocabulary.") parser.add_argument("--processes", type=int, default=2, help="Number of processes.") return parser.parse_args() @@ -63,4 +81,5 @@ def parse_args(): uasts = open(args.input).read().split("\n") vocab = Vocab(args.log_level, args.processes, args.output) - vocab.create(uasts) + words = vocab.create(uasts) + save_vocab(args.output, words) From d9fad7e594fcf4f31109b84b2c5927e362e5410e Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Tue, 26 Sep 2017 18:55:18 +0300 Subject: [PATCH 12/17] Fix imports --- role2vec/glove.py | 10 +++++----- role2vec/node2vec.py | 6 +++--- role2vec/random_walk.py | 2 +- role2vec/roles/base.py | 4 ++-- role2vec/roles/mlp.py | 6 +++--- role2vec/stats.py | 4 ++-- role2vec/utils.py | 4 ++-- role2vec/vocab.py | 8 ++++---- 8 files changed, 22 insertions(+), 22 deletions(-) diff --git a/role2vec/glove.py b/role2vec/glove.py index cc732d8..cb73e72 100644 --- a/role2vec/glove.py +++ b/role2vec/glove.py @@ -1,11 +1,11 @@ from collections import Counter from pathlib import Path import struct -from typing import List, Tuple +from typing import Dict, List, Tuple from ast2vec.coocc import Cooccurrences -from map_reduce import MapReduce -from utils import read_vocab +from role2vec.map_reduce import MapReduce +from role2vec.utils import read_vocab class GloVe(MapReduce): @@ -45,7 +45,7 @@ def convert(self, src_dir: str, output: str, file_filter: str) -> None: self._log.info("Saving matrix.") self.save_mat(mat, output) - def combine_mats(self, files: List[str]) -> Counter[Tuple[str, 
str], int]: + def combine_mats(self, files: List[str]) -> Dict[Tuple[str, str], int]: """ Combine proximity matrices. @@ -71,7 +71,7 @@ def combine_prox(result): return counter @staticmethod - def save_mat(mat: Counter[Tuple[str, str], int], output: str) -> None: + def save_mat(mat: Dict[Tuple[str, str], int], output: str) -> None: """ Save matrix in GloVe suitable format. diff --git a/role2vec/node2vec.py b/role2vec/node2vec.py index c588bcd..37d2a75 100644 --- a/role2vec/node2vec.py +++ b/role2vec/node2vec.py @@ -8,9 +8,9 @@ from ast2vec.coocc import Cooccurrences from ast2vec.uast import UASTModel -from map_reduce import MapReduce -from random_walk import Graph -from utils import read_paths, read_vocab +from role2vec.map_reduce import MapReduce +from role2vec.random_walk import Graph +from role2vec.utils import read_paths, read_vocab class Node2Vec(MapReduce): diff --git a/role2vec/random_walk.py b/role2vec/random_walk.py index 387c36d..122652b 100644 --- a/role2vec/random_walk.py +++ b/role2vec/random_walk.py @@ -6,7 +6,7 @@ from ast2vec.pickleable_logger import PickleableLogger from ast2vec.token_parser import TokenParser -from utils import node_iterator +from role2vec.utils import node_iterator GraphNode = namedtuple("GraphNode", ["id", "neighbors", "tokens"]) diff --git a/role2vec/roles/base.py b/role2vec/roles/base.py index dbb4552..8f5b761 100644 --- a/role2vec/roles/base.py +++ b/role2vec/roles/base.py @@ -3,8 +3,8 @@ from sklearn.externals import joblib from ast2vec.token_parser import TokenParser -from map_reduce import MapReduce -from utils import read_embeddings +from role2vec.map_reduce import MapReduce +from role2vec.utils import read_embeddings ROLES_MODELS = dict() diff --git a/role2vec/roles/mlp.py b/role2vec/roles/mlp.py index 42374de..2362687 100644 --- a/role2vec/roles/mlp.py +++ b/role2vec/roles/mlp.py @@ -6,9 +6,9 @@ from sklearn.neural_network import MLPClassifier from ast2vec.uast import UASTModel -from map_reduce import MapReduce -from 
roles_base import register_roles_model, RolesBase -from utils import node_iterator, read_paths +from role2vec.map_reduce import MapReduce +from role2vec.roles_base import register_roles_model, RolesBase +from role2vec.utils import node_iterator, read_paths @register_roles_model diff --git a/role2vec/stats.py b/role2vec/stats.py index a45c5ab..6903402 100644 --- a/role2vec/stats.py +++ b/role2vec/stats.py @@ -4,8 +4,8 @@ import logging from ast2vec.uast import UASTModel -from map_reduce import MapReduce -from utils import read_paths +from role2vec.map_reduce import MapReduce +from role2vec.utils import node_iterator, read_paths class RoleStat(MapReduce): diff --git a/role2vec/utils.py b/role2vec/utils.py index b27f488..c575f74 100644 --- a/role2vec/utils.py +++ b/role2vec/utils.py @@ -1,4 +1,4 @@ -from collections import Counter, deque +from collections import deque from itertools import islice, tee from typing import Dict, Iterable, Iterator, List, Tuple @@ -77,7 +77,7 @@ def read_vocab(vocab_path: str, num_words: int=None) -> List[str]: return words -def save_vocab(vocab_path: str, vocab: Counter[str, int]) -> None: +def save_vocab(vocab_path: str, vocab: Dict[str, int]) -> None: with open(vocab_path, "w") as fout: fout.write("\n".join( map(lambda x: "%s %d".join(x), vocab.most_common()))) diff --git a/role2vec/vocab.py b/role2vec/vocab.py index 1fd6c59..142428a 100644 --- a/role2vec/vocab.py +++ b/role2vec/vocab.py @@ -1,12 +1,12 @@ import argparse from collections import Counter import logging -from typing import List +from typing import Dict, List from ast2vec.token_parser import TokenParser from ast2vec.uast import UASTModel -from map_reduce import MapReduce -from utils import save_vocab +from role2vec.map_reduce import MapReduce +from role2vec.utils import save_vocab class Vocab(MapReduce): @@ -23,7 +23,7 @@ def __init__(self, log_level: str, num_processes: int): super(Vocab, self).__init__(log_level=log_level, num_processes=num_processes) self.token_parser = 
TokenParser() - def create(self, files: List[str]) -> Counter[str, int]: + def create(self, files: List[str]) -> Dict[str, int]: """ Create vocabulary by processing supplied UASTs. From 14f5286e2f98d01704e2ddf8c25874d637437a04 Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Wed, 27 Sep 2017 01:11:26 +0300 Subject: [PATCH 13/17] Add stats command to main --- role2vec/__main__.py | 15 ++++++++++++--- role2vec/stats.py | 42 ++++++++++++++++++------------------------ 2 files changed, 30 insertions(+), 27 deletions(-) diff --git a/role2vec/__main__.py b/role2vec/__main__.py index 97442f6..b3815af 100644 --- a/role2vec/__main__.py +++ b/role2vec/__main__.py @@ -6,6 +6,7 @@ from modelforge.logs import setup_logging from role2vec.glove import glove_entry from role2vec.node2vec import node2vec_entry +from role2vec.stats import stats_entry from role2vec.roles.base import roles_entry @@ -23,6 +24,7 @@ def get_parser() -> argparse.ArgumentParser: process_arg = one_arg_parser("--processes", type=int, default=2, help="Number of processes.") vocab_arg = one_arg_parser("--vocabulary", default="vocab.txt", help="File with vocabulary.") + uast_input_arg = one_arg_parser("input", help="Input file with UASTs.") # Construct subparsers @@ -42,9 +44,8 @@ def get_parser() -> argparse.ArgumentParser: "node2vec", help="Node2Vec random walk algorithm for assembling proximity matrices from " "UASTs. 
Refer to https://github.com/aditya-grover/node2vec", formatter_class=ArgumentDefaultsHelpFormatterNoNone, - parents=[process_arg, vocab_arg]) + parents=[process_arg, vocab_arg, uast_input_arg]) node2vec_parser.set_defaults(handler=node2vec_entry) - node2vec_parser.add_argument("input", help="Input file with UASTs.") node2vec_parser.add_argument("output", help="Path to store the resulting matrices.") node2vec_parser.add_argument( "-n", "--num-walks", type=int, default=1, help="Number of random walks from each node.") @@ -59,7 +60,7 @@ def get_parser() -> argparse.ArgumentParser: "-q", type=float, default=1.0, help="Controls the likelihood of exploring outward nodes.") roles_parser = subparsers.add_parser( - "mlp", help="Predict roles using Multi-Layer Perceptron.", + "mlp", help="Train/test roles prediction model.", formatter_class=ArgumentDefaultsHelpFormatterNoNone, parents=[process_arg]) roles_parser.set_defaults(handler=roles_entry) @@ -70,6 +71,14 @@ def get_parser() -> argparse.ArgumentParser: roles_parser.add_argument( "--embeddings", required=True, help="File with roles and tokens embeddings.") + stats_parser = subparsers.add_parser( + "stats", help="Collect statistics for number of nodes w.r.t. 
number of node roles in " + "UASTs.", formatter_class=ArgumentDefaultsHelpFormatterNoNone, + parents=[process_arg, uast_input_arg]) + stats_parser.set_defaults(handler=stats_entry) + stats_parser.add_argument("--stat", required=True, help="Path to store resulting statisics.") + stats_parser.add_argument("--susp", required=True, help="Path to store suspicious UASTs.") + return parser diff --git a/role2vec/stats.py b/role2vec/stats.py index 6903402..30acdfc 100644 --- a/role2vec/stats.py +++ b/role2vec/stats.py @@ -1,15 +1,26 @@ -import argparse from collections import Counter import json -import logging from ast2vec.uast import UASTModel from role2vec.map_reduce import MapReduce from role2vec.utils import node_iterator, read_paths -class RoleStat(MapReduce): - def calc(self, fname, stat_output, susp_output): +class RolesStats(MapReduce): + """ + Collects statistics for number of nodes w.r.t. number of node roles in all UASTs. + """ + + def calc(self, fname: str, stat_output: str, susp_output: str) -> None: + """ + Compute statistics and store them in JSON format. + + :param fname: Path to file with filepaths to stored UASTs. + :param stat_output: Path for storing JSON file with statistics. + :param susp_output: Path for storing txt file with info about suspicious UASTs. The file + has three columns: filepath to UAST, number of nodes in UAST, number of + nodes without roles in UAST. 
+ """ paths = read_paths(fname) global_counter = Counter() suspicious = [] @@ -19,12 +30,8 @@ def process_uast(self, filename): counter = Counter() uast_model = UASTModel().load(filename) for uast in uast_model.uasts: - queue = [uast] - counter[len(uast.roles)] += 1 - while queue: - node = queue.pop() + for node, _ in node_iterator(uast): counter[len(node.roles)] += 1 - queue.extend(node.children) return counter, filename @MapReduce.wrap_queue_out @@ -44,19 +51,6 @@ def combine_stat(self, result): self._log.info("Finished collecting statistics.") -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, - help="Logging verbosity.") - parser.add_argument("input", help="Input file with UASTs.") - parser.add_argument("--stat", help="Path to store resulting statisics.") - parser.add_argument("--susp", help="Path to store suspicious UASTs.") - parser.add_argument("--processes", type=int, default=4, help="Number of processes.") - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - role_stat = RoleStat(args.log_level, args.processes) +def stats_entry(args): + role_stat = RolesStats(args.log_level, args.processes) role_stat.calc(args.input, args.stat, args.susp) From dd71e7ce84aceb6b5ad2bd87d32647684c6d1ddf Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Tue, 26 Sep 2017 23:13:38 +0300 Subject: [PATCH 14/17] Setup travis --- .travis.yml | 42 +++++++++++++++++++++++++++++++++++++++++ README.md | 0 requirements.txt | 1 + role2vec/random_walk.py | 3 ++- setup.py | 40 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 .travis.yml create mode 100644 README.md create mode 100644 requirements.txt create mode 100644 setup.py diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..eca0afa --- /dev/null +++ b/.travis.yml @@ -0,0 +1,42 @@ +language: python +sudo: false +dist: trusty 
+services: +- docker +cache: + directories: + - "$HOME/.cache/pip" +addons: + apt: + packages: + - libboost-all-dev + - libxml2-dev +_install: &_install + - gimme 1.8 + - source ~/.gimme/envs/latest.env + - pip install --upgrade pip + - pip install -r requirements.txt codecov + - pip install -e . +_coverage: &_coverage + - SCRIPT="coverage run --concurrency=multiprocessing -m unittest discover && coverage combine" +matrix: + include: + - python: 3.4 + env: *_coverage + install: *_install + - python: 3.5 + env: *_coverage + install: *_install + - python: 3.6 + env: SCRIPT="pep8 --max-line-length=99 ." + install: pip install pep8 + - python: 3.6 + env: *_coverage + install: *_install + after_success: + - codecov + fast_finish: true +script: +- (eval "$SCRIPT") +notifications: + email: false diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..c263a92 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +ast2vec[tf]>=0.3.4-alpha \ No newline at end of file diff --git a/role2vec/random_walk.py b/role2vec/random_walk.py index 122652b..3df5dbd 100644 --- a/role2vec/random_walk.py +++ b/role2vec/random_walk.py @@ -153,7 +153,8 @@ def create_node(node, id): def alias_setup(probs: np.array) -> Tuple[np.array, np.array]: """ Compute utility lists for non-uniform sampling from discrete distributions. - Refer to https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/ + Refer to https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with + -many-discrete-outcomes/ for details :param probs: Discrete distribution. 
diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..032a857 --- /dev/null +++ b/setup.py @@ -0,0 +1,40 @@ +import sys + +from setuptools import setup, find_packages + +if sys.version_info < (3, 5, 0): + typing = ["typing"] +else: + typing = [] + +setup( + name="role2vec", + description="Part of source{d}'s stack for machine learning on source code. Provides API and " + "tools to train and use models for role prediction of UAST nodes extracted from " + "Babelfish.", + version="0.0.1-alpha", + license="Apache 2.0", + author="source{d}", + author_email="machine-learning@sourced.tech", + url="https://github.com/src-d/role2vec", + download_url="https://github.com/src-d/role2vec", + packages=find_packages(exclude=("role2vec.tests",)), + entry_points={ + "console_scripts": ["role2vec=role2vec.__main__:main"], + }, + keywords=["machine learning on source code", "word2vec", "id2vec", + "github", "swivel", "nbow", "bblfsh", "babelfish"], + install_requires=["ast2vec[tf]>=0.3.4-alpha"] + typing, + package_data={"": ["LICENSE", "README.md"]}, + classifiers=[ + "Development Status :: 3 - Alpha", + "Environment :: Console", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Operating System :: POSIX", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Topic :: Software Development :: Libraries" + ] +) From fa2a0a6df235bb95446f568f9be45ab765c1af94 Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Wed, 27 Sep 2017 01:27:50 +0300 Subject: [PATCH 15/17] Fix MapReduce.wrap_queue_out usage --- role2vec/glove.py | 2 +- role2vec/node2vec.py | 2 +- role2vec/roles/mlp.py | 4 ++-- role2vec/stats.py | 2 +- role2vec/vocab.py | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/role2vec/glove.py b/role2vec/glove.py index cb73e72..beb01b5 100644 --- a/role2vec/glove.py +++ b/role2vec/glove.py @@ -60,7 +60,7 @@ def process_prox(self, 
filename): return {(prox.tokens[i], prox.tokens[j]): val for i, j, val in zip(prox.matrix.row, prox.matrix.col, prox.matrix.data)} - @MapReduce.wrap_queue_out + @MapReduce.wrap_queue_out() def combine_prox(result): nonlocal counter counter.update( diff --git a/role2vec/node2vec.py b/role2vec/node2vec.py index 37d2a75..81071dc 100644 --- a/role2vec/node2vec.py +++ b/role2vec/node2vec.py @@ -85,7 +85,7 @@ def process_uast(self, obj): self._log.info("Finished processing %s", filename) return filename - @MapReduce.wrap_queue_out + @MapReduce.wrap_queue_out() def process_output(self, result): pass diff --git a/role2vec/roles/mlp.py b/role2vec/roles/mlp.py index 2362687..7c85b52 100644 --- a/role2vec/roles/mlp.py +++ b/role2vec/roles/mlp.py @@ -31,7 +31,7 @@ def train(self, fname: str) -> None: counter = 0 start = time.time() - @MapReduce.wrap_queue_out + @MapReduce.wrap_queue_out() def train_uast(self, result): nonlocal counter, start X, y = result @@ -53,7 +53,7 @@ def test(self, fname: str) -> None: self._log.info("Test model.") y_real, y_pred = [], [] - @MapReduce.wrap_queue_out + @MapReduce.wrap_queue_out() def test_uast(self, result): nonlocal y_real, y_pred X, y = result diff --git a/role2vec/stats.py b/role2vec/stats.py index 30acdfc..2bfa2d2 100644 --- a/role2vec/stats.py +++ b/role2vec/stats.py @@ -34,7 +34,7 @@ def process_uast(self, filename): counter[len(node.roles)] += 1 return counter, filename - @MapReduce.wrap_queue_out + @MapReduce.wrap_queue_out() def combine_stat(self, result): nonlocal global_counter counter, filename = result diff --git a/role2vec/vocab.py b/role2vec/vocab.py index 142428a..49a5b2d 100644 --- a/role2vec/vocab.py +++ b/role2vec/vocab.py @@ -44,7 +44,7 @@ def uasts_vocab(self, filename): nodes.extend(node.children) return tokens - @MapReduce.wrap_queue_out + @MapReduce.wrap_queue_out() def combine_vocab(result): nonlocal vocab vocab.update(result) From 1618af5039b41f41d198f3921477481d644df8dc Mon Sep 17 00:00:00 2001 From: Timofei 
Semenov Date: Wed, 27 Sep 2017 12:13:02 +0300 Subject: [PATCH 16/17] Add vocab command to main --- role2vec/__main__.py | 8 ++++++++ role2vec/utils.py | 3 +-- role2vec/vocab.py | 29 ++++++----------------------- 3 files changed, 15 insertions(+), 25 deletions(-) diff --git a/role2vec/__main__.py b/role2vec/__main__.py index b3815af..f1d1d94 100644 --- a/role2vec/__main__.py +++ b/role2vec/__main__.py @@ -7,6 +7,7 @@ from role2vec.glove import glove_entry from role2vec.node2vec import node2vec_entry from role2vec.stats import stats_entry +from role2vec.vocab import vocab_entry from role2vec.roles.base import roles_entry @@ -79,6 +80,13 @@ def get_parser() -> argparse.ArgumentParser: stats_parser.add_argument("--stat", required=True, help="Path to store resulting statisics.") stats_parser.add_argument("--susp", required=True, help="Path to store suspicious UASTs.") + vocab_parser = subparsers.add_parser( + "vocab", help="Collect vocabulary from UASTs.", + formatter_class=ArgumentDefaultsHelpFormatterNoNone, + parents=[process_arg, uast_input_arg]) + vocab_parser.set_defaults(handler=vocab_entry) + vocab_parser.add_argument("output", default="vocab.txt", help="Path to store vocabulary.") + return parser diff --git a/role2vec/utils.py b/role2vec/utils.py index c575f74..84226e2 100644 --- a/role2vec/utils.py +++ b/role2vec/utils.py @@ -79,5 +79,4 @@ def read_vocab(vocab_path: str, num_words: int=None) -> List[str]: def save_vocab(vocab_path: str, vocab: Dict[str, int]) -> None: with open(vocab_path, "w") as fout: - fout.write("\n".join( - map(lambda x: "%s %d".join(x), vocab.most_common()))) + fout.write("\n".join(map(lambda x: "%s %d" % x, vocab.most_common()))) diff --git a/role2vec/vocab.py b/role2vec/vocab.py index 49a5b2d..dd85d3a 100644 --- a/role2vec/vocab.py +++ b/role2vec/vocab.py @@ -1,12 +1,10 @@ -import argparse from collections import Counter -import logging from typing import Dict, List from ast2vec.token_parser import TokenParser from ast2vec.uast import 
UASTModel from role2vec.map_reduce import MapReduce -from role2vec.utils import save_vocab +from role2vec.utils import node_iterator, read_paths, save_vocab class Vocab(MapReduce): @@ -37,15 +35,12 @@ def uasts_vocab(self, filename): uast_model = UASTModel().load(filename) tokens = Counter() for uast in uast_model.uasts: - nodes = [uast] - while nodes: - node = nodes.pop() + for node, _ in node_iterator(uast): tokens.update(self._get_tokens(node)) - nodes.extend(node.children) return tokens @MapReduce.wrap_queue_out() - def combine_vocab(result): + def combine_vocab(self, result): nonlocal vocab vocab.update(result) @@ -66,20 +61,8 @@ def _get_tokens(self, uast_node) -> List[str]: list(self.token_parser.process_token(uast_node.token)) -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--log-level", default="INFO", choices=logging._nameToLevel, - help="Logging verbosity.") - parser.add_argument("input", help="Input file with UASTs.") - parser.add_argument("output", default="vocab.txt", help="Path to store vocabulary.") - parser.add_argument("--processes", type=int, default=2, help="Number of processes.") - return parser.parse_args() - - -if __name__ == "__main__": - args = parse_args() - - uasts = open(args.input).read().split("\n") - vocab = Vocab(args.log_level, args.processes, args.output) +def vocab_entry(args): + uasts = read_paths(args.input) + vocab = Vocab(args.log_level, args.processes) words = vocab.create(uasts) save_vocab(args.output, words) From 30cd63032660d95692a18c8a8b223940dce52372 Mon Sep 17 00:00:00 2001 From: Timofei Semenov Date: Wed, 27 Sep 2017 12:51:37 +0300 Subject: [PATCH 17/17] Add a few tests --- requirements.txt | 3 +- role2vec/__main__.py | 5 +- role2vec/tests/__init__.py | 5 + role2vec/tests/models.py | 5 + role2vec/tests/test_roles_base.py | 51 +++ role2vec/tests/test_stats.py | 23 ++ role2vec/tests/test_vocab.py | 23 ++ role2vec/tests/uast.asdf | Bin 0 -> 65976 bytes role2vec/tests/uast.txt | 1 + 
role2vec/tests/vocab.txt | 539 ++++++++++++++++++++++++++++++ role2vec/utils.py | 38 +-- setup.py | 2 +- 12 files changed, 657 insertions(+), 38 deletions(-) create mode 100644 role2vec/tests/__init__.py create mode 100644 role2vec/tests/models.py create mode 100644 role2vec/tests/test_roles_base.py create mode 100644 role2vec/tests/test_stats.py create mode 100644 role2vec/tests/test_vocab.py create mode 100644 role2vec/tests/uast.asdf create mode 100644 role2vec/tests/uast.txt create mode 100755 role2vec/tests/vocab.txt diff --git a/requirements.txt b/requirements.txt index c263a92..1d6a773 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ -ast2vec[tf]>=0.3.4-alpha \ No newline at end of file +ast2vec[tf]>=0.3.4-alpha +scikit-learn>=0.19.0 \ No newline at end of file diff --git a/role2vec/__main__.py b/role2vec/__main__.py index f1d1d94..fedcf49 100644 --- a/role2vec/__main__.py +++ b/role2vec/__main__.py @@ -8,7 +8,7 @@ from role2vec.node2vec import node2vec_entry from role2vec.stats import stats_entry from role2vec.vocab import vocab_entry -from role2vec.roles.base import roles_entry +from role2vec.roles.base import ROLES_MODELS, roles_entry def get_parser() -> argparse.ArgumentParser: @@ -65,7 +65,8 @@ def get_parser() -> argparse.ArgumentParser: formatter_class=ArgumentDefaultsHelpFormatterNoNone, parents=[process_arg]) roles_parser.set_defaults(handler=roles_entry) - roles_parser.add_argument("algorithm", help="Specify training algorithm.") + roles_parser.add_argument( + "algorithm", choices=ROLES_MODELS.keys(), help="Specify training algorithm.") roles_parser.add_argument("--train", help="Input file with UASTs for training.") roles_parser.add_argument("--test", help="Input file with UASTs for testing.") roles_parser.add_argument("--model", required=True, help="Path to store trained model.") diff --git a/role2vec/tests/__init__.py b/role2vec/tests/__init__.py new file mode 100644 index 0000000..9aeddfc --- /dev/null +++ 
b/role2vec/tests/__init__.py @@ -0,0 +1,5 @@ +from modelforge.logs import setup_logging + + +def setup(): + setup_logging("INFO") diff --git a/role2vec/tests/models.py b/role2vec/tests/models.py new file mode 100644 index 0000000..8a9feb9 --- /dev/null +++ b/role2vec/tests/models.py @@ -0,0 +1,5 @@ +import os + +UAST = os.path.join(os.path.dirname(__file__), "uast.asdf") +UAST_FILE = os.path.join(os.path.dirname(__file__), "uast.txt") +VOCAB = os.path.join(os.path.dirname(__file__), "vocab.txt") diff --git a/role2vec/tests/test_roles_base.py b/role2vec/tests/test_roles_base.py new file mode 100644 index 0000000..bee53d0 --- /dev/null +++ b/role2vec/tests/test_roles_base.py @@ -0,0 +1,51 @@ +import os +import tempfile +import unittest + +from sklearn.externals import joblib + +from role2vec.roles.base import RolesBase + + +class RolesBaseTests(unittest.TestCase): + def setUp(self): + self.model = 1334 + with tempfile.NamedTemporaryFile(delete=False) as model_path: + self.model_path = model_path.name + joblib.dump(self.model, self.model_path) + with tempfile.NamedTemporaryFile() as emb_path: + self.rb = RolesBase(log_level="INFO", num_processes=1, emb_path=emb_path.name) + + def tearDown(self): + os.remove(self.model_path) + + def test_save(self): + with self.assertRaises(ValueError): + self.rb.save("") + try: + self.rb.model = self.model + with tempfile.NamedTemporaryFile() as model_path: + self.assertIsNone(self.rb.save(model_path.name)) + finally: + self.rb.model = None + + def test_load(self): + with self.assertRaises(ValueError): + self.rb.load("") + try: + self.rb.load(self.model_path) + self.assertEqual(self.rb.model, self.model) + finally: + self.rb.model = None + + def test_train(self): + with self.assertRaises(NotImplementedError): + self.rb.train("") + + def test_test(self): + with self.assertRaises(NotImplementedError): + self.rb.test("") + + +if __name__ == "__main__": + unittest.main() diff --git a/role2vec/tests/test_stats.py 
b/role2vec/tests/test_stats.py new file mode 100644 index 0000000..aa03c2a --- /dev/null +++ b/role2vec/tests/test_stats.py @@ -0,0 +1,23 @@ +import json +import tempfile +import unittest + +from role2vec.stats import RolesStats +import role2vec.tests.models as paths + + +class RolesStatsTests(unittest.TestCase): + def setUp(self): + self.rs = RolesStats(log_level="INFO", num_processes=1) + + def test_calc(self): + with tempfile.NamedTemporaryFile() as stat, tempfile.NamedTemporaryFile() as susp: + self.rs.calc(paths.UAST_FILE, stat.name, susp.name) + role_stats = json.loads(stat.read().decode("utf8")) + self.assertEqual(role_stats, {"0": 1, "1": 498, "2": 830, "3": 1634, "4": 1407, + "5": 412, "6": 718, "7": 2, "8": 4, "10": 359, + "11": 411}) + + +if __name__ == "__main__": + unittest.main() diff --git a/role2vec/tests/test_vocab.py b/role2vec/tests/test_vocab.py new file mode 100644 index 0000000..2606d58 --- /dev/null +++ b/role2vec/tests/test_vocab.py @@ -0,0 +1,23 @@ +import unittest + +from role2vec.vocab import Vocab +import role2vec.tests.models as paths + + +class VocabTests(unittest.TestCase): + def setUp(self): + self.vocab = Vocab(log_level="INFO", num_processes=1) + self.words_true = {} + with open(paths.VOCAB) as fin: + for line in fin: + word, count = line.split() + self.words_true[word] = int(count) + + def test_create(self): + words = self.vocab.create([paths.UAST]) + self.assertEqual(len(words), 539) + self.assertEqual(words, self.words_true) + + +if __name__ == "__main__": + unittest.main() diff --git a/role2vec/tests/uast.asdf b/role2vec/tests/uast.asdf new file mode 100644 index 0000000000000000000000000000000000000000..b45d4b416b36e73209421301e7f02e79db81cabf GIT binary patch literal 65976 zcmb5Vby%EFlQufI2M7`z26uO7aM$1tgFAyKxP{>E?oJ5q?(Xg`Ap{7{$8XZj+C9ttsa2`Mr*W>#iaWQu?4M(P@3N)lpf68{!b>4+&Ryw}-~ zsWil-$$(@K6H9(qh^v`3GuYgX-^A72f(4n0iHQto=I8=u`KQhFFLVD?jqI#VT})iO z`2XGQ>SzJ+FmVC@ONGqG#0_HQ=)zB??qp&HCer}hfnCiUoxo)3X4YVbcZsZnE5sV& 
z1_3jYSvlH+olGpj{A4s%5Qq~$3yY;S#LCT-+043E6vb zb#!wvd$+~kqLyKjGdK@hl!PsiJgaxm7Sl1m7jx)nS+gklZ*RZ(H!gqb})Yrz4d!o$n^f< zuy-^E+r9f~;tF|hxw%=J^OJFKvA-8qP9`2UUQ;GcJ~KWh6JA~pCJr_*FRuyMlpV}v z`mX$65!Yj7B;#NtV>du{0XsRmT03+lh%zYz9`z+;|HZJ3B6mBgan5Xr(b6ASy(;k#BF zK;nP3!1Qkm|B2$B5{nVk7LFKM7`E*B&0nv_rD@}YDhL- zEy>3huP~c@pIC&AlSIUD9t%hLKH41Z|Lb9M+#KWrBNTe6z>n$qXY*-e1s1F>DuIoS zfh`FuhUp=xVUB|h3o9vMZUlZf%T#wux%6rKb#V#7%h^Kx>XM@FR-u&Dq2$Hkh?1m{ zG}0-yIo>HgDvp!164mYo-6PjRuC3v&RjM%Qj*gmy`qyA@f|ecyJEZ>1hvR#1Pd?Da zeUPh1bfsY>iXwZ;KVxToJV4UqA>tCQIy!})zm-ZlIw*jc;1Pbv`WQVQXx6TJabz)! zgVTf2kZ@8)!zU>(!ryLab2=C?9cx#^Q=rhP%sl1}nQ%C<&)pm$3>1$wVCdGZKn)q~ zr8YPN-)PD^Cb-;LmZ~wg)RSvWvZDUd(!oUhFzT+QsWH+*uX%0?4TF!E0v7@P9pF2X zl#rHlVm;-1HWN1L9C}0FrpPaDGPC|UUN6$pI@6-MaV?TTP~W;)W}O0Dr|h)O(Ea6L z7Go>tW1eb#3}r;e;-zPOO=&xu22LT{Ujxg(FSlus z#TV}+xP9ZI5wd%4iFS;dPspG}7aD1Y;%kI6sQ`oG^Gh9E0vWTMgS4Gkn{PcV-|L$% zu=JdatxHX1REw&34l7VpkV9?_-hU1ej@M!HpwH`{CVok{n8?zHb>*wSi``1iQ{ z?l=H0Rwu!|HAzr!nf~EK=UFDsTJ0cLUAOooIhV1YtB*;$Xxz%g%7}rz_mUu9&NS$g z?%Q6&Nf5h^M=DrIJ1o=H+pGP?7gyIf&55Q)+7_hI^=Zg810IbqGbT&TfQ?t+_EBct zaooY-CQ2t#2===iC=&N09@KB{uf<=#32A6*-dd$-#8{K-WYYjj#kV{~-=gdzAGKrt z0+l~}`jn)%Q_e$kU4}U^{ps(|rvbNe`cdSHKmwxkxg2NN+kr~o0xj}LuCdXGd{JK~I2cVM~yaz|w5=)vsBr4t+Z0dGD|zxL(r z`hbg4rdo@n`0(djRFxbL(RQ4SRi=5m&pL^n7Jj!zFoc0EZ_Iv9L6#5`b&@1;g7CiW zFaFZOY}NX(iBFsIq@fW^kGH$DuC|wndiTHpWu?763Ne2w=5KFsC^d(0*OseIRx1u~ zG=)aU<+u45sp3D)id;u)duo0muziNHs;9GkRtKbiY9>~a<|c4oZkpP*k#~QR2e_c< zN1rnmjlnT{khOI$Q2gTG@~CXH11*=NgdFgyU;hxtZ(ln{Gt|VHAwKKjaVVqvu=)XD zGnczzWrnD~BX{4uapnCr^hg>&fZhCqXa(8(0H)?I)997OEixJ%OF8RydXmE0?Z%${ zFsX4on(z1Jq2RqpNrsxUyK-Pl^q0j$?526v5}i%}vi{%aD6Z^_$?KQx>%~(oE>`{b zwNx|jXP{v;x<4zo<>$%q2rs{HGD8{zU-b*wZqd$)$Uw)DT9k(12*uxfZ~Vifu-j_U zc;AP!Kf4)cwYF_;4d!iNugT^@c&m0D-@QutQEFG^_N*F1qChFmwm$&W*o+M?M1O_O z;L~*)t>82&Ej+~J&}H?QZbkHAI{*PLZ`|bZMqs3!2Q}g9S$hKFe$y>>BB(sKXWlnj4y?L^DUw-`1k<{lT+q52pjb;zw>DZ;jCXvETJ2IdI#^ zsltsB`65~+clCd>!-i@selY=ZKecOZ#|uFIW|3UnEPf8->7-ms%d4Guy<4^rT1cg1 
zL;@`7WdgaxFCRz%EB!*~Z5@|}l3=g}!yosW?{va5{k^(KM;1qLqhSaLxVUGXzUra# zf+e_-Y=8+An=SRsjTZ4H=4hym*9mQ7NfVYTWLySh1n~#Vn`R{O76JfuE&Te#<7;?d zhFgu`7pu~I*l{;vWb`h~yW2mD@F6FfS17n?UoG7YSP*2o`uueIy1+6_E|iN`eD5(e z-&7ay5p8;2SFjjLgonpzhKHQ(`Yc)(k#_N0(JJ2mB8+|w0*hqOwWKPZJBgH1yoyeX zK%8hU`|VdRy@$rhDO?-Dz(ZJH*eG)-0+>!6elBI;PW#kP<{J73 zk_Y2{sceHyf`k2N6AcNSjTpS zi~rJn`jS(t8X!XG-67dyT>1lX6-$|L_hv)OVD|dAt{MXH32{=3<->?u*a7=Au3-(8 zn}k>stUos#zRY@O+qtid1Mc5nteIng1uEkmz_jf4Gu93#iC#gn3&+UTuu3O-YAb7E z4q)0Kx4qAuXQP|6XBJ9Oi#^x~pwf+r+-{rUNrl#-9k=w0&AxQ1_8NIBa9Q5Wss_2G zRLvq&*8uRUQWkyLmvjZ&rX4IV37+@BoH;cj>0Opf%R$50MI069;`!4~;NG#xZ~$ak zLj-W-FVmZ*X!RLI(*F2b(}wN4K1Cu@U9mG1@G%t9rga)(&6*?v4(ldklRwY8VPHcb;ly=i$?Y1Tz0=5k`Lu74ZIu?Xn(J%*x zhIrJYyuDllm7qbGX;EOBRES+7*P}Y{*Oi24CD7+p5}<)hE;vdw*FH^@Jsn))(Eu&JVVnw-%0=<(6t_J)6IW08i|W|w9@?i%wq zDu7wZeK!Y&@dB@P1pt}PYa8I_*M)NhNLz;5F$28#%|%v=@@d>yQmw4{>mLqbk1yZv z#g|j7?8Da+0htO2x6f| z&Rb`3!p$)O6R`1Zk?;+&u%im+jBBEo z<@oILvx=me8^_mz3005TdslhfbfS|Qj~kOEj6 zs~DHs3Vh_N5Zo{?knWvf2OTlvD2@er`p0Ah2gZoFX1(8(X*{jqgGHDY1@ zUuytl6Uo!u_ZYRo?(b1W`ij2P^YIZ0u<+5lIAES(^`rO4>fZb>1b`^2{k`N^GNcZ! 
zpGb)oo)-W>%Q}Isi?g8Zan@D_7o;)(l29A^&yHQbI2~+vVxd7#1GD|fVz6tC+bBQW z8JR5jBpXdMWgo48&z~j-_yx&-i~_ab(@P^-fmWuzTiR))HQt5GI;B=TzJxCmyFX_- zaw(nUt1<1ejtW(}^eQFPf+)Sox%Beq?L~SO(#H~J>FyjozIOddrmG`Ucu+}rMmxv( zE0=fKlNaJgYh z`qTy74hWE?pg{EQD*kp6%A>n#W;Vf3-pC1pr2dEt=ip}e@)Zyu*yiw1$E&7Ca(WyW zW?DP0Jy^}k;+aMI`f%&Fl$yx-DH34T_*Kzid==`?FFrs#_nQR_F6-MN0AuXAKDU|1 z^z{XP4}Q+;ga7^E*2}c=G$}Q7Zn|Hvv}%==p-h8rh0T^N(Prf9=L4{&oR{X?(D+E!3@Aov*wn8=^Mi`ikP#i)Q9K47G%uhpn#gin8jW)5m| zBL>^gR~oh9B@1nZ14^w)H@-hVkbzmJTK!XY;H_8x{FL5#|7f z7FXOW+S0HTKzCdB%#1k4I5=a+UMArY*Pr`K8K%p9Ib-AM7yFLtH4xRx3TA3%-nG8v zTH{sf*o93As(b*?TGeRiK`HeoP&6?5~?RZVn-@C!)^=60*6A(P7OW2Oiy$ z+XDhnSEh~6`H}rpAS1#r`dH?ha}tjp=Yzh0)3f3EEAUsCmg-sHae#l5SQj6ve=K5f zrhKsSu4l;kak2h~DErZ?Aci9(z_lCu4vFK%nz)z_Wo3GLQQ}7+vf?(n`6!@w4F8Y@ zsyU*r8kFQ##HQ&03=+R3>a_}L=7SUc+5CXU9Ms%G+DWJNElF!wl8RjOm24e4jsxC* zvK#@^nSne^nK2^MpghdViwHeVXO!d7Yu&gm0BTO-5nX@P5h|8xT|mgRqXKyBrdaew z`3nOOHE_1n?`@zM$Gfia$kF)~*N&*9^J85>GMNK^5IthOnZ0A~uMv zju5;TeV7xAFRw^f%$$4C1(H=4e9`0vphZF1Ple@ZPb;dH5qjGO_fRwk}n{ zjdg>C-fHTJ}3`9i6m3dNeav!O+uQKLe-X3y|+ zj}zc2(6wPiucPza{Nahj387@N%&a(aVTPt10IyU;;!|SECh?Xq7(000nAHI^Cd`1i z=)9C|a#|%8m|Xr)#3Cou-(8W!;uiEK!NpP*H&8j)-PI(pWN6WO&J)@K+j!~lqO*KU zQvh5i)Ct&J_-p9s-sT=iHh`?t>=;`RzH!QI+GK%E!V^0x;;^WgNc>NRteS2 zNc)Q?C3KtM;iG>rvAFGyy%VZEK$JHWvE}O!2AcdTv3N;Fokaj&HdJ>%8|1@f!0t6I zM9mF#W(+erufPctwK)M$IKcL^APvI8ALR}-u&wrHOr(_QFMjxni~1G8=>}^0#0c1Qh=7L~dn5uiO&0O#4K*b|0v!-r zu)mnQB$1%!isQCr$rq51T%A~u>9Jc``QCv*HbvGXE2jR|& zT-`1JXtNW}%$|e=)7etdF9P4p*Z@o+Z+p}NBq-#|EO{uY8i@XY7?Ph~SOH z->~;VG^Hnfy<#*;?_wlL!3yj4myxdAUlWQjK&kYPgC5?=S98YEA-y_xelc%CfYwRy zcuwWsg4!5r_YFemH5|aZc&l6x-VDLPKLNlQZTmSpbcj_Op7^pvRiD&f{^Mq9dF%pZH;m1m2H?i} z5rJ8u;kOmTqPjvYbgok3swXnD*((MBtz}ia4Vtedym13P2jy|r3ZO9-8jEs7`i6q+ z9iTMlzc+2o(IyrO@`#3Z!d$YTo_`Se6qt6I5{hCUy8Y!gzkGu6R{kwyb=(D@RyI)y z2(IPTgqr4iz!*RBN8n830_fxBK?Kk1svxMl?RiEDMFrq*`qa`6)`h7kC7X^fvm$Ya zlTg#LR-YhzII@uxdh%~kXp!%q0y$dJ zcAv^=y3LHwIsyKt71V+eg?U~;Sy#jUKfX#0Mjp1dCw@*qdl)jFBv4-Ksp%I)A-KR7 
zSbwX$T4d(FFYk9@o9p&Xn=ARvV9QkRTp+%xASpuFG@G7iXMz8Vu*oO zKNfOZRu>1BhxEJ25EZh2woK%ZxvHYKFy_p9z|U~w4erq}BzPx3<~|tV)i8h+R!^%@ z$2j*H$>b+JS%t-ov`o23^^`g-phgv)*O1qJZueK0Sl*VQKc3Z)t{~Njp+6)bKQhDW zb%2HMbkeb*nD!*e)T#anl+B)NFs}e9v9#FB6y?6LMey80#8HD^8)HhgdGQkXz$su{BF=Ni1dZ>d1uO~F z(pUf!Y5MU9FC%Ih;a}j|SCWJpG4h~84n47uC%##WrVA{tpPd@9tv9V6-PRg0C^*NA zra3g7EN|oJ3uUvc$)F3koc5-J!c*vlGx)(Afy3y=9Np4NK*(oB%rN4~KV0rOxU)Qn z2H28BdcVCn?)sI2pOKvbmTI9spEk$gIwMtRLU`ZFoUgtCckh85CivL?7Se`{LZEs< zWZ-2te-5TkvgKM9THjX~3u#K)Aa>lR#0hU6sJvoiqrZVfEWvnssX{zRzYP1cec*wk z=*@(3d%Co_J}FUNG1~&jL-#MgWPzf6j!jQgO%tUtTN!OsiHFWo z>S=t%O9A^@4jVwpKIAyo-hemgWEWh=z;^rv+}sQlb|PNGhMf^)KK;+|mP4WwrXkCT zZi6ZCRrOP+G`j4ijg8E0LmWj_oPoIOe-OEf;QrbR)ga*U5`m1l6Dw?ghg(6CWp%*k zp0x7>SDwiYNmA?w;SDd`AUSpqQo;pz!E7`8BGur-Jb8c()pX`4rx0UbC3m+}vE*$~ z5Szl}*l_ikTNuNEiwQMbM`L;BHvy_YQVD$+_P>HdM{#Ov$zBFI?R&gdaLx-F_rmo@ zgl!N&T03=wo%%ICfG5hmpCOZQ%vw_Nch?^Gqd2hxv0~D0Uyc?(gd`Q+9`PcC>Ery` z#)vu6J0Ao;AQzGlUpo*Lm=>djMkkk5xlKfs*VJkm`&3FFt94d~P}O19xLf??p`fIX z>rPiZBf(-IJwWYoG5khsKic}t4C2Ggxe+$Jt$QK`&Iii-DL1z>0t8N0IC8eLG|$sm zFxBiaqq78wPIK%)Tz)NY=~LAAI!|tVAJclJod<%_*^%=a!b%M=F>{( zJq2B3_qc}i;kIy_nwT>e)q&{AK~DPejV$@rzsPv8f+w%b&Te6QLnL>0dQ5ql&(%|O z;J9J~8d0${f;O8a2o!S~VDq^7H~us&^ChVIG|o{{rQo zCHOuPb7djyW5)zqI$g|iQXibc!6~_8Pply$pY}GA9-d-or}IE&$5;H@S(hif&lO=I zn@JD3CpU$9`bq-QUMb_j+U#)b9RSKqWZkB0u9PnqnH6Pv96m_#cDOxlr`7Oq096Jl z$=VP0sb z5l4%yy{m=Y3fma7*`;OBGR1t!(5kFiS%otByhn+)7Dgy)l5TKAu8-+5Q)~7^okRoYYlpj^8YZ-I5#LUH!*1tBV_gu2 zRQbJ!IOXy=``-6lVx6~5&@uf@dOoyi>XdEqcz4sJG-@uRQ+QixVwY2vNSI%PxpGIi zN=9JiB=L`++`YEt!gwyu*?4?jvfqF7Q&EQ&ed>L`gib4teq+|bVb7GNM&SrjuZ=y~ zRi?JU{q|LPZtvWuRba8RglPYKRUGnS|NE^QcU5+8Il{O@@Vg4nOL7TpX4l^)1i_6S zT+!oE2_7NI%0JSWdQg2&u%He%PrtghZzj6BnPP;_Dgk`>LLfzK$yhU735r*9@w{Q`y_NdG&pD1PLQ1=)P#|IKPvv&r1U0>36B05B z%Jh=)dP9s;hKRUgi5HXrKEYA+BK&()kcTpd^?3DYZjrMT-JCVGzj1Y*hBTE#Ga3g%)Vqnpm^pA&6BvbtTh#$^>VVn{;BgbB#qQ+R#kVDJ6@_@7? 
z5c3UERzK|9k9S}2&N1b%v~8tX-DTfLj1U)oD|As5lOZ=6r%+O-v*~4gP0PgU8~-W& zCRtrTILvoW=E=)fn10vTd;L6CLJ=>8x>1kFb+^UtmHs`!|jHfc14hLIaX7=DySXc+ueOM>So zA_H_|qp!#!hj@4`=oo7ng4scyn9cjZV5@@BxO61^&ULYLi7cH6m04ilK{z})F)2AE zD6}0&!Y)WcBbewPQ3pc^pt)oYu71)&B=C(efXi+p$UDy6`Q%9)7mE`-XlcA)F+IR0o&B~ z9kF4*&%sHGsy^=uQ^e<9F}QUWe{Gb;YI!qr8JXs)(;VfTt$Ic*UBT{z7IbUo_R1>^ z-F`9?4G{aLpM>)g;bC%1DjKdm1M4yWdkfcNl!H$bTguNu`h*Vj%m#9^c0ZyM&`_lQ zBdgvLP+~EZLa0~Q!d?3T!=@sIun{J{@e78x(mlRAAk-HIG%??YMK1?)?6(rkqbsV7 zm$x`te!#NFtjEj0gHHW})MFw3d@H2J|MLWJb^Ni(Zfn4B35{wnLx(u!x}(ybGpaZ4 z@RagniKjsnSxwbs)vd!M>77SnF$Ns5MCk|Q;tV@um$qLEB?6_jR>u(&Ui3A8LH= zlAXw{y`MR%xJQ??1T_!N9vBBywqkL@bT)}>Mf_eGcbM(_?v*X?!9r_a$~dEbKICAu zYOnNl@z$qD2uM+v8qf0b9`Vzv`*sus;<-g*Bw63^5da-v5{ej-> zLXQ7~aav$Dj(f_F_~8w{)0gA;GvY>IcxBeTr3PQ($#hoXiG-mM#XLwWdH`CAnZ2{8 zNo;Q|LZw>9hw{2Xj}oNgWyilk;IX3Hj!b_=9sFa&06t=WNC6l|6hlsN5=woe;wb(q zYRLAg2H@oOILVtr;+}Xl@G9d>vZkoC{6ztA4miv1wVOwYv@T zeB+KpV7o43>Vzka z%S>&=m0hj!uz)(3H433Slcz!kM7+@N~NDg);gOzP*@VKcZ6E4|mqkhs5JK)*zv zqpC>5a1;9F6-Q1GxM~WitE@2I;R!y@VYYHp+2N{+4Gx-HErOv$Z!M9-J*#Cb{rr;&E@E#lrmM0%;*xNJ*IHq&YuE8(a8X(KFT=Va(pY zPQ;(3mdVK78_ec>V{39b2z~`_*YtowP1Nl`kyhEOGMR%SY*xq-uo`)?>p) z20mi|^8<_fw4aJzMT>GqWex&jIF-}Uh@=bo+7QOT2jsDnmhyE|ym@h7@7y_0bRlj% z_t%sKmThvb-ylY!;R%pgaF7V@A?=UscaxtbJ@nj;>rS|?QM*9RQMu`D#)mx<~^4KU|%jv6jo3=gvm68&SC zHlh;GzL~+>gr>Mm=gEFJ_n4SKSSk^T(zH&yygc!t)OdrhbVisn{^Cal!Rf^@5ncW8 zs!Zag9c(ynO-a$~1PP(_3snpk0{6~q@V0&^jmXZ~Mi{4H1+Fi_*yTI*@*1wx&gFE> zrZz+|%zy;`cortga=sXhl5{p_?Jc24I##6}*0g}Q6fyBwdLj5c+{kpBih1+nHMU}E zo*O9BiExDB6V+gsS!tz&Ix(5R>_8Q1GnaJ7GMcwYS>>j6G!X^&6xfK9`L(q%J#nqk^qH*Z(&twDY4rCvG)RCB&Qjl+^f+rtU((`3q z2{Shy5p5ibJSa&4{WG^)$GU;81Z6Wc(6{++hBuM{|Kl?rA#}|KeXuT#V&E?ziFq6% zmoh0*^$>hdO$$<&crX>$ccbQa0lMqsxnC}MDtbM-YrB~uoa0zuuPWS8? 
z=O6hHf8ObNoVe8{p;KXJbz7lRXj zpuCnFgL8jGs&a5Vu6Uz-Mjcw*eb>3huQa>YGd82@+52wYGy+5{*El9pJ93t>Uq;g7c zwqogp8GyThkOsDkt2i~QFlF|cNz53KfX7-vmiGaWRc!m`MUM*7(w^59{RvZk_7LXo zEiB2EhGj!SNiCf*>l}tnUX8oy5U2%+xb9A#-_whaSfpB%8HE-o$Ar!vNu!#WALx#~R*?iZf1J{Qf>ZyH36&pxIvoK}&ke$KG*!Q$4^)sgiBL@CVt_|xm1xIase^Wj zkZ8_$$!}|I60nfTf!gcIc^uRaW1$E+b2=2&KWgCq&5ax>Bgt3Ybnb{v+f>1KsN-^L zDU;(m5tIAH*+izgo^zPl7YZ8Ak3gDo_09M#_j-Y85BDZoq`gz`{1h}G4qi)8EKQh{p}5|A>KKI&FE@1xYG({TZ9(~dYKme)Z&0+!T?Iui$y|Q z*b$x{zjOL!OV+5$9nN&(5K$HfLv!7P))eLhS#v^IoZyO;iV?}Wp`S}|Z({iSGr=na zc7)&pWbbhhprj0hFGE2&aS-0<(CrG^vQLJMplnz_sB%qMzMcpj->)0^FxRI!O3y?c zqGD)`d3L5{Oge5gNvaR^r9yF+t>+7mF#_CqRbvsnDjF)(7{bU zDXKUrf_|O7#Yh~3J(#+w#SxF-5Hd=by4GDb_ZPVskC|W-&%d5-h?NKsNsOKz9lJ5; z%Y+gvD}r)t>0a5>6_z^PJ&C*U;pI34y^Y#i=lNY?vq~!k zA$F3Dt2T?;`i*+-0-8;Vg)j{dyIb-xT7FhRAS>!^8cl1Veab<&<`^P>9OB7<-sm9wD2;`22>6-0>l;CD%j(z!_Y`MqzYJdVq^ zqzQmKCr!_fCGl*$*J(^JV&m-eHQM55^x471x&2BlnBu<5djP8IH~Pk;bGn( z+%qV-&u-c&6)m$5Rnx`i;zogZG|F7wF&HDXaTLn1T;Aiy0+IBu>4^MmmzF||l4p7E zeR*={Qk&Y1yzE5Q;DoN>lH`L;mNN;b%4+FwgfnnH#ZFn&vrlJO=klD{@SDqQi3ZR=Ta!N{AbBdX z-Un(klbcUa=!+UP{JcoG?+1z0;|tR6{^RgcCzBmn$I}8k@*g?;XN( z8OxopzNO_aaf6!rDG`?}A#Yae6B`L162uCNJ{ZGN$%$#ru%j}Y!M@}Q3pu=vTlQLyPpULrN$hskwx>(nP;Rp#9Xi|*HS_6RxgWH1Jwk=vI7NLm!~f-Y>>kc9ipx64ujHsyMcG`2LVhf2nfQ*&#%vAqt9 z8yP+D4|kiWpINP=sb=;XgJ0$`OE?d!zDc)p6dLsVc$i>fcKoRQUhI)lnqWO~5#z>g zyMb4Z7fxq|I`y{@-VgmTyjX(TaOakdqmZ~?#yxyARV4nAv+u{FMs=yOY}61sCy5AN zmX*7aY=$H$YBVBb902UcXU4(gghrHYBRyAUrDXgedp(^ zzfGPGJfR)eEz+hgp}b}57@0CCd7&PV4F4%6?b!V}Wu1VeEZ>p24RS?4XX(yi=I6sK z;02%V_mdF(5z1{x=2MN?ht0qNj^0hjZ-^6W%W9nWNb=+|VBun}Zj&H?xP+qVy0k|1 z)C~u|gdZ3s9{iLMMumQPz~NojsIq;IR#Q}!vF=+JcxV>!RIW;*@Y!W*24$+OsR z0=0okP1F1J$3R>{T8zl8vqy28YZz3|x^95ejxjK$QIs3CMYGt5XsUr-1jmhog>q?h zD#G@_(xvK|Xh-)UB>9O&+nP*gX_Tdq^kr8?S?J0_1aP@IHrSTuf{@?=qcPHVe}b;_h~ zyv39H+3ZS;Aj{f?z@CiUsT)xLqAZN~R?Sh3e;w`#SdZ(cOG`dLfgm63xvcD^@w9-& zbWHpaovC9|fL@_U{42N+`=mC9_x6y5>SZTVYNU6?f|Y&|0EL^AJaayP``Xg?duOh7l+RK)^yGa)*=plSolt0np5u+UG4)<@D3v}(>| 
zL^JosA7*;_DZjA73xhJG^(|zE5RcdnmitcsV3GuNSlrL?y-<2b=3fqpQn3Gi7l~J1 zl{A3zhvw(5tSUXprK9KyXu|Q7@i6v~$Op0b!8dW94sk8Gi8D5Cal|w}PoBv7=0mc9 z#L89Q&zq+;hd#G$`eruoJ1>`2o?a}@f}I%23#XlzN1{Y7)Jr=_OHQL4P#({mJ6^pr znpUSGuDV-wsvrBRJFjLeg1(d5i$j0N) zG*phGkubI{C0>>h5D2nlYNctUg;cxe()_tmZoT3QsmLs(6G&4!nih8Xa4{zkX46Mq z1v-GaE4$1iR(zcTZ)U2kip5>NKNs9q4q6Fim=@fkI`?sKnf=TL#`z7d zTM3Cvo#8Xd3u1xaCEOlTGQ}}Aw4GkrhnMYQ%V&a^zaM^5hh-Hwn}}rZ3w}GY%AYJ+ zOz>o8|5A3Ba`+p8A9g?d!0Q>Kw9y9Ri03QnL)6Jg=CdBD)2>1t>7)bmcaK^=;q}x_ zYu#-Nn5+X^tUu;Ks7)17V_9eI)d(G#iYz>gPW25;f-_3D=2m9AybPLMD>Ku$c-=2b z8tf}EB9bK9@&bJK+VvJxHXV6+2zR~&T6c5$I_dPYhZ&ZdGUGaVjo3Xo%H)S9h>(Rplh z`5NZvLoaQ`gp~Wk{bCOv#V+(Mag#@r2HI_-PM9rkI>VSg7v8J8;PTnlP;gafri|_m z!1VjZBEa<^rA|bL){DsW_xsM4nB`CtKm9Zux=V(o*T8xDS#B!wdgMuempQ(b7V#3R zUb+l8_>NK@@q$?QAZ-jv(5%Bx4h>cqNo?{1Rp$!ydSLF@CP^H1f0i9y#rzQsy<*=e zkfK9Sj;GfDl&pMQjeXqdL5bneso&+Is6oG|U~6+>s!_)1Q@l)=bv(d)oc80-?!wJ3 z*Z%CRbl29UvkuiP*jQUm72+(&uL;v6oL5CkrA3 z*Lni0gwqF!eB-02(n^)cU-EN$#>RSU~wBGeYnr6wrY(0O!*24!$>k z5F1!2j!4J=r@5W(8%#nKa;<8TWk=;WXg4NpLgGr<7D-wFeop-nX%?%E+_WJqb#}tW zS-|$9Y1E79Xdt1iuMlRSX2dmS7eSG|Vz3luGyBImeHZypHm%hnn z>OFPWf0A|D#NW~7brr(HQQ65#-LgWzNIchDm?pSDk?Y3gTw2qjRYe}&z)Kr^&u#g}kRWWqi zrPnp-B9K;@%yz^*qgjqIb!yRD!M2Y}1K#k4x2|C*E8U*G!Qpc2O(`z8`;xN&!Sk@5 z#cIH}le*Y-jH9-+e`*5pZbis?*g@51myMyBmIxVH9-m(pSQ?3(%hNjV-@4G;y%N*b zXwkC!AmrynxqJU*!~~*CR_|__vfU3;tZ)(FObIs_fGsNCtDt5^t;)pI@WW$ux*}`l zZM2T9nG(u~F(=HqKYS31LhdS-eX#iv{s9OU+x|R3fRe*#y!2o`6i16UG|=b>)qidPl3V(*a<(m=YX8!%E``HiVqj ziK%n@WRCzrPE(eO-H1#Xp;W}+Vy0^gDlQJ&a6$82{hk?4YR-w~7PxRP-NW$twlVxW z`Ld4oeKpCC&MEbpda8RZ@R!%$Bp~M{Fe!I$KglmH_A9N{wEl-fu$~vDvpUohu5>TQp z6VWLVQ2K7W^=-j|TJ`3uX3IR~4`CWUx~L^`+icizO~)NYa(5nCR{aSi_`#w6jMY7S z?ceEnU*c7SPp}d2?PZ19eBzT4o~&!kJQkH$b1_0kD|09h9|}40OGR(_D|r8WiKm2n z8d1&q>~t!VXCeCaMk0fgRb^4Z!=|b`GC=r5BbqAzWbvMJk+c5tW~1V=^PX1q#6gm| z;>-eh<`5gW;j`LJu0~oi9jxfAh7S{+^!;w8Y|EJ~gdFDlX!hp&9UXRP4m3H3?oZ{s z@Fi?}31j3(4L)ZD)xt9!mn%PGomM6Gq-ueL}IT|z~xUB##L z8J;Z(_DsVk(9Pzjah6=BML9<0cLK5(5!A0B)|kSk 
zmB!279)4yRHAG6PpAJ6*5H(Il&D*DgMfk%4BWiqlR^|ev@OvpzOy0ZjE5UIP*Ibfr zTW_rJmp*kv6>ZsYlc-Hm`z_klLmoKm$?5n*R3>Y~Uu&5-Lq)usf%3nnzBOaJs$L#? z_s?2<^?CnGYesF(A`&&KE6wfPzU~*@y+S3i-mAIZdQY{gSz~TWmc^Sz-o+JWPS}U^ zD(7!eU;)rPTL|GEfFb$-W|9|7R2_BTKQWGyh>re&GMGn=$FTy6h0*qo-sS~8)KJ^K z0Ab@-m6g<0^nscPuVHj-h`p=w*+z|W!tC`==3hHb7MY(#@}!`V%w&6rY&l|CldlN| z`5$z4=%Qtz9yLA_nKbjOYqj;~&uScrR=j^u(w*WeoUPkmBu23)%T@CXE1gGHe5S%o zw-S#R{nSPzj}rQ_o|s9CC!cp$hi9XlMhmFKZL0hDKzruQcwiKX^YAGsaotZqHa?>I zFQaU$C6KeMM*NkL>&A{BtB=&()bzej$-uPP7UNO~tR*RRkHA6nTdmjfZ8O4ULgT%n zCU-v`o_KXcH*n#lGVR&;tEv&J2&dOGyK?g=tBm&dtsws0c?r0mWKvw6_zgLO*D?V+ z(>@;02G|-Ks+d{-{@9cSF+H|Re_FA52SwB4b8MQ>NpinkM$7a6L(?|^S<(!HR4lE`7p=y4z{WYC&m8liu5BWPG+mh4tRN82qpc!H; zE0{cFX#W-lS-d3VCP`3@XhMg>FP$L_a@l9$(h^Yr>BtvCbAMU`vD6uiF4SFNulSUQ z$UYi0+Py^k)}|C-LB10MvNv?4w|@XTZQn1sg(XVE@@fHgcmJ?wut^JD#zimPh+VqZtMJAt)+c|jfbB`Zy(2t(aD@1v*w|H? zpiVx!9z{aLPu1Ts$>C(ylNumd%ybPJ&wf~zMcMQY$m(`+le3xXj$!jtNLg2k;~Kt; z4pm1#T^l%#C7bMY6`ZDzwF!~U_d;V`iA^!0czcA_HYM|B!d~`?OXbh%#X;xL-vim) z1v<%f2u%kicdY2Pq;HYiaddu}LV_Mu;cbZed!h;RaR*M=8cr0gRbLU5b#*XB5`nbk? 
zo86eMxwt9QMXQW*8ckyN(`7f)uBE=%a$b{kc(%)NM?uD{(J`Cl-Y2VDT-lpdK}K&U zNog3j&3$lZyvE~Ac+c#)ZZeRW26y|jQWcBneQneYeMXcFzrH{GuMA7osp5t{>=K6f z$dZOVM>%rIzGAa>zGP}F8@gHm4CQf&Co?|y_jQ!!AkLRaiqGUgV_)Gg>PLoGo_4BO zjc(%4vxkWynO>_3nO-T_TjOeuPlqOsPX_VBROhpY6}K~|$5dM_KAILV#CaLv*AcLx z6AV4+u%y>+jD;}xDUI%^o?_9d^S>};uD1Rw%E@i;Q*sB$C6a77A(Z4x`AEgWcpm;b zH`Kqw8~Gr6R-G;Mt`{wvbaWdP5zd( zVIxGEhz(N*{~WO1FcDXzj=N^le%b=NTfzdZRiwEchG9CajS^E?8MJ_6@NU8T z!9agfdRrQ}Cq3Oy2~s=@lEV3z?uwiD;e&ZKSa34glOoR~2@T_7eNOO{@0VrLotb?LtBrn{rOI$qDc0gwNASs0PPw_3VjDVPFCt~~tbF171 zYm318V)y|Rf&y#YHlg!Wy7VZoQlr>v07mlH@Q1(310QFwZujXzR^5Avn8wqkt3%48 zG%jm$^S+WC17bQAQG4AamV7$zPo%~ua9=Dd8L@Zq`mg(~SR0X}X81?4S9 zG8{EF{?q<6H!li-PrcuOd){<+ahaoSJjXM-RrbPnDlz?rWod(7yS-&iEDhg`AE9ZV zqY*T`HdtQPIj^)v0TNnnojm=Hr2%B~slPQ`zAG9h+0(9d4uO5CHH3Paf6I+XSugte;@$l8aNefJP& zekis8X=tk<&qNf`g2DVC+O7Q_q%GY)y+wcNsv|x1ET={#yJWIlQ>e*GX88l3wv74> z(o#QAyAv^Fih)tXCKC%Tawf_q+(e`mEX^4S_n_4L76C3P`4wW^hl99oh_)^X?im61 ziu4aWmg!ic3I&%r$!$?GT`G3Y15NdtYx9}Es<418Q$DGNd8mdw+!o2ApDEEo{WL8P z!1q&XqJCUZAoYdjk$&e}7=7)XjW+NyOiPFHC~IE|*6mmPUqlWFe1;_Ok;ajFhbt9jM;!Es^nJy5p~Bu{*Xm)_z$JLpba`{POKaJMxp zy~TLtmY7;0{UP~>P7V5~jl5P-L!*0R%Zlg z9|f0I@IW&IX^rwwkxF@JEVt3=O+9AK6c-%IHR4{2E zm_EymZRRQgkjaxcobX@j{z%=7cKBRElnxsVEuO2Ms5t&wDd`}j+MVgUs)0vb-6uhx8Bu{pGa*^E0L{p@2w86KQS+D zWBg)eJ?q;Slde10n6_P+)hR2RZ4wt}UwbOe`D@Sz4n!K=!;p#Eo5MUI1Q6-olR$?` zyM&6au?3H|=GKW7pc$c!9)?tN1;O=qe|u3w>cS)uT+AVli|Fw-ryp3KcnWVmebvjU zPa15##kqZtz3Y-`Okfl>d-XS`Sh0BeBUh2ju?fM4MuPXB_=@K~kkqk6b#Ld>>Y_gP z4TV*R=qfNQrPlTS_5Pmk4eeD*xcALJ_&8?#mUHSL!Pl1VvvncgDtHT%%=JW6ZF5uN z=~UE)UGxr6gm}F&9V!xwrp~`d6wRwjv@^vB(EY1ryZ6=Ie4pD!7tZ)Qs&RItAyotN zwt{I&SQA;iYjld}N;rfh{0cEtm21pW8t{eLLkVU5Q&7|FsIE0HhWMaPSF$|M5+Di~ z(C9b3A8F#K3lQ?m>X`9i2IYjNNqM>yv;dE>DMcMjkk zxk%GLX&#_lq9lAY<@mg8I`m3fte9(1Y#pMupb-xVREDaHM;(jiSe6x_^d^ETk1i9~ zlh_s;nn3*e11?~Q)D}O-&ANBGu8#cxUCClK7-yQvRPV302reb=T80IMV@7U+2zv+; zkMLoE%RBfIbnkDuaM&WQFOwO@ZXGcO@s$C7U}?=`$8rP`I*{BEv{OqCc(ELQt4l?+ znsEJKc2`=J)hrC!(O*nPoW$-5SnJj(x4?nMuy73MT{-~urFICwlMwG@r3quYnHq1h 
zEivE5B~NULvu`Eoh}c3e_>L8*DtN|Kp7oT`6`p}R5Oo#C>Mp(CQ=VNvkkUuv-d~O$ z|D(LE9pB-i}w$0e9K!j;k;8RD^$zc_`0W1 z$nQFV#`77E{j^tcV#~Xf-1AvGVq(jh$n>>^&G@>aeCLJ*`E-5qe-yJ*X3NE9OId^K zdRVV@fR^*9H5ifp=4!~FY zcDMj=q69AGn2JR;9Pw_y&SK9meivIqkXa8bO_bqkAz@QEwcugyA0+k)rhhOu-S4nZ z9COsV+F9;ui(X*0;L+4MLI+B?ZztBhrK(Cy?vz@P}NMIwA`ChQ906>6zV*?yn48SagMhwQSDkr8fsmLRR&y-|C8^<=ZDwW zIKiK=?!To)ruqN)TAmT|sK`1HuJ-)~%?BQY3_S#_mOphVqn2;V_$@OZeDkP5c>OR$ zvjzsDGQZU>40Hc%ia%FqtQI)bL6#Lbmnlc{mJKMW=s=6WnwysrlEUjh8G~ODc@=Pv zL{|hx+(+XI^v%dHLO}kJ%oA;%#N*d->_l1&GNL)OMVz_{SoK#?8k|E2TTx4VbP+?| zf;3+MIJOJjik64v)m?_RKJ-oh5z;hqqXJez4lH_RrUyh0giOp3+*67E>+qew_O52-;;01s-5 zN3TCy+$uzz(hn725+KxsH8e>M8J9Q-o6QZg^7&;iB3i$yBkxkBMVOpBR{5Do5y?Gd zVzgBZbZQm$pYSL6OuU<=4byuLLIv*}#$J;o?!s`|ENbB*1tLGqX z>Qp@Y|C^|VLYx#Qn#jzu0`a$u7Ke8E2~x64zQo38Z0qF214ef0MIP5AZ5mpxp8 zr3lX-@Ovrl4m|`5oZAFU%6Zfr+X!-hKlqgR>DEbjVUzJ-wmto<-at5rl#cy)`6Ms> z41(@jAK+g=h9nRm3%SuiEoXns)pQIB)rCq!J`IGS;sFD=v)KC2df7tbpaQt704sy! zz!kyo@;$_=XbLHp-e*Ll3xHz)w-BhOm}FgO0p!#F2>!a*3h)>7PNc$ht04EY!M*wO zCeboVQMW!?eJ8J=7`detB)Rx-!kDu7!f^YjlJzPaX$=;KplNolUl4&M+#&WsP47fI zS@hxfc!1+cB(@c$ze7k|PMJ81qNwk8{K<$m&7)ymXIgYoqUI{G6pw2;;rlVCw%#Xj zVJBqnS25q3ou-*Sd4`o(i&iaBq4=1D0B6(&-#K()4hfr)!nW;Ux(FvF&D_f}0Z{XO z@V?at6!)l;BQ=2yzQ7p8FZw<3Q6(F@Ngmd^onH%gCx%0CF^iC_Y$6*wYOpqTQEmP7 z)U0eJhv1_M{csEIdYG~ERrs~&Hi*(fqv1HXwMaI0S#ABdm>WB6_JRB6-}Sy()?@I| z=8c^;0nq18ID6dfCDLg-e#SZ#4!v_$hz=x{O*W z_F*VH@0Ar(C6KlwaNOvX;T2g5h)Xe>B>AJxBmFaS-YqUw?Fx;-j&Q1#wN!K=oTs{+ z&?zG|x(Qk0Uv$PhoTOZs+*?6g{(~)-rc6*PIO|2whB* zCDb7*`-`mqufD+xm)MpwoE~naKd~*)g7mz(3V=@~qxv94BZl0)VPvn~cKh9|MlY@r z?9E$DLWJ=lYMfmwh8;~)bDx<#|4Ve)aW+YD64Usei<~6;l*!x1wh1Y2a>GnUo$#sn zX*UvTQKGK zf;?+Xh%=_UI1YQ8PM6GUzat0ea;wSWYnrAC^ z7ez-k&~UCgY+3M^oXd3`(zxV{c4f#UaiNXHCl}+n4%0XPp5-Mw<+qz+yB_;x5$`$_ zrzI4YX^Es;gg6}ty^DE(r%sG3?ez~pi1Rd0x7tNSDG(|a+`N>8Jw-ppX^`se)Y?48 z=^^%dg3v&gYhqAx=#{bA1)E}PjZ3c=dN3>Ewh&AY|1>WodS^RY?&J~boH)cM!7D*L z;IkHffOAhwRe_N-mZ~jkt(E&FNUBzus<7{#A&3|>D!{h9a#{1BgdmLN&r076sgk>R zfsZMx?IJs%(wU5wrwV~@pIx+;%5kudMU$Y 
z)hjc;$3OO|Ikv=~z}j|4biOZE#%M>>68uY8(Kr&*&OzpYp620ijJ@!MwB)oodkE%t z*osQ%<5^mBcjs&->)%Y)6U&oT9`imKC^+I3dZwwHLb}N+==QEE$j1iyp|KTNm|*p} zMwZ}#6R|TH$w?Dpd%351G6~$9EFqU#DDFMg9=jqD+^B`Hz##=5Q8^eQtt6#2%|*j% zD3GjDqz96l!c;IKG4v=oBY!m8d&7r0Lz>HxYSn#@%n6iR7G3=$E{i@p8HJStVqVnf ztf@axv585_j>ujw{N)&B>!}ua58_b#KguHK_ETou z)1nj5GadmMo&uwd7Hk46DOTb2dB>!907JDi#DWjxK{i6pCv$%_`KYVN4|oXCZ;I)s8Rs_ z^&IX$sOyz*`6!6B`BAbFl*i~=th2}z?!>S+;A(!4I1^JMazT%{kawa*&>Il>QCf=e zQ5fM}sGuj@WPvq4#H}FZ4Y*d+6F&Q$E3+-g;|YXJubz5IgxWSrU5V1QMq+rUpo;DcKRSR229jU_|(#_Mq*g%i(Z(XVnzovD$eeU-x1a2PD~omUThsc zjKS3t8qqyU$NXA}JqCH{&Hk38rZ@ zfFSKTOb5fML&eJFb+~r*U3fMn+0MG9rTI9sxTm=Fc_!J5QtPPi!~wM_?jgCYnTZY|=iaD^N8eiIi@SDl*|#mW%!MHSzt`9FL~=)z7A6OveiKER4QBYH*q`=^1ZMa^)Tv92+1M)I zHd`F^Y5jgN__$8w=4wdUd@(2-XnjhH+HU|D;a2vRZiGavenLfqRq)qf)q@$u(*6h;zqpkeiB!-NDO?_ORP5EsF!@-J;vBxO z|51&4^{VDqLtQ*QwjgCo%~D`RH9p{wMgK`o^lA789eqj}@*j++(Znjk&BQoKv(0z` z*mKb<(kFI84mW%O?Oo^tNoJp>_Z7_)ks*9KfM4#z#pF9a^5mHh1K<9_x_N3eR1`Gu z%3zc*|L{i~$#emmpJ+D|38bGWk}-$j=Bpm{aky770E9FImcpIhiQ#0YM!3z3-&*5) zHFl)E1=={ZX3HawJ=`&F_LWda+h*8*Bn~K#QD}2&cZbhZr%83Va_1X<6r?L)u`Y9dGucu)#r1uK=0RNQ*E)zk#HS4?X9;At`1>1JDfucJ|RNQ(bK^a4n;7cx&vi zx-9zHo7aP-#G$tcEe^Z@?j^-QGzX&H*o^S04LS}BH7&kJcQS|%NRooAx|8fR0&pcy zB~vXLE6;`TMp~12Jjrb?1+&Z6riU~oI%!vARR%?ggFq6E`E4d_!$ zT+gs1%#j#DB~W`BqF$iCBUsVAjIl2NXo9atO}$10-^{f&nO#qqHB(3KQBIioQ_tU^ zrB@nEgw;|hzC>K-3qdQKQp%>gBw-Y3h@iH^k}04T-Hi=Kz<~76#r+;O##Yc8Y0qTd zCbP(H4$}fLbM6+GxX#pDJ42mjorRKwKE^59>-lL8QDzq)Yx0*9K(=6pi5jsXJ!)#7kPysVS?U5RcZz z&mZZLcE7jXH_^>5c**#DTl`nthIQS-kHOBGui8Y=F6asOzX>jcdIfh_;U{12O z-$sR}+PiZ(&TolCP?4u#_$JSf(Zh*2sFiqBLaj9ClbT72FX(KbPFZSHyDwUdtdZt_ zgq4kxgJ=@}F3RKD+reRhvMzW9%j1zR=p&A-r=|`UzL8&Tr{6xs2s~aRF7~4EKT`gg zYMDcGzN)xg#z9YUy|25HfcCC&B0sJH8bBEY3k)n})HS26P;{-1KOEe$x6~O5Qr1h! 
zYU+xUvr+=?SBHa8TG9znr3oe*r$AdGvw3nk4c8*)vn2ZVf$hlDqR?M&5b)0s6zjpC zbUp50KlRx!Mhn+>!2fwMt10}@R14W@1pD=~2lEv)+3(?*`tjLx^Fb9A?8oh8{y2`H z@+4Jq@Q=#_g!&#OSmRdC*H;O7yVHMx-1P3Vy95*Qxj8GFy|033NM$QSUQ!>IhSH!X zhWJ0h z?GX$uD)LuI|r}jIt0m!g8as!t}{P})}<{8 z(t|D_4b<&PY4BSL*b4@12$8x{yKgV=Ar2h4Jal+ML`=o>_MilV zNV6qOZ-O#HR=8qCPiBUBO*8Poyt7#ldCpK+x~Q9!7Lv{^6N=PPUl$6tS(zCQ8D9gg zeb)a8hUiOZXbHy^Lp|Is5_QrglUNb>lrRC7w80= z4|>3UM^Bj2^y5kT1Ea^F^zILlU%QUbr_~>Gy)Z~pal)-SFJCd(3FnAl*<7wpl~MJ`}#t#m1DNLM$3SY zK>b+4?J}aiK9E43H2uw{E|G>tF)+FehA<}o(ha2}*{(aMhrdJwB40}wgbVJKw_7>{>7D#FedmV2}t z$G>ln*yHl8FQnk_LAiq2r1*+aIdc`)99}o0Y8BS_hLM)y35qQlm0K5(N;ght5N#r#mf#eXCDR-7%xYX>qKYMl1LXL}Nyr}r)q3%r~< zIhqH%0Wdbd4Q&D=r=Jzd%>zf$G(|@?IIQP%i&z>Ga@50X&|y!ck}iVjeIb5kUnl!b zKcoJA)d6*KyBe_BgXmoJiwV=SDl_B^sF%I?Si$u?9*u+_*59t}qLRZZd0<^4;`QVz ziWbp3!s$hUPa4ax6IIIlHShSLpA_N~0)%aY8-PvjGhVmEJ|)%wg~fB44eAyW_GJDB zu8f=TP%g;pPHN2>l*Irnf>mzTnYNrL`We;ba~_T4cRS?pq^1 zw_wu)>KH@D;aWgb>_d`w99>Q1&(ZCww%8`QmswjCTwB)1kreTv zj@1mC7G+t#(P-}t!mZMK<}ahc<+e#AQ5WC^+N$hA!!}9GZX?mjP~8RTp(k$`Bu(-Y zrGmLjSI@Rt?R9{NZ*k7QGvv}y=jdVolVFht0mF*+;dnXL>Qhy+t}a&)5kQX*;Px>x zE;i!jO_-?12Uv2~cq4qJ(wbh)wDe0FJ++yF8i{K6hx6?ZFZMc6!9hwEQ8qq{%b)lW zyJF&>ip&8Z59tt2=|E+1;tVa5qzqkBSmYMBFfDdA(27%<@t>>1i&XUlO1?vfG|W?m z0n*hwu5|=#ML`_f1%xqqX(IKL-l=#8g?!%!777P>WEuZZt4Uxml2-?b>#>} zp*-3QNj3erzgR)Ie=I^PU^p@S1D(Buik{@{H~#W*4lUFc8799b2Jk}Na#Y-)i~7k_ zEL}Q!4ah3ceGw5FA6os>l&Z^y_lb=vB(0Kk5i`5az#{4ju;PDN39Iwet0=(Pw@nD+9oTsz^i zJ(hC=y)$yvZEt+L#l(ejuL_|M9D5{hFcl9a=h$d~iCH$~Dw=)9$YT>jjp$xg81Ik* zxwf`|Hl#U>z#4}{c~oI+PRUT>JmNxA`FCnluv9#G@uZOc$+)CXxE8)hsG60jBG*1c zY@dl1RH!t{B6ZW;SV0oan;2>CHTvxqhZOH}w}@%6w^)^kO$fUm&@GZzUNHRbEv5|3 zWAo)Zt!yVE4?P{HUJ9p8aSypcdyU5}0e#*;hE<|XFv?labu;{56uwzI+5pD5fQgvn*jtC4juF&?ITng^I`tV)m>ou;vyNd}n z5X!P(%KvF4V(M5C(J#UgmjFT0$+y9lQLFHBAnFJx>-xM&8?9LjbR%+npV-JMLX(iH z6%g0*tRi?TvXYw^ns9pQiS70CfJzEXGWg`N1t%KjXnoS{$EEr<%xfTQDssrQRy9~w z6jV}~R3FnKsmFD=&lx5~c${vL6rR>T6rwO2uTwQT_vk|27o0aKgPfd~CA+CH|DFVE 
z?ofQwUy+XB-V2ygZU4qKq|prhGXxEwK>n;IYyBb88u!w{tXP5QOFdDn$iSZJVSM%= z19RX?n1;=GxH)kHYeT$V7_+eNG@Aj<(94}o5YBz$imYRIS0j<(1rElCrYNGO0q-Lm z%ta#(+=)-jz=fz#eKnHMD-r1W+gP<37vm;I)d=HwG)c$qD130VfP0mk+Fd+yNFDoQ zVX=0!DQY?7P2sYefzrG<-fa}&$e54Pgwi6)%HfF9a%tQ&Sw9O{)iR0*5?-`J>0%t0 zWXOTKqF_Sj=uAb(fm-I);dcMh>a2~v*smm7nlk$1QL%Rq*dt9_F&~@l&Mq0u1WH4k z1Vd%PhMBtWWoHabq`uWe2L>X)2mf?X!fZ@j4BFwpWvy}#ZrY(5^^~jf8a~$>(2DG^ z)Zvbzz{M`B#q75qrpW~J;pPcI_YZCR3(&=*286Sq1%!`4`{T_l3n@SXE1>&@q6O&a zhoSpHzq3W^0K`$NcmcY8DL@+EH&-n{2axzqM(PdprU=l5qXvk7pFq4^1rqpOi~MJ> z9x^#-;#?JCm|}w%AkC->QHn5FFF^&6RwNGz6s81C42uC4s{%0hMS}f4&Cmim$`I*( z{(kuBqI&{DS;{e|q1^Y@@^PjJ38aMXH~PMW&PQdi96}4X@#-W3bY(Cr)L)22!*xsm zBMf1spho$E-;t_3gkDonx;1^OIegmw6NbtI@)BEhDl{b{Ygo!?Z1l4}*72JKReftK z#$Y+h6VPn2cn(u5$qS$1Ie z7#Nu-)?sTJsI4O?)={FXE37!pIkRj9sV7bYMYH5bf$9r)4GW*cGx1tJ@J^^o47s9Pa_=G@)Z9bLdbEqJg$brGY;nkQLbVs;T zVu)lv^>wg?>vR1`#cLd#HEy_^*rUUsLkA9og1*gnAd||?x|Z*0cC~^SRH!2%%4ZH~ z%7D)TGA@K-{r8}enkLt=W=qUWnI>1S%r3fYja}Eyw0UkU?;A?fB`S*2B3uYCw(gV_ zPPJBJ+Wics=)7xZ$%yGcecp$l=KQ84Y7UgG(ABk<|H3oIhWXkyd+1u`{#WYeHhbm) z+PnDq*YxTT{dbGv8&Nxrc@2C?A7-6=O)J>DrQlc!XoN1Wv!pq`tXw6YT@@rVKC>Ic zYo}aFjeL)(-g4BhP``_XDJH2{x3~Y$Oc9fANwv1TO@+0Iu5Txg35@Ztj$?bL^qi}+ zmf)g0zSW`ceE$1yfwrZm`D`ahemd}EOf@5EO}9Tf_^OLqkGlKprTpW)iDOY zm>9yyUm`}40eoR(mTI8>LE7=u$dH^r2YSYP!uaBPE4C^;D>=PeuVh>q_V3fyH;QdG zIj-5ATR4VYK9B>Pn7AYg&9A$Q&J?xxFRZ@}f_13NBs87#&&;|8~Tm^O&LVZ9#2xpC~JORgw0&viY0SMfIBRdr8O2D@0->5}SYO zcU{N#Ez_j=VR`(@&R<*uqk;JaHvdcJ>8d?=q$eZjxV-8ILb|Qq#ysL@`1mhLr|kSI$VPyAm@B!-e(FbQoQLiXmc5Cv{+bFlgI+w^dj7y{vX<6QtZkCje)Niv} zO9X@x2crX8OFVX}%l%fB`ZY6>9mzS{3(9uq(^s<<6cw_pCIEFyNpe+pO7HZ-2GxJ` zs{aHu%Y-o0E`!c%25HNsc7Q2dNJE&Hyo-NBPN}4UZ3o1h)Hl&-#2CY>G}_@&7{h(? 
z#-E{`3vQDw(w^*sx0cZ!aVn*$2)4m=VmV({I;ly`6-s8zuB01@^tQ(2>TQip$0~hd z899J?G?G5DVM}rJixk%6hdpCckUx$;Qxi+D72|JM6R`ok5Qw3vAA2TkzpuDvRZd;a zRC*^6)(Y|n?-r%<%o;_IPzG9yl!+Fi^DC>acQfIWC)CD+VoFc)#Zgun@)U9R^;EW zLh}YI9)Y;syK{jBvX`X{qHKpY%nff7c^!71-g18sZ%fvfl2(f)R}$hHWtBhLJ8E*s zsp=9D!&t8hH+UgfUcJlpu-@o5dtqv$m=|MmS1J4cdI;^HfrI?p9n|Rp*qQzDF_WK&g|PsQbOZL z{DX+A&1nw=yEs3{``@= zN!EoVMn)7j(~rlx2cLtDbl;PHiNsw2jlF4)B6y%&eLE65?2@e?wA;TOPyrVw@TZSl9VpJr`cLuU(kuWr1r9c8+nZ{OS`?eX?xiadC0Lc zg_rI@>*W9X4T0t(r|=Td#%fUJZaegaSlh~zzgmQf%FFNiB95f!4V#4|$b`|+h^Bc| zU^1x$${WwR?GL|kYC!U}==B{!bc^z zC^|KYLPnLtXa;<-wlEQE!LC~SE)JU2#{=(Kh#EZWki)+=K*}IoJr|ix*rA;u)$_%F z^eSBaMj(y>`^K*cki!tQ@aO6(K^w@(lzE2l`d}+?^{_T!#`qhE-%~DXB^`j2*FSLj z9O~9slTZJjJ`A zbDpu7MtRn+?j~HYALGh=A%jl#cQnI0XHy2cJr2VZ+E_JaqT4~5{Hn@jDK=9jMTQ*o z4wnJK8Z_L)plZ>!>UiU=>o;Y`X3d20=FC~nmBEcQPee}#%GS3dqMv&#n$ga1mE{#D zy{1 zf9}+z96YFa@{sV6)ign9hp~?;fSC0P29m;S*lIGCWh&9>el3zx|4@&r-zt4TB_|?_ zZT?_>Kkpg7X-(-TfuxpjQR~e4D-Jc(w%Jk|*2)8I66cGEE|uC1hAtuC+?*!BIYyH= z%5Q8iG8077u_CRcXY~dRz-+hCfO%9yZyv2MkNK zahNDH3Z%s`u2u=XZmR7~I!O=6mq!t25gydzK{B6Xr4=rXVB%UBU!9k#gT#rKa3p~= zqO*KzxW+2`Ip{43AZSvSGDPC0HIKb&MLv%34QjMs6(f&m!faF}FQeC@In#PCNG=oh zhTtLx>2xd!;_qmPbu@qXPJ?z?R&?}|CAz&O%+I@Ox#VY1? 
z%5J2*)`7TN^x zw26!K0ZpJf-{e{Qk++DK{FYk?oME_4&1=cok{{{ z5Zs?QD-!lE!Att%TBJ({Nugu`wgO>j%=QgBKzIKdtXP+lcypSqryG+B*g%zFtH z8E8KD5ooy@K!_K-yzbtB8KI;F{GzHs5>>VYDgjJ$lWodcBiln2m?_;-M&_LgRDepy z0>i{@e#$C$jzzj~5gX@Wcd}S1;oY=;nGs7AJ4UjE$r%%!6tzCfgGM2=3C_Rur;(M4 zBA~Uf#hgkjw8{Do_8NMHRpF^L7-LL=wOmI_yy6H-pQ~0H%>k%j3 zSfRU!h3RscGt}Av&5AUlP_&AQO*1HhPYO`8%9F`ZtTKKb6$yo!6Gca@F6Eb!N`c`T z_TeqggT%({4>-=7MNTrJhT4_{A|*puaYpIcHE9Fd5j+XsW7a2*Vd3BUq#m7#>OY%W z*K5Ndty43yDuv@An6w17v$z*7!CdvBw}h)zA^MjyZLzpUlkYa&Y+4^S48Q+8m^IvH zdI-wF?jRaHR7-wBt=(=;w8E9`qG*@C1o)ez+;4GPh5Ffyjq`~oiSM@uK1U&aiBKN0 zA*H1yoaGe)Ioy7x2z1WllZ~4VijIjK#j~aed#izSje=2G0Xi2<^)NJB+DZh;Qw6(Q zC8=uVw5>3N-WX`715_32XPy|pxH{FqQCMSL=mmPPWV^YW>8U!ux5v=pl&axP%#=;+ z!2$;Z*eiul<5ngre8->7dwMx5P3$WV(?Ne(_y3f7KKPi8ENZo}U~8>swLV}({)9)m zMbWt6X=+0tB&I)`a(f0cXZ%%(dM489$TiL?6wmfDq0Vz>#K7;Zo3e{< z)4#=ZA=JyFm8i+6l%k1gne=pR19R9`fVpZrJ6hoeEKRF>%XPZUaZg*Y)JoZM@8+c;t2VaF+C>jo>Z+kpqb?(>@>5qHk~`*&?s_ z_s`wZ!rF~gnVd5cBTL!fhcSyc>0KG0RgnYwhf~U3Vw!iM0z;>9QG6&$q62dM4CqxC zylcg4t*PcLmlblI{AkvaIm+E(eVM+hZoip|%I79H> zfs6Ky-JDgHNrQ5^V8eS^fCs z!b}i#sAMitn*mbB%-Y5wXIsGd#Wrr2z81Up-DTO;mFU{RWky0$T4qZ694YhiLIUU_ z@TJnUdOyB!gHsZ6uY^IY7&vK!kx!-VRL%-*?^nsXU!h_9PyosOHyWEu0Uw@~;?nM& z+idSA07&B@w%-}1Hz3^Zce#XJPjwlm9IP{D!3Wal)yyqL1c}T@ALYnDRWY%dcp5Uc z*2x{Tv0MW8Iy4anky@^E*03#T9#(d8q3-UcW0;jb(X1Yc3r$0@oPL4M_(Nv}m)6TK zMXJOyRLOG8QP{*bDjxyY3hEd8#x()XYy`c~)*3Etulgt^VXAGvwlym5JgIuOIrJeq z5`fmkqlVSmb~XSbt0y|vTtG;v?3uG$4GJsR$+5xPcQ@uEyFz=92#4o=oRT&ibM$&F z`BR(zDQv*pfP1B|BSJx_v?xhUSoQsHzZ0`Vo1}WM$OGiO375 z)#7&XKws?aUJ5wK&(Bo3S=QA^2F>wS-GMCMaBH*OR;wDxt6>{j zesCS+VVkL2!bnf;TZ$5tpJ5gcZ5a{HsZK{0iHEEaQIF^Tu1A?amCDi_HX z*^Sh59IJC#MolsrnoXlWE2P?c4aPr{H4Q0JrU8sMS-z4j*(mA$x#?ob4-l@|f#2)r zP&n~8EVI*{v5%L1?bkR`ZCO&#zGhjb?69xk9Si+h&|yBc%`YzMRelrqwSGi36;+UN zu%I}d5`}NMNWn$cVm{_paRiT!$n}`qfA~M5&H=c#rHS{kZRfXZ^}m1JwRY9oYwww=S<|z{r4jT2;Y7vJP733z(FGYag*S3Rbo*wt zv{5z}kRB1OmARjqm9n14TY3ab`9{mJiR3ogwKMUcQv(69=%(ZOK#t`hxgNKpz%(j7 
z!!m4>7)3>xAYDNbv4jcuEiUvTR@fe9HHjb1t!;$kyEV0Nza6mMRhcF2(waK+E{{A*s&GojH^&f`^;^D~(AgP}-kQH;h zhCfbF3QLm=S$1WjUyho&4cnB+us0JVUsj1{_r3$|=ja`7ZM3wLsAgE48bvzA z!y(C8%8PYRlGc^6Xt+oEtESdcp=ZjCor%(r8 zZwpNT!=Jl`&O@L0TrCJWfh;JdgH2{Vb${$hn%ES>fRhh+AV%KfvaR*v;Rf1Vwnxt#w9HN^-hC%WgQ!ZtVV+!EeQnmy;su z?IeSCm6M5MHDi;JEX|)9x2FTL$0NKA%y9w9!%mc z6G(kV_YFyPISo!>dG-QwU&{YQ%5qR3+GnCSO%?)ABuX^sYDz%UBEWU?r}7Wo2>;+Z z3(fF2@@pwuZ=%*dl|#rZ!hDCx)4`Ep&zBz?&RHmKPt}&pdsd?N z_9GFwaYS$IZ9%A*F#-@V1psS7?&4Xas$?8e>_GAFYcl`>P}8D|x4l6_NyAJFm9yI; z9bI^)y0;D$#cBNF8E>P9yC(KEODOT#*4@($L9*H#;yB)I4j1~1p))2zG>aboK$Y-; zm+<{bP91fN4|9MMsc5`BJR@=zJC23SpYWL9^R4I)q^cIrXI%)=thj@x-6?4woH|B_Tz`SHAewfM zApmiBkzHB;^(?E$9;1qllp)YXznIS7&I2Hn%Rk6Pd=12x@r5gfr1vD1r_pO&2-7T? zy+=DmrcH;!H4lUjk;Q}#;sAQe=ayyHSa*JdX`?qC4?aS zI-V-+ih|Y&e3ISHS1h~3`xiP+c=J)K0YB$tqzyh;=&{h9iS zOS_hq9PiDLw%?O>EW(CJPWmP}$SOFujnO=SlV^Youts13*J>^iKR6{up)G^t(lG=X zsM>{Vd}`K;>qIjs7WYDY(kT^_QbjHqlPe}|qf{8a6hvi+O{j@fV!#y`POjG`h(RX^ zPKc9(L8C4P)9mP~)`&nwA+pnjIu#{&^vepi+omT@DTTk}SD+FS6%1?=K z$A@jFkc-d~Zl|GQqN0nbRaD%ND75qVI)?je6k!;jCOvl@-IF7_t;l%KS;C=p!rr8m zw4k`_P`X86<+&S>(VN8FbNzuXlNGRHwq(j|16F9k^noVRbWoKC808x(`$M!fn2yXh zU^p^ag)%bBRwK+eq=gqv-|%FzAn#_Ci!7K@%rygh>EsiinFuCK759kvsSu2Wqa^Q>Sr>4fp;Iq_LtElwTbQ@uZq9Lyi;>S zpVM$`RrY)SEC|@E&ngZ4e()&LOb;>7}=B(c^&y-Sj*p27HB21k~!tenb zYgJeRi%*Mw-C3WP?G5;^Po91%n-x+z(R(|o^Us#;X9ewN6~XP|n7rh!{*9AbtMvN4 zRE>w3bMYYMi}j#)!W9%vZL7^}4Guyr=g-fK%A3xEEkOvA7@fw~&PQ)tW-YtqD&K~#F6xt=~+>7$0P%%`~yuAvzUW7!X zCh3DOILt!SR=f}&On8h`(kr^}>Jqf5IH9CR>2=;yrS{IVb+X&lHQhIc^44RY_1<3T zK^tAy!J@O~bX(X#JGz>}GFFK3jJo&&SzQN9hozwh{=HdU=MnkH0>84#&30kc3+(mC z;jOtor2|=-X=G|iP~n&DZ0 zGRkMQp82k;?j-WIqOBIUo<&iyK4Z-2^gwU0z;GdR=+m0MX@*Yd(>m+Rf5Auow0gMYV6(QmrY2swa)GeYh^IAsS>lT%d?D@ z<^EXh?O^Tg62ug+PiBBPaNvP?n}9`K-dYd)F{TY5yY0gSsQngG*+a&}NGkbbd!#Ie zO~VB%eGSb5)x)5x{?LUUH7V{o8vxJK0bU`$LT9Ok{8Mpik!N2t2TVCKwXbgU6tYq4 zW!2c13fbvSiCQwM3X^v#s8a8TFR|!)MG--#RAREx;w^#$L zF|&&qlK~q{mLiV73p1KsTJdb-w%w%;y$KkbDdV&p+wD&Xc+A7Dy++&Q3)vWpVAe-X 
zn70)*1amTzUR*6DBbk8Ni1$LtP&@?i2N&p^mY05$dp58%-B$i=BP#Lv+KaEG`pj?N zuDmj_ZM(lU-L~DWJb7lUinx^Xd|33D;dcv{QTxrHk>Yf*rpemf42s13>InWMvk!aE z?Mk(j+FpU<*p{8Sy|t@UtG=Dq+--7_%=PW@TXjgGVZ+UC-7?;=<-A;-ro*cmbsv!` zZcdy!vj=wLA_%bn#(|VGZ0wrQ`EoqIM-%f~M@=-CJ0}B|sYpVYog-5c)^af2AY+J{ z$$Ni~RLm}{*UTj0gi*(I&le5iwi#}q)5Woy>xiaRpr4c7VP&2Ij04WY)DfpTIYZu6 z2Vl0^tV4S|$9NE1BCALGFT)4A8uX$~8@tR;k)b2j{=W7A(agc{Gl_dzZAYAExHfVp2p_?h`Zc@#AuLQ=dGr1B2tjCB zyX34cZqy>QCTCFVZzyoryI?^kb*8T1Wu+U!LUlmk!oh8S2=Rv!pnI7Rp~`&~rmkWj zN{bL{0p$ z3xQCl>S{PMk|(l@gm_g79b*)drs7;rlTkH znN_U!w~(1p%8DOeG5{!@<#hIEJMzZNq60yW#z+Rs9N+ln< z-a&c7qvgUXT4q+Rh)e$T>6R&)O4;P(b3tKa>gZP8v6x^)A{s}HR`a;P(7gEzX-VQr z8=C4yLF>vcO!KJwXY(-YsW=d)AY0xW_``_zw_R-$rivKr;#8+UUEWS~Bw<#?o(eK= zc=ogdY2{}ZZd-W>=c$olsYxHM@j2m;QD-Z931ikZdvbAzA>YQ-brI9}iOUn~m;;Hu zcE#Qcua4K?480~~v}&WaI-Xf|a^Jyi`d4U3oA;)O1F~^HX{t*twL?>*U-;T4Btf$b zoXa-?pG<4kmRe$k&!}Xhkfq`iVHN9x5!#fV7NN(hYPUC)+>Qb(F3Tdn`PF6xhdB|F z4fAp6uIjS;lNl{W*|ci?j;h-<&96cOD(_gWiZXs7=6EZ%Jv0@Y%}8;|MAKGE)6;=b zdLc6%gAJI!UZ7daPhlB3%cfxtOwQKnycDkHsTz;dFfD}%Dg!Vzclx2K0vc6{)AEKW z>Gj}ln=qyNR0jdS$8hBvUFo?}Aix88ouWY3xut`+m+pf@HS-I&)~yYiU!N*D=`q(1xG^t?+ z1s`<)M1s|o{Ih}|EKM@QTsLlD?1G{BHC@yAAdRFqfbBKJUkU1 z@D)m+$|65_P0#+>MLwI1^$k+$hQGC`Yp&!bn<*F7DT@=|3{Vqw9F8ds~p> zhGgCv6=_x3)WGxT>IQ?=P?qfwXwSXStra@slOmC~p?{-}GNmhPr6hU2@BR_=7m{%5-NbJg#r zh#9*U24neaF_GfWSgbA=WgR090_*Z|ufG*~vvx6^3-8O+oz!e%I`v6e5^3(M>f6)K z46UPCpVbaLMM-s4Z7Cvq++q_8JHL#HUu<{ET?%O|o6)q!O2P*F(0}jmUjGqf>cNOEZX`|6Wen4$^ax*6neW zBKYZ+*NNcA20e_W6vA)9irdl3Nn`@W5@GML<0|A?K6z-t?y)b$9X48^h&GXF>})xx zi;QXLh>PAhDuKAgxMt8VQ=^JHUVLw#?)P|ewS_<7BIRAqec8!KsO>ooUa@$)oBsjXa}o@g{R1|*0X;Ebasc^qDl3nl6I~>rY6Y!cg-HuC z1et#tOwCoSU><745ZTU40RIZ9q799E;{SbNn;jhXhd77)2+>obPBv<1kY-`Q#{9-% zb%6%3qS+B?jq^+qk06f1IiguLPSfI;c#r|?<&pNJH`s03e6D` z`8!1b>N=_VZra!ByGF*lG<^@9g@$fgfr!qdqT`Zzsu-21e_3lc?L_J-NyrN}c76KQ zBtjFzHX?y$7YH7T_Q@k{0DdS#8ZQU4%Qi`URPIUIjiGMr+DAcH{D|;V=j2r}4*XzB zcN3XMS(PG2xXxc*2%`c3$M(_tTGt~(`M_&s@!YySP_bIXa@h 
zjv9rwFQ?8EOP?}2W=(ELWH5hDy6ZujDyr!^y3-B?>7_oPQpz$++uBh}-yv}$vV4VS zq=9~VF{wN3-Scz|AKkhQ7;v_+T-90F(lKv|q4Ltwbv&LAWYOIb-0=qH%(rbq2(_`c ze^k}(5!$zV3nnZ)D;FPV7ce+Ux_PQF_87p@z;gB6>lJQrStHAb=FkT3^fu>5cgpHa zc)Aj8x*Z^Z@wksU?We(m5Rz@{S8@4c`4%jgE(c;@IP(tr^+`YViSmN|S7mjl`}{~y z?k;r2eLiSlt-2QfB>-n>+Y-CLxq4|x%RZ{}TAVL~;w_}($QjI(X_?nx>gfuZ^a<+5 zw@Yk6lsL0NaC?(ExXun-miIFBHZM4 z_{e4Tpyru^J{f z00Zho0l2oVXhr@5IN*TL0E18ma7QWRt%|#qetMRxe&2T-w-+4dckXJ@t$nA5hkN?f z2MRQ|J*~u?01qconY_uqZSoWPr#_k!dU9#?oCmdT_JUCyu&{<-H+%GXk&I4gWHnpC zqsfI+eyv;SiQH@$o{##LH|?PiP_~QE3dto5BIk<`MF3 zA^sw|vB2!}^ObiBr>Rd1Tb3P5mol|@okuoty^>ctz=)WIGNS$Bc}W6Qa~C%)^DB&~ znhREk5yC#Ez485$S84D8*&9&Q7Id zgj$Up(YPI$#8GHAaHzmr$${951IrcWl+o027x+N5JHCPTKY>05yDQ61q+B=zbrJ{g z9+hVM0>XU;TGB+exhj`?%*sZrLB44P-Aw-WtWCezq*$mIakW;&qUqTpQbiOQ8dA4* zXKxYG#$nbS58tns7~h9P$5gOLv3%WtSJ%)o`K1yX#D8SZBBuZ)@!K03y>Rd5mskPthslX(%HPXcGB-y0XOKxI$EfMg3(asvH=udd# zO=V|#K7gujfW1Av=-v2Q?kZLJ{Z&2vNeRAa1Tmc&M(Arg&Ymp1qV1j9>8P4Puh@7W zQ1FC+@OU3o)%2F}!rQ029)e?1U!d1#75!fbjt*CFTLq${2L=(b7s+|VnWC=+wo+iC zKBz9oxNlhd*k}a<>arM|D+kOSAJs(&zP4LtVV!rq`lD!femPCB>S=3%y7-Xxx*~pqnS8QvQCt`(Ci)cbh zM#8NLoiPRKh0_e{BN~x$Dt8@~Jl}>+x%~N2d^!t_OqRvet&>0o{W6BD#n#mwE+!6U z%Ash$q38mb!>b-5vbxb0Q>e1V_YvEmTpsnKSiU`qUTn=Nu~Pfzz*=B|GE*zWXg$Xl zBOsPw@{`)V*goNNS+qH9B8duy%4E-M)!K^XKw_eSNTQ)(TP$j86I&7XCZKJ4CZx;- z@kzO(uJk+s%kDGM?ulh08#r`ngFeDA&acG9oDMs) z()W{;x6#zm*d{t~^hqE=bKsf#>=*Gf7fg-3~-sVE7|Bm8x@(6DP;OpZ~s?O(jy z-I<@A90W@qVKo+kG_-`5BU8!U%w*fe2OSU9Q`YcVqfPaPM2NA)%}d2N=hHHi1_@YK z#nlYi4~2<-$q0^rbXvKDh-`vI3EXz(lRXs(Z%WG`>x*oo+a*8Ms+?Cc%I!PL#B3|c zIHs6K!Xd;m@AYPy$+fc5$+%hcQG^wB`#aI^%a`Mve7QsCkVb{Py9Y@J8%{qdE;+#*(8lKx#5T|S>%M+@w#Zl9TO;TO-a26) zgcf%2eGXr34?C`tF=c7D97FEY<>S64+ZevrMD)&2gTw<{iP#Q-cMWYFff>E;a5?Er zIOoRD_?84?wND*at*1gf`Z=w29D;R+=qwH*`uW2F5piE`;TWCy;nV{oY>g;0TuXBv za#UQX@-BHDi)lwy>*!?wZ9P0;=mD8;gRl+(WO(uRe8O3!G2wTZO+rjW&Bm_};?H>~ z6^CrME%YWsYhhs{7wzaA3a90f0l(IE-u(!*Jn#LApJO0rHVLy3P^$!9t{=d68NGeN z1w3eLgq2G1xHONNtrLVct}Mu8SP5+Wr2VvOf<@8RYlvYiR1SsbU;TPhvXf+Y9w~fA 
zT#~I$;g5<&s9P*J!q0|Q?%%7aB(nzzql@uA4HOh|T?hQUf>YZ~GwC<`#FbJO?-3+>a~BIGx=Cn%Q^oJV=ISmEVHQ zR;R4;t$3d7Zfu3EaW>yLX$hVl!`If_&K#*-Lo>pUtZ`)gqPwzk++5vA|0pOiF>o-^ z5v6JOfTM?(<_M>aCNfonzA0;JDyqgZg=8DL%%7e;+q;?S+N$c-x|8@(aLAG1`54LC zLUn*%E8D8Q2~k^`DRw%h)TATOB>43O`vh1t;-t1J_{lihp7&)pNEe!tkZ>7QSm0$P zq~$IXu~HC`q)tr^8_&u}{e2J>#lboshr6Ed!=1m#p6b3dF1xF2N7LY0NaRdFmp>cI zEu5{FYvzo^^suln1xtj&o;_4AimRqWtiHaA8FeAxw=bPPCO=?tps3)`u#n(E0)^B) z7mzNCUi0!uTTEo!nIuwz>q#D=y@fvT&F*HCjrPA2HnbG|BYn)`jIy9)A)cs$tJtU5 zoJ84%hdqYAnnd+1xhsQgiv+ozgZW{oT~&?OEo7KS_H#~fk;`)`CoUS9g&>ND8m&dj z%+<^#R-5sodlbnTaVg@RiIfhiP_Zk^)sE_OTj!rzc6t9N4%t)RqjR6TKIBCuCPoV@ z8^RqT)$OGDgxd!@ayoaCb}Yw2?k)11Q=%^Xg$PqEIWoyM&hIv6maH(*)*auLxo=+b11#;c{6Bc6YIw)FXqt-$*UZav8)B(iaB~mLH+Y|U9%ga1O`*QpLQfHG z_OtL@LDk@hy9H&oMr_;DMO=f3(z7jwYQi9JaZYk}7=iqkM|o>Da$Q@a%3vSGohHAz zG)IiZeo6opS3##QCa1(Q^IKrcJ>p_FkgRMiqhh_HFo2xyikHrWwss$?FcuxNJLydu zLeX>$4QwZ%!hZh&nQRiw=l=a;Xn~GjPfc$hu3fHxNTxdZ5jQuh8P;}6b)k~c)A>CH zg7`fGAd}HQsN|=P7C>b4yZb>Vp%eK%3QGSAa6|l?CxApwCk{k1l5t`TbPP%=uZsXg za^l0(P>9|Wrw}Uo#nEZO{pnSMRX{8$$q0{Jp*P_NCYjGO2D0Hq3smD!%S6$oW7-Bo zu$Zf4yx}QCQjxX|PeS!)i6G;tFy4ZW@yBG_<6^Dz{;Vv&u}#X9!#2_`MGD*Q@&jWw z&r zreRSHmIPO!WMBK3p5f`gl9V?V15csg`o-|!(m_peEmz&^vD zNj*aO`LZ6W%pSCmO#Vk-jENp9^*g1}x|)v0lbtlscMRG)Kpn4@M~$+$tS;Hg!g4i< z+wAYM6&^ZlLDZA)8FQ}s1cFQ|QFWS>f(_`}^+)#}{fL5I`L=m8=I%zvM&)-Ie@84@zv6@b|-%-60+l!J@BivI1sR47) zHfts-&tx&3SHGCo&d0RmlFICC#@`BR*M9!`?STTz*001K6p>wpnwJzt#9#gB6?+kX zVOj$uw!HoFkhep`M7FMD%I?PdEZ1{T)Z*N{fFY~9I5=_%OJO;NY&usQ% zy>VqNVPkFq>x`2!j*C(EwEQgN_x``W<-MTTC^Rm~ zPu`uN*v+6p9owu5%>$c;*bd4C1Qn1exW(wd3cvj|x%GUiIS(egC+WXmWaH2;(7%0qn zuFU#(TgA{be9E*>U~Se)NT{MGMm>zjeEDQnENR7|2eJz2%!tg(Mv7okF+J3O#)L?D zT+!#<=v|7p2KTpTNYuP7@HvW3+^)N5#+xHuH4betvI5z8w#MACb7{ex!oxM5XZPG- zPLbW;v~4N8U!tpVZQb_ksKfbO3=G>5R|&NbO$~dovep)cQwD&K z(_r#+b75~HKIPvEZOYUko`h#fNe0`P+tTnCEzCt_!hCo?1CIvq1;xBv1Jx|$P`coA zN(H#OT!ih6r*9M7ic+DS!Sk@!sHfzE&*fdw!WbKT;BCTSeJ-KFw#>eyfxcY9h*3%` zrM}z%r?w{gV7JfnvfQ6V5`^G 
z{8KYKrq4?o__y^Tv8NMcDJ1U)S;si_gpW^k!C2Jw7QTCsj@PhbCIb2j@=&W666nZ8`w zPf8#Uu09`WfA2KFrQTW4C-5hK8Yn1n6ajD41LsZntnrCkDfm5CQpRpZ;%iQYo$o5< zZSKU>wY1KEQ1ir%l&O!2c6;3>2PHed#mw__NT*e2}uIZfP*z9Mar zo?VqbQMdQ%UNrfSoPfiXUovf$a|(*Pyfm&jBNMM+NVZ)R7XATXlATP+&S~-sI`||l z#*@xw#cf%Cxtr|$UW{mY#I~aEh8vIWt5$&4byw(mU zTvy_}+UmCy$;7Xf;~qDSd7Z#9JxEK74LDo&?Fz6-FK)P#`eUnQ{);-3)&l7>Xqr|b#V?U_Rz~-~Y zpNEWgHY>CH!p;U7tn@Wh+V)+p6-b_ahkwHQ4v$HB|D#&d@A-@^8z(8g75hwaoc_5)*3Sw)c0|C=?Nk;oy zI|=JEoJSMbf zm^lA|q5lC22MM_u5~bi7u0j4k+xd0(GxT#FK6yM|L7<-CzE77i>rHKC>rHH?vpc#| zxjiN8i`+v`qr{SMZkncRJFIN5?Xis3boADAKB0d>L;seH)c5q)_ZWx%MaB6K4E+y~ zFj&*g)RWn;u*x*I%P%9YgKran_bR*OuNt%1!PP&D?6+J+sjN?!wvVP&6{d-A@0vcl zI@GSM*V6U2*U3~zwHwEG7q)x`C)>?rb^+FO)@VF>((zGQ5w1lM-TFSOAYK|XX-6sC zEUlJONB76;yN^YsAPT{{6+T*;1QXpEWxX$iE3{0ydc*L7E%UWvQpf7IF%RUhQ4~Ek z4Ex|aT~T#mK@*b-XXyTNP$)*lW4@_FdrcOr%3{V_ph;uY_ru;i=;RxTg4T6;+4U2w z&x`phCJT2Z(R^vlrR^n(>F3#3Rb5-OhNet)T{|A~Zr@YG4Kf`> zuk#pGxjb8%qBCAAvY(25v_rXR)41#WnZf@}0Q}(hS4pvk!r_x*@4I5!CqSNmj}PDm ze-H8t7~mU_@dJDb{RJN21N?Nz4`7dvANqe71pHscd4mt|0w2H6#~=6&0QW5g^c4oX z$ES+_#q+qoyD}U}Aa-Qd|82(l%p#pcm`|#1>sK3V z+G1of2k0%pev?k?PZVQPOJnIKnFYRPa+k_v0+725(O1AS$)A7F;I^0JSVs9W=i+`T z8z~Zz%4Y^g%PK@S(RqBwe& z%NWJ{>Ou+Os!ijn8ow)0mC58kb=Cv;(!)4)ZUOlQ>hJ4E{(uJf2EzOTeFXgi_4fRG7NA^ny} z33lMCd$|1B(@XV241W|6N=j{7!JVP20E5*d7_+aJJogGw{#wjU@mh?(A)BD;rRaF% zcDO&es~+f>+9&GxS4xB7Gb&0~o$<&Ol|;jPB$exh2SeU<;YEod8y3^%=^nXj5q(q4 z9dlF6;1sK5?z-r5T80gZX{*8;(mQei2XSdoRrZuEhb zgI$6niM~hXj=4u>@PSn#cZYNt9oG)2v`yk|ag$iuE~)uPrtgqWEN&L>{kXpt$SmZ3 z%nI2LA(p6?tBf-JLh1LDw4eGg?C)Dc{)htnHpKiI`3mjJXyn`J@58vETM6@H*wd>9 z{r<^N5A}1f-_wJ6LAM(ECziUeM?KKDRvCdz&*X>YuCMuXp^9;-taA!F8_K(d(n${s zr3)$D6Xl~4ndMojtjO->3e6vukjAPqBb9}f{p0TiRB-;F0T89*9}fvx`dIoVK|c`L zWpw5nPWwj3{_^8&FYqk4U7xdEzjF$7F=zr*^`mHU0Ce%O<6nk`E)9+@oz|h>35dR= zXl(gU-p`7}|7F9VKj#V**o|aoMhv8%tnTQ(y1;V=rzlzg1Q?gDj@F3eJFsq_dfEhHdUhm+L}~C)Dzu#VOIhi{;X*x)cQN4 zrH;%LPKH~N>@~8{pabENqdZB0sX&`usL?bRmN$TRXSkSc{>*>Z2YSFzxn419J5*^u 
zN^B_+ni^o-TE7^#*40C~v~)r@=~>8b|1Kq?n1Ru@4IGD|;AQ{qQ)yQ%4P4j1S?8>> zHyb)*)Y1*%(sJyaI+0C76!d#p%Wofse|sCk2s%I-{eAEUz|P59QDMEGI*P2$8^_L- zj_S88Z-^Bd^zVZpZM2G2g{ySntMsB-T&I3qC+2n|xOSu1DO{((CQ+Y%B%Iq6pxabJ zhf(Y>t`pD{u5Oxc10joKzdlJWh$ke7XM_w5m3bi?QpcwTw@FL=FHa@>_9Boig~C}} zlD$|Y*eh~Cx8(6H1tgq*)v3K0WP6eDNi0$Xmn6HJHWQaPy>+cuO$z5^d5Qj#qK_yIXi=&q zy(I-ki?Zp`!rp%>OhIa(yo9kRRWe$067vz5AqJPl9w}xqZ}jb@o2T)#2Lmy9FKbIGJ3K)87Q9r=rr!hiL zLBstA4E_g_)b;Vq0vG~8?_ilbPz5Lp%|w#4Af7Z%E5dgK+au9DRfO#bB2Q@^N`L%k zLQj`u3ADAsx+pX7zlQUdj!s+43&=_F_cac$0-&zV`@cYz(ENWgm?cG7h8b3^EkZ zeyTocfQV`o&yq*=lSeUQm>^=9ButS<1rGfSnlb+eh($GqF-=s$6FJ=aX`i>WzqF?S zx>Vk#8#D4qEoZg9f(~fD0e9~3x4Mt{48#43g{omQ@ENA~6j?%coKOX#m>5Wy*j6(V z#SaNM+RsS0mNHl7p`$z)3g?kCGA#ULNphqXkYp5Mh+g1N9O8`p<~MrG{{{rltztD0 z_u!`x^Z1n{ye5k;MnRX!H5Sprr znOS;`?fXAniFmW*8h(L9=d4)nrapNV`Q}m-g8vN7fe9YXum46PV*)n&Gx?k>`B*}^!sIy zeWU??n=pR;4vg@l`f6v~>yyU+{wDzT_1piO5%b&RXPSn2qgNXA{ZDMvFC+MyG4^}X zlcB425WH?7Pe*M(OYLl?wu=8)<*PwU&A?WNWV5C!xno%m^M;q?yVX%5Ko1tODbpeZ zFwfvOfx#q?M7pDRx}(Nm@@g7pD-Kerw%%Yeu-7pbQt3ZVD*f?50{yXmyCW{75}Ykn z8i{lVvBBg;%2zKKez&OJVD$i7-FynY75_2I*TU{9|7dl><1>qk#%Og&v<5Z~rV=2g z(!Ru~GXJRZIAdvH?|3@YPxJZV4=U#WIU1wNAY!A+{>PNg#?qiKHG-)OsEN@Pg7t%C@WM1X^#*c)o#!o*8J2?z%@#Ho%{yh5eUDjP$HMdSD02T1=$yV zwvt;E3t0$m6;w}-ULJi zd}{_Dc0{9?J!it66qk^4u{6QGiXT0V7iRZOpy6WA!qFfJLL4JqDqtGdvhfu|RkIqn zZV0bdCLkW0y__Qlf-cnherZTrwlZbtw}a#L0LN@!K$ZJbdvAiktsfZ}dRAKLB7JX= zwu|JXvq~T@lJYY6piQ3X+3Q%ss?JO4*i zF6~+-yd8bHH|KNRuOj{ahn>s&QI+?plzyp3ylHVf&O)7Ev*v&4RxSAr`M+s)tsE+0 z6Ps}*Ibv?N1zc^K$2?&!T-9OEeG2GwhPlj4$;uU;{C17^;i}pCgWY8ZzTQ@4yn7|< z`nh2JJvA}aujE#c!z%FW%N4<|3+}cp=w;L7z%DJkF9+GLUjCVnp8OYrxl5mxtEH`C z8FS^HO2CIoZ`HfXFTWJYFG{z&zRUAe$45R3w96p1N+?P{RQY-G{_=7l%1UQ>1yE(> ziSqJ9d4&;$+Y0`XkWNGe>ZpNeKB%eP|RCdG0?IIEIAtY?vM> z-V=s+M9mvR#@>Jl8OW1tWyvd8S`NUDEM*J;y5}e;tz3E_bcwCDIF^&l^|0ec4oXl# z-IolrSCdtIQ25l7$C;+@wAam)kog}1!hkQta;RDyS1!P=99mRuz%H|NxHKA&d07Cv zGVKAEGLY^Z9QPvII0z3;CN5K_sBn8n=0kg4x}}UyutqB{e;T!`?v0pr>$a%lxAc(t z){>%>6LXO% 
z^BA2iYdWJ*i7$cSo)v61e0q*~*>#@gVUX{aO~(KF4J+Bu)HM|MIA|DQBG|5lweRv` zJumKs`arz>72?se2lkP+2Y#0s{ga)J{){y~`YGUg^%s2dc`-0)r^uU6zE@A=)z>W< zo85i<6Fxr#Jm?qrbI{$p3wa-Y@-Cf{Z=dk%XZ9T#=y4ScdEcU?_^MrO4{$yQ$JSrq zX?Xt=aOF(=dhvT;aaKD{7E*n9Ww==$z#NUJnm!1g4T;xetPzDZdvs3m5xmFId(@=lmB%8U$Ez~H@ z*=CUXz!tK3K@M;oZ+q2AcL)%!<0ZR=1PK0Bc(-r??qS9UxQXFz;mEVjU&8q^2onXK z7z)Ord|7hWh&{@;(g$ZAqyec*5`Xti?y*vHP#fse;6@ zu8PM8mFk@*6*vTcR@6E?wb~$uci6MwW+BXLn=6}diXC27cza_X#>gOD+GLTgZMH;K zG|~qr$9<3uyA^2e(MsCvdKY%_$LTOPp%F>U5(l?NC2Bg$ymznEOwbqm1TuEf%RcaH zRBiukcoDYi4+(9bBPS@{W|A>_Vo!cpK*}0Jwpw%x)+i5MK=K@f zC(Lop_tg5#7ayyz&0Eed27~Im_aU^e#mpN+-|s?+=Gx-qXN%$s9(>jM+;C+kc>*~O z!hIF55T%jDEQyct1b!q3#dCOevRCoWj~^m;Uw*x=E{0gZQ~Aac=b4x6h2s0SXh33` z1FDv*5;=}1l73u)1J6qtDhQ`WOrm4VKftJ0>?{rtcuW}v=Uz?mis1z%4s@)|2PT}o zG$`DNf0zLiag10P=P+EI4549M{{%u&@ya28zx^6%pMB0=TV%+=jTyxFIr0#JbgZIp zP=}3_OlyCmhqg-%1Vm*lpz@HjoD)tNH%-AsTX5MSwvrMNfPx9aAB5uK8ARS?&`mi9 zpi;KHoO3_6k`obx18^zlCb$!se;YEA^IibK@jb}>+~BtW!UlHEFL4?Z_B?=oxl@1- z5CC66uopPTL zyixDC<0YF!?AOTA>S`!rfuG#850_~GaUtQC0qL}h|LuKtoywU5Ah&10@g1nHr#8Ry z5zrBm+b@-P1e8K;=zR9O!1<-E9|JpJBglSv#^XS7HHG3)(8En_guU|K;Vv6d&(n*> zf*hp`A}2t|zlXPNX8jHVl8Zfkt?6(+4Lk}`Xko&_^Z>}+f40aszcs{#-Ps(Oy4lXx z!$&eSmR9nTQ(x(rhY!I6ffbJ!ChNKdSz0< zIOS3h1}6*8Pvw(zGwusg$kz(PJOzO=n0VS*?F=ci3@N+V{|{Qo z7*t>xRA{0q&fkejD+x<0pN9Vz{w04$O=JZ#lcA)Q5u>e>2}1dA`=ZN0^iM=O^Aina z6{*6?j@&3prr?9Zb0Xno5m*Hm#KLn!`Cl`OrXi#>?ci&5*mvo7dPK)Jb_U&$on0Kk%RL+Dt}T9m`NFwmAm7J}*cmgI>-l zAV|%FbIwnPg66_M78i^?uZ%@FU;8~^qN9tRwzH&`JgchhTTb)eu;xFsD;@%7S$ebb ze*XsgDhJ&#*CNu+<|vth-mj+ZX+#0vOh_%-Wn48dPsayHKNj@eb))UM=YygJm5qhl zn20%;NLUVP%ICT^XW%VhG}_`%4t?-f&oqx#bm5l*jdTTp-x324}7CMAjENL@)8 zIT*s6{jcDqY%K!0mO)1!EeQA4?0^ct>O_LO61Rv=`*(;2#^RB}w!cbNqYR5n z$oz^Rx)oNW2GB*`A*M&;5V=Gd#$y-S=XER`5O-F>H0~5H_D1wA^l+*Y#gQF8H8QEx zq-~v)16dSuXd+SZoMJ*Vh+N|l$h8P_-3dUDzW{xCt+lA7z5tz#3+QNUOA-0d$-d_j zl{z3jpFS8RdldWWsRPM1w*dLl8qH29=8A0qTKvk43XRl3X#Au2a$ITY3%qHCYfcNyK z`Naq?yau_CE(*+Le*~tC70AF&XJki@wtS$XyqGuv%5ScSfD;oV$9LuS==b(;Uap{D 
zuBe&z_8@&c9bLBSO!ZU2{x*^?bM%Gb{amG8X}XPs=L5s{t-jwIz4kU23D=YhIWcAP zxBxyK9o($9h)+uyV8R$*)}nkduj--i{t2l0_pKwcJTHw zG^5zhn}b|t^Y#IfxUa9j@$z=EmI&4LMF z!PKA92CPjgYxIfPnS+!804WFuDNugW0zjXhUyKq400?YM(ZE009}ovnmsXFy;eA7I z5}g+mU~G>T|Ki>c3c$M(XjjRqR`lFzK&fVa#z#QC=awS;rvmPd5A=>NCE}+-_*0JK z?n3C>lXoodrn8Cx|8Q1qJnu%xAhE-2DFpGZ$sq$fDiJ2tYC zGrjjaL&un6PVa6%IJ=h@ftVjoA>8SiN!{#nhn>vB4oLS$8=;E@m;oQQ7^}Lo)s%o4 z%~76>qvT~Jik%M zoDy+aQ^o`zgo161Hcje*H6^Ec0Vrk*^Hup-K>t*LlgI`^c%49_7&h@2Xcr`fV^2Au zISP%0W6gc}LB|pDBXQQ>B;wT-Xu=2>^uxD=1WE`I7=L*STv7cyp^Vp5(qqKtRR|(#fzan!n3n=i*NsG-Q$o{`bi}GTQ^7mA^$lfPlC!XL z=OymkVS72SaNv@L_vmdL*kShV(-FcTpVKslAK@d+Kb2pM%~Y?OBrqu~W-c?yeX-HY zTV~?P=N$UL`qdw*DwJb1^cT_7QXiVS$GP!h?eJ)+dJKe#-HtgOSfJFhD1h0u7d*n8 za$bI`r?H(gpW?HV(}+X;*Y9Vp$M^N{MhRyUWbjF9V)*q$Li&&8)whoyzPftQF~Z#87qSYLtF+OuA^YxFG~P6F^yJrarhxeI7OEW@2?6AjZFSxMg7M8x;q@H>xuWQHt_^BbwvA3i; zXEqRgtK;s+f!>Wy4B=BD`c-#-E6;p_9sZ2CZJc4p|6r$l?zAQCq)ips>GSy*!Al-Z z+4)D>Im_R{%HPFAN80^S_8$EI+d|}Lg`&v7T^Ww;C1tbjugggg8P*hdBi;7O} zKi2|PFQB|UZ(96&T9{C|3rW48cw>8_pzu4A?yso_du@G#tiTivDtwWc+-=c9!sfRTRG*~tC{Wd{0rz1;VRIc74jQ6)aa@bvPa@Cg1PVN6715@G4X@bac7m$our zq&fO=w+MPj^Yr3EgG83aBvOyP3a&ZXDuQ@lBym28u*W3+cL~NbGKu26AqhXtvhU*rU&bkqNm~+pR6~a5iWD__Z`iPuw&@f#0`IR7o?qdW?b^&^lNMFr z2H=xQ!iz73XJJ9{x$KxY%o*!I*A(bUh%D*SsOEs%~R-*Z+g6OEv5H6)hP2->fi7y`g-sdf5ALzHfu{mtn;F@YCoh zAE$p=c5a~=@>56aKmYt5X`e_YtL68{SY<5rotbKbyN5C5ExqE1n# zVY+Nf(-vpxd##(?Fi~^G(h%s@876uMxUs^3@|<0zbT3sfdaaLXDDn$)KhFvUNH!&+ zJtf($w?JQn3yU}t>|@q3*N!akS82ts#rCcd(IE5doXy)|owCW4?l4FAE}ko7n{fWQ zao*vu{|{8|WnavGnd+Uh{)i@NaO%>F@S`*SVP_3$xioo(oFYRt@vX_(Z`twLZ@9O$ zorq8COjJ4aqM1H`DS4c#w@{T)m*_yNbcHz%CYll##n7(Mh-N=xT_-zv%cfa>KPNC9 zu1nplob2e4mJ3v%+4!a;GjiTp!-g?;cbk8vc@kffv@L65TDT=<)%G2I1Je2iq{D%} z4z7-qDQ!%un2i}oa#M%g3U6X4W;+l0XZx2Yf&r8j=prK)@VZN_-0@GjO>f znU_tcLgyWNI3bFpb#XSJONRGh_JA@_GY24 zNW6q!sNx2x0L7eKWnSL0#ThG#C`Aw>`Q0uJj4IWILR_y>=v^_sN14>8Lik-#_Pqm7 zBolgJ_9^GlN#MPm^6aD|$v1TcbbVUI*e!}OBZ4S_mp4|I@A`&(LA!dHAndQeT<%F= 
z(6MLbBrp11V7sHvbGzd26sY+1wb|6dE{+2)jwgPt{f_qRi*p$i3%pgk+#8p*lYp<| zic7ZHoG+L{$Om}^A6V2Io44EtftNSR ztrM-{4{z#&ytv{ieyZ*NI8Nt0_>S4MGhV&Ik55mfB3Hn%o<5@(O(|nNfJQp`@t(fS z!eO;YYX2mY0D+iwO~ZFkf^Q()eL8?U^sym50z^Lno=SyIghRd4`E$mie9A*Thg6}#MjcCr$s0!Vi z4U|TVik~Qpe&Ifo1}O&hI@L((P<_p*0dnsW2p%(&5^?U_cKoZ&x>m*FRQ*sh5fpA4kda-uYjSbH448A4k{Ct@SQXYLJUyN z{XIAk>a{`sNBn(`n1GWcAk?jzE5vizvq6Sb{9mZNj^9x7ej$=ysQ*+oCpgTg`o5_G zl~#;E!+Qn8Ua9y$Q5o;l#E10yBm8uOho7;7hk<0g@dz~__v;=LUa15=Qz-A$I*@ze zY&E6dUqL*-gllXxyO3m0Qot%qT8f9OLGWve735xaS0#(pYLx#5?!7=m)H%<0Qrq&J zvE0;v+|(@Xb*Johr!d~sB)i*@oOHJf-p0PTu?Fr$cQd*pPIzZ~OvYE}iP|{V>uJcV zY9L;3PX zN01d#Czx!73fr`@+UADp7@;-~>+-t}b3vVJkrb<~18P$pcl1dCsa3(z76NJGZ3e0B zBjZUPywkqkANUJb>)56|Q6UAoOX)@zeRQl~v{~coAi4o-!L)_aMipFSy;@Y*u0``%Es?dVA<(Zy)1AhD z!IY)GMN@3O8mwA%^7lfJs%0G2Hy~AX8jA$dQpVnXOW4CHtPZLI_+46vXQ}}S{*5`B#(;#g;2?B{7=Ym5MR9;JZ1rPr1d9DM^jq?u}?!sla9?xE# zZqT3`B<#tB$9WX@6&oM~mUki$)_(vD6$+a=^qO%|@pcJ+F1A{ z^8pXz{UW+2rT_qV)hh7e2QyHZ8nasvll%FMz1O-!{&B@7>H85QaJOLxvSYKf*BP_h z8N;|^ll*FH^z};yUbrR2HQkW5`Lb2i?#Ku@eAxosv7WX2vITf%8L!-sMtEkO$L#nr zg}1I1p^rqz0`A*@-LX~;ZB)y3X-3}t=UgD#{f0G2w@oA5vuaWUVEXi=w@ZUDu7r0yD1TK~T4srVKR!55mwxov`YZ%ER2GQn3Y=fz3oZ_d2i zEswGAzZ5#x$Qd2Y>-&}ETwM3%MI^#VSB zMZ`#h|8^C%&>Ro`LyXBK)hR*KaAVW-)$jHd(l#xS$SHWVps5-pSjJjP~W4m2y zBOE)@Gzbd`C+h7Rp5f>wT+FO#T1lGIOcC)=$e%i1DPb8?edRyonh6(IlL@T2YsSS` z)!J0Zz4{_|t;nR^8E~z??f-)G<$q4viSk#q6_T>B{nhdIG z@>}R{uvVrr)?Zi1f)b=~AKk@{bffkXlg8VHSvyQ#Epww*Nu)wi)TODDc!zQm_OGo2 z&QJTlUL%R6uKMwAbIlH{V`Sq5tx_^G%UFjWEMVvVp-XU#ZFW5- zx9Ya7(%fi4qnT213QTvJt)m0#datt7T=d$H{CFQ?OIdf#L|Wtrd@Pin)`@pR*BhNn9dBu7KA=8CVmfEZ5yL(705pZrx&+kOD_OF zrMW<=XNq5($gf5q%*a-ri~)j9S`kt^1P>%|DJ(mav9y7Ru`Av$G&$PexoUT589=`K zUuae!-?o=;(>hzGDUvt#_K&pV zUlff-l5fF&Z^)pVgKft(Vq2NJPC^F-79QOhSY+u+af^nm75#AM!V9&co8U(`<~yv%ha)!sg)!{Gh5=!ynSD@Z>FQ_p8?)@zdX3Nmw?X?o#Nnh%pExBo|{E%EktIijy zhckg;-Ku+8V)c`S)9VaV7Ve5jDjroO~069c1fM^{02|P}K zUA*|FFgWo(p1uGtX_l@H4W_QEobsXvf_EV}#J@S~S8x@PWzT6f5M0l*1C8>k+DcqckhKsac2L 
ztO`s2f}pZ|$eh#W!P-YvF@Qr~2&bUK%JHXZlH}qJRTJSckKqjBa)ho+D!^ALc~H3S z5dMr4YCa1x+*Yh*2Sxey4RfJN&Vg$uAR}BIBd9;b&pf(9%yIsavkUt};UMnXqeqc_ z69L8Cx`(=<-VJpew6ivl3BSG7X*)wv_qxmDvB9FRYQ_6H7zzJP?|mD#<$c>r@c9n? z0N(Ms`?0eRkKZK`P{U~x=Wi_rbQ#14#5RH`$NNfb--DCjeJh5N(b8E8zkSFCZMLG| zeWx%C{*y5K##W=)&jj^^qRbDNsOm`H_lK!Zi4o;s1f?RywVR6 z+O(`6hF4uL`=*(4!+Gd0e!m+G@;3uz3t-RfGp?@3c?2A;Zr?KBuBFtquVPAoQFs=9 z(WKB21|D?m>@$YG_tdJH7fj+;dNm?W(w`58+MlP${%wlRtc-20qxu`$ZDf1bT={3& zGqc^}tExB-=uJhpg2y;Ry;|`wXe|mXva+-&hvt{;&E^zfb`B#h^HxWmH^2KpmLC; zL2;8T|KS>ju2}|`yRqQkZN9+{%LW6HtwLbGf&+ZW-M*&D80(HwW8sZ?Z#lg6Mj&v& zif69@j#%^`vcNb8In9V+Aq0m1gw|}7IV3x&e5tx+0P(Jkb#NJb`Yx`|#(1dGG(vJQ^dKem1hQt#l843f&)f z0N%mt0D;kBeuUBX^EpdA!|Nn|3Z=FAopy@e!aqT?UEx!rvM#a*K`8n8l*Nj8>yB8< zz|Dg7H#lKpU7N^p9q5W)6;9sIw53^HMteQ}w7NYRD5zX~V?N*Ry|M_BDv&Cuszw(2 zC}@@{XCAkUU3kp+T+cHNlqdBe0IR;#G4esg|BCUt)O_;C>J{K6-ZLn`2-S*xC@#RQ z#FnV5Mvf{~P`K56ZFtRc{Z^Wp4fgTp_^oDXc=>eSbZ$R=bX#4ubeigyfi~Gjv1fXN zxoYmdXd0t1X4lEzXLy7*zf^&mSBLl#wuVU0(jfKu3Yn^Ta9&obdf~P!A&f@_ipQ-U zkv4{Za$`f$7H>x{ZKTjX;0UNg881JJDs^GZ75!CC!;KT(Prx>}LIsU@L5el*ILmqT zZsZ}paM1FUtmIkb5@rL3{B&Y9ESD5zkwGecyvBK<)&Vx332Q!vuH<*(1sSHb&F9F2 zu;c|nWtv8M_sSY1@g-+iOT9`I={s@xco^qRz)2I*HYc5(ko3Z|F-Y@`AXF;akB=hh z`eSeQ)%g={_@cJ=TsL8MyfP7Fo;5?DIbUFACo$5>k6_e!$tXP{9LLRa9Z_r{jvd?F z$wU6{MKo=Oa}=}D^_kzv%Yt1fbrW0;R@p6-+5`a)qcD9AsGsL8@?|P`JMpPaSS$|T zl56lNCuj~0iU?9aO@Q(`{>vL#(N2YP+6kl|mo^v+9cr74*x0QNfzvq6d|gvS1eyfZ zhf?Wqi!-+>$+;dv$0&{07I(K>xmYvd+&4iV@obr>LhZ`$%!XmTK^UaR)Aw-ArA8;WW zF#_d3VHhT{#>wrf78M<7JIck9r9@UP<>H-s#E#fzwFLJFt`bl{@CM5xMXq*uzt0U} zW`p0Gb%VxBgeFL)GQW@4_HWYeM~5E=B{xeePzHnYoJ8#)gdYcnA6Ggd^f-0IJ0kQ5 zI3N^FP6sDBwz!SwHm8rfkLNDT=avx2n-uC)H8XEDhra?Ajmxlx97`K}0KFUIaK*$+ zrl7_r9i(ORk}FCm&p7*xGWqLlnJ+>w?}~|u0!$MafL4KNjtJ8nj%AL>GFK?eI%gt4 zKL@fjX=9Ml6F?gjKwAXX5sh)mIL=S^1RiI3cEJ|6P8n3g=G^zs+ot{qpOq3U$S_Uc zJG>GDrcymd05wPeRS8UGQwKwt;dsr^223Toy?2!?h!QM^ycDJ~-9N|>5~quTsoWbE ztVJ$Mz~z(7`i;9D$lVURW*I7%5XmRSC2 zMfua3oVcl|lBflhs3j?3Yhe%OMuf|U5-S71Zh&NW38e%tS$ 
z2X%+7*?gyTj5`dj3r^~j6Z*V0^y&igqE|L>#9_P_`I2alj=5>Q76Q`0btMU5SkghD zDu_T;L7<8xP^}Eo@21rQ)?|{_L*xN3y}1jU{2r>dxjXyEwjSr!qn)44Q*$^bH@E&6 z>E9e(1RZ_E2lvWRrmqt=&koAr?~;>N;Jd$j{(+|Yu7||k=JC_Lfr6ZiM@?X`DtN{x z!vpp6l2X8=?1VBpRwdBu7YXfuqeY`tA#m$w?O2mlmxb+%O@q4Tx-mz~%a)T>RfOxy z&K@o-I=VVru}8~)hd}>&Kv=m7m4b_gSI!o?Lsr zH4&?D=HWtK%FZ$lfv=LtbMeGj<538C=S$P@%7?!4W5&QIdkll8p60k$62dy1V*xru z#~(Jp-Rq}C2&g;gf_qdeC5>{98R4dkc3dOTQ2_UlFUF=N8{m->-z7?O{cI+{P){Jx zQ54-JG6IcFc1%Pz=>_pS@*U}`9-ULo9GyCH9H}1l8%Q6?8@7WPQHFs{Ql!$5K~b(~ zqdgvq^gt@lrGmzkyYLjZ==x~b^MJJE(n8mg1#05Yne@Y&PlD08|DXfitQcZxQ_(@^ z+OoB+ha<63?w+IKKI*UP*nxDdkKqdH7ZQD!ufo=`0_oZ)2dQBL*0CzOwZJga5t`(}~XHudSr7{x=& zJ@z95n;W5Ww@M5+>0BI5PoJ)suG6)etwr-TNF^_nO&7~$mGI&^<*B~rj?cYbvrl~f z70lQHGx60boRXBT0P?9jEsp_1RUfT^_$L|!6&y*q^v{8+K2}R$#ewRS(^iD*pTkBY zuB+7UKZh@e&PYV-07UC~z0s{emrWXHTsLS;7_H)d`wky1efM}fDy$ZjhAAA}dA?GU z44tsD4)5#mn-P7MuD&oA=Uo z*wXfx%wtx=%#A;b)KP-e210f}t(KDw!7ZE(!T#0^XFrUI~FE4SbY7$0!;I;&`;i`Xibw2sJ(GcC6LwjHcKf~3uQ80Xkl@N3#zvqg@ zu`0{l+`VBDE(fkMC!f+0TA5!!Sy@oIYu#KP7+T59tPBdPq5!Lc2&?kt#iDIhe+}Th zUeE*41>7HqtK7ylMsypiH*5JS3t?E+LEtKgz*RxuiX?FPs1EA~Z~CFDuQs?NR&nDM zGPzB%*Vqem_N8Ki?t?vO(kZ`BOQQYH_g8=`X!BaI*;=iB?27icL2`x2q^rP9r+KqA zdQ9rodG36prSN3sa*@e$I)0@^b2TUXFQcy4a$~N+LL{}!tf?x4A?1=5oq8qO>slo? z=~^V$Y0+%WcD87H10aAc_JY@JBsU5=_@eKA1F7n-4O0 z<^~(;7V~~v+kabIFxW}TKG+F%tajk#zM@aQG`FWC6e=i_hO_N8U)Oy;=tcM5oCf!+ z^DXC9!pqVTw0Gm0qW=mmN&g89GRPPI5+|WkC$eIN8kbmhpg?Nrku1)To3uan=nvC! 
zOVMK3MF7EDg2r2-I$gFjP`0F)rzqtkX+wq&-$2Lyk@nmV_k4Dvw;r}vA9t;{E*3h- zYr(E8{*i|E(d$>?WEATX{&yLa>{va_Tq{Sjxdv|jFDI5w)kS{#fv>oM->zY-XdveY zVaS)I;wn7FWtrkC{#1HVLhN$D_p-$(&IJNo#RH~Owa`>U>nI6)2Yc~%M>zdSfO zBR4;Y3qgKX%6I7B&uOan``$nA_k_n?@JFYU8LkgaLF9M&fu;8@`2)zBvYb$|9W2?L z5bQ4W>UIveAlaPdU}bPXvbe?e=L8{2;GJY5Tp*FI;yH!5fkIru6zc`M^36i^eL_7H z>x8?lLe2zuM6Xxz!8?)zDf9JJRdX3}D4Y+7r&D4`ISs`dd07SOr@vA`ox+9~su$k4 z`%wsBYq)1hjNYrOLcF0Cl|HJf+I428or#N~aIUZ>G5RfjKsyhR_`n7Urr$2aF^dpP zxJ@Uf0pV1=f zj+MZ9Kn8qUA<2mMYFFjcvBH@jHLALK`y70#+-5;I+;9A5>vSwoiOPIOmqO&ZLf1+# zzkOrkA9zm^zr0};xAGJ}l3%msNb(*u`0PhsJm04=dm+<;9sX!*VFVONiREt+uVO8& z72iRmcCQY9IJt=FXN#3bzdht8$(Q6`oo(M=LAEzBJ@S|{3H;-bV_gB91O&WaW?tpjumx6-2R3L|+vu?=;>uofLs{UgoNTH9P+UF4a=P$Q54->>&G65lBs+Zp2P`h=I>#;CD*l%f;-R%EYAc=6=dRbZ=Ax z3|&dI0>{<{46VnHqvsBa#!D6uG7AvOn*A3JVkFv6`8rWk(^Kjz>f=Yo@a z?#{)N0zdiStRGp%2`A^W*;>Xai{hxQ0I`$$?8ctZTa}%>ACbE~5M&2AAwgl1(AyrQ z*rQA&q({W~Xi^oV_#NZ8R&`C?peMm$~$G#^$ytUknqiK)-GhT<0%&ZAW8A2~ws@)Xg0YQYZcdz93VIgU^*&WlZQ|1(SePB}p#@ra~( z$j<*VxLs4o=9JV=Rgjb1GUvZj|53&{ql|+6*Q{?R-T8%EFqz~C)&z!~rJfvj++zR5 z(-n)S9QU&MvPEaIZHtAn#Z%;bMLR@F2YB`%nJ|6;9)Ey$DRv6BJGQW-9tHaawmylS z<^hQvATqy6jUX~v&llZ6_6k+F8M{Tu8Fc8YQHdQuG9NMfg)hywF}C$yv41!17UCK= zixXI--pilhho1olP_x-ReHj{oS37`dK4d_Oh-t&zg*i>LQ_36J1rzwqTv%5{(Blw zFte`c`tD$~SJABW+`z3KBiIcV6r)z}StGyZQQqroV&5dKIRCRmpl)y1~HdWhnF{ z0Cu*@Xz-yg^n`+y8z0Bo9naKln~jwlKFmJXaPYx+H7_mnOUxi#tJmh4%XW*X>X8dK z#kaa5UZdc} z%l79c+31ZHU_@J916bOVbt5%WjOcC*RvAR{SE3M-jE?!$)nUaG_f!nxKT zlMiI@2|LESDAH2{EMCb7%mB%*byGPLmVZhxIf_AyLAUa2;L9%D*Gn?w=ZUhaHa!Phx%z<=97mFr6 zPvb1cZ?P^W2Nsg9nxW7Ukm6lpcV%hAMKfnhl@0w& z2z~#K8+VwXP{oQEfVUp<*;6*m?lW-XU`;!hdc{q3iUMLhc z3b$(BLANi^htC{#?B}DZTR)1%2T@{l0QGv2vTb73`c~@7@1Cf3FVx8OdJ6|vI;VWw zqtYelEFYet{7=xk-`+)5w?!sa=#X}O=h^gL9-1DehwIj-SG491&_e3y=JqZ`MU|n+y`XoX5MU;@fx4CpMc1?lR?sBZw8%~D z&T{Rpy3ElixH`)fT%R9e#9fqJbBQ66b5u5fW$r}QG!*f<8T?sQG2ixiei$p7moK?~ zS~I)p>`Yhln=<+lOl}BLX2KR$MdBKp#B4{BY$s-%yeipt<)c*Y$SZxgG329`z4$8e z4`5qX>)?qUxKYQy1YY3)s{XQN?lbN7E&TUAZEk^@N4xPu1q=g47y 
zBU>3~bSt^_IP-qTD2U15ew*P>M;RiyS!QVPTW8j}la*H?>)jr-9}V~mo7`!9J2d}F zoOUZ6r!u+Oz=5N6G#|8|7YT~nJZ#|vIysK}YNSqzYTk}#w(8U4G=EwC|^F zbF=E&l8~>C+ok3XNK7+Mda4JVU}4Hh(izr{{^om2hpAYLd~GwkK@LsO+GbAXsd+ym zIkgtIgXXt`o-?-sL|6DPYzFSmZ31EF2H0h40i?xZoPd1xP(ZxJV1Wkk4V^M%)n52QL)RTO8z$hjiE}|e* zh^-rmX9<@ek(XM55TFC8qFT~#2~L5AB%h%ng{rN-Nw)^MYNH5Xh@4BeLO7Ba7WIaY z6^sR)^+4I(SAs9bI5X)0$)fMgixi%mShFeAH-&-A7{T)$JI~0rJX9q0Cpz#`B#}fT z;VIZPKF7kzLBz37+kuf~UI|GnFzjT}5F(qtxqGw@7(?dmq7!>7{FCX>&0fnRM_G~2 z=9oSfH{+pY6$>K`PYV}*P?C${&-@{cQtUd>DiMSCA}1BA4~}yxHCbg;L74@`deBQH zKMODz-(Ma%JX$!@&4$3v$N22T*H=ALU9x!CWOia^%#l-x<3i12S9G$arjtP>+p(ApM*=Wn z0M{+k5U)OXT@jZRJ??c?TU*~zepKp|dma^?GL^uzhnSjZ$ubg73L@%su-0cSPN>ij z%5KhE?0e~HJ?Pl$CtC9H!b(Q8hH{&(3MvZ292RkU;s%CIvt)~1aU0~Qj9J2%+9ZCN zx^&a}Xiyzzhr#K~BkKte-z=?9DlC#rqv8IbreGdT^?U1b0*3{={Bv66?iKZJSfvN& zM$CqZ#j4ZF&~*g*@MqDd`c z&UC78vc63672H#)IV__Y5gjKVC2tX#DPl7RDSVfXre>rYN}(oeNhkH^e2f}2*5B}P zuaP@2s+Bh(=Vq&G)QORz)Q?q&a#pPSBODqUn6*Q%uWPZt0=`UdU1&EG(s-FjaS8}{ zcR}OpNqeEU;Ut8Y$F3y;g07`Yv4OG{qm>9&bPDvHZ_^O1`Y3|=%;rIQu$g6lAEv{w zpHO*F+!{hAWIox03yONGBVa#Ed)Zr^Qtp4=>%o2$tnGzEX*_7Lr)im=-NX4}wh#>S zTj0%GSU)k@asJiY30C_IJ+nS!0UCO_FmkDHbIdrWz5&f54>JA?*zL!2C{Q`AUK(F- zs&Av^n$S;nitDqv5#DrbdW5Wv)27~`etUDXiTW}rrruiU;w98mvx>Xt?ZyS`A;O5S zs!@wuTj4)CO_?9_-Qcojx72CZKgZzpWp?N6Y({>^E>jk@MsJKpDg4kq5`a@u$unoJFie3V)7}v=I${(C%Sg0sn{UdjiiP!?4~h5{O@6+P@{(HbsE7h zbiWw#ATc*PbBM8b_F;3FapfKxV)?{Xmcc1*hb#t{kHbP83qQt%d!?KDgY46^x;m-% z_6KZi!+wzX-|Y`^Mr`Tejijx$Xe7-f*cT4OgBaiBAPmfJXRU4w=G;LqGcfc&LDGM# zt<|gz4KK%53mS}+n2uN5rZa04_AgtQlefejzvOBJ?Q;4E)gpOFT&-Or2Xk|+PfSQl z%7%E44DHE$yS7-^CNugczVqZSSPa*^-lpySCUQXl2BoqYT?8*zVMOVU8!A_SVBUu$ z$Ez=c3jTc?uKnz@DX#g=QF_rEkvB2fSXe~o4#_r_%+4GGB%*$D%^N**f*?+JW0-cU zQhVa|M!-wY6A#aKvrKXFQ+EXHfK;C*RG%Un{4;Mj&9-+%t}krY(?YZDmZg5@@g+v( z(WyG+D&=6p8Se_Nn9g= z+Lb$A!J0P7FxejsbFwcE1KtnyhOhMJp#KNHvWG&0SU^cwlt55gPDqMCLRMHr9g0Ll rKt`H?ftCS^hK7bfi=Bgmnt+3qg_?kYo|XCclYxmt2a1-K7V7^27yL-# literal 0 HcmV?d00001 diff --git a/role2vec/tests/uast.txt b/role2vec/tests/uast.txt new 
file mode 100644 index 0000000..280aa45 --- /dev/null +++ b/role2vec/tests/uast.txt @@ -0,0 +1 @@ +role2vec/tests/uast.asdf \ No newline at end of file diff --git a/role2vec/tests/vocab.txt b/role2vec/tests/vocab.txt new file mode 100755 index 0000000..47e21cf --- /dev/null +++ b/role2vec/tests/vocab.txt @@ -0,0 +1,539 @@ +RoleId_18 5226 +RoleId_1 4165 +RoleId_85 2939 +RoleId_49 1908 +RoleId_45 1728 +RoleId_41 1095 +RoleId_47 1082 +RoleId_4 863 +RoleId_89 833 +RoleId_87 774 +RoleId_2 719 +RoleId_86 666 +RoleId_48 624 +RoleId_99 568 +self 530 +RoleId_110 349 +path 344 +RoleId_6 328 +RoleId_7 326 +RoleId_3 321 +RoleId_19 306 +RoleId_61 281 +bucket 262 +blob 228 +RoleId_105 211 +RoleId_46 211 +content 200 +assert 175 +model 154 +RoleId_50 147 +RoleId_80 135 +RoleId_94 105 +test 103 +RoleId_11 101 +RoleId_107 97 +manag 91 +RoleId_109 90 +RoleId_63 87 +name 87 +RoleId_62 87 +checkpoint 85 +return 83 +equal 82 +exists 78 +true 78 +file 78 +RoleId_42 71 +get 67 +RoleId_79 67 +RoleId_96 63 +nil 62 +RoleId_100 55 +false 55 +type 50 +RoleId_35 48 +format 47 +RoleId_43 46 +RoleId_68 46 +the 45 +delete 45 +RoleId_103 44 +notebook 44 +old 44 +directori 44 +RoleId_95 38 +new 38 +RoleId_81 37 +RoleId_21 37 +other 36 +dir 36 +list 34 +RoleId_20 34 +RoleId_5 32 +string 32 +raise 30 +RoleId_83 30 +from 29 +txt 28 +not 27 +save 26 +base 25 +google 24 +for 23 +upload 23 +blobs 21 +creat 21 +instanc 21 +last 21 +modifi 20 +web 20 +RoleId_64 20 +error 19 +debug 19 +storag 19 +rror 19 +text 18 +RoleId_39 18 +folder 18 +max 18 +httpe 18 +RoleId_91 18 +isinst 17 +client 17 +hidden 17 +ishidden 16 +mimetyp 16 +parse 15 +default 15 +cloud 15 +args 15 +applic 14 +RoleId_82 14 +class 14 +param 14 +size 13 +log 13 +RoleId_93 13 +json 13 +parent 13 +nbformat 13 +rename 13 +unicod 12 +RoleId_27 12 +cache 12 +RoleId_17 12 +RoleId_71 12 +endswith 12 +RoleId_70 12 +fetch 11 +throw 11 +errno 11 +create 10 +RoleId_26 10 +help 10 +config 10 +ipynb 10 +uuid 10 +startswith 10 +obj 10 +writabl 9 
+RoleId_101 9 +result 9 +none 9 +will 9 +hook 9 +xdirectori 9 +pickle 9 +prefix 9 +RoleId_77 8 +isnone 8 +ofthe 8 +updat 8 +RoleId_15 8 +RoleId_78 8 +post 8 +acheckpoint 7 +dict 7 +dotted 7 +project 7 +read 7 +RoleId_30 7 +hide 7 +str 6 +download 6 +messag 6 +islice 6 +broken 6 +jgscm 6 +bcontent 6 +gcs 6 +ospath 6 +found 6 +pipe 6 +this 6 +utf 6 +blah 6 +data 6 +request 6 +valid 6 +datetim 6 +encode 5 +files 5 +asstr 5 +url 5 +epipe 5 +set 5 +delimit 5 +member 5 +value 5 +raises 5 +staticmethod 5 +info 5 +bool 5 +except 5 +such 5 +afile 5 +ofclass 5 +with 5 +run 5 +python 5 +nosuch 4 +blahblah 4 +cls 4 +languag 4 +program 4 +saving 4 +untitl 4 +encod 4 +reads 4 +builds 4 +bad 4 +decode 4 +version 4 +slash 4 +adirectori 4 +bytes 4 +tornado 4 +plain 4 +used 4 +licens 4 +kwargs 4 +sfor 4 +tuple 3 +notebooknod 3 +servic 3 +gsclient 3 +xipynb 3 +forbidden 3 +current 3 +ascii 3 +should 3 +one 3 +dumps 3 +tmpl 3 +stream 3 +keyfil 3 +anotebook 3 +generic 3 +you 3 +RoleId_52 3 +replac 3 +RoleId_34 3 +ifcont 3 +len 3 +output 3 +isrequest 3 +which 3 +fold 3 +main 3 +force 3 +node 3 +and 3 +unescap 3 +author 3 +rsplit 3 +vadim 2 +sfrom 2 +develop 2 +apath 2 +github 2 +restor 2 +raw 2 +tokeep 2 +traitlet 2 +copy 2 +beencod 2 +reason 2 +jupyt 2 +part 2 +com 2 +socket 2 +djgscm 2 +non 2 +provid 2 +fmt 2 +anoth 2 +sys 2 +togcs 2 +ifbase 2 +requir 2 +execut 2 +exc 2 +paramet 2 +form 2 +gcloud 2 +two 2 +any 2 +bydefault 2 +setup 2 +src 2 +RoleId_51 2 +packag 2 +alist 2 +keep 2 +tothe 2 +key 2 +atthe 2 +descript 2 +wrap 2 +decodebyt 2 +isnot 2 +nofile 2 +while 2 +int 2 +includ 2 +append 2 +convert 2 +check 2 +bedecod 2 +case 2 +asbase 2 +attribut 2 +asutf 2 +popul 2 +classmethod 2 +amodel 2 +mit 2 +ifyou 2 +iftext 2 +decod 2 +orbase 2 +anon 2 +ifnot 2 +donot 2 +https 2 +can 2 +just 2 +adict 2 +split 2 +exist 2 +octet 2 +properti 2 +cells 2 +join 2 +state 2 +may 2 +sub 2 +mime 2 +encodebyt 2 +gets 2 +ext 2 +mixin 2 +bbytes 2 +splitext 2 +only 1 +asingl 1 +thereof 1 +relat 1 +single 1 
+mark 1 +dot 1 +root 1 +sown 1 +tochang 1 +script 1 +unhandl 1 +call 1 +tosplit 1 +inwhich 1 +super 1 +where 1 +inside 1 +asconvert 1 +rfind 1 +interpret 1 +bepopul 1 +ordirectori 1 +use 1 +process 1 +ingcs 1 +try 1 +beused 1 +itexist 1 +redefin 1 +end 1 +snew 1 +writes 1 +always 1 +same 1 +called 1 +api 1 +uses 1 +revers 1 +ralreadi 1 +pre 1 +sname 1 +RoleId_44 1 +nbconvert 1 +jsone 1 +ifformat 1 +ifthe 1 +repr 1 +desktop 1 +splits 1 +count 1 +into 1 +approv 1 +ifunicod 1 +upclass 1 +ajson 1 +unexpect 1 +importstr 1 +ifpath 1 +setuptool 1 +level 1 +reader 1 +tointerpret 1 +serial 1 +jgcsm 1 +miss 1 +change 1 +items 1 +bysave 1 +failed 1 +double 1 +asunicod 1 +isunknown 1 +interact 1 +start 1 +when 1 +beeither 1 +time 1 +callabl 1 +itertool 1 +own 1 +instal 1 +iffals 1 +behandl 1 +librari 1 +indic 1 +status 1 +noconvert 1 +sort 1 +needed 1 +faster 1 +either 1 +becal 1 +someth 1 +isneed 1 +tear 1 +via 1 +unknown 1 +specifi 1 +open 1 +RoleId_98 1 +alreadi 1 +classifi 1 +consid 1 +html 1 +handl 1 +oper 1 +agiven 1 +retriev 1 +empty 1 +orhtml 1 +ortupl 1 +tocach 1 +down 1 +next 1 +acont 1 +offile 1 +names 1 +tonew 1 +extract 1 +saved 1 +pick 1 +touse 1 +greater 1 +ashidden 1 +limit 1 +nbclass 1 +agener 1 +but 1 +was 1 +namespac 1 +ascript 1 +intern 1 +spath 1 +defin 1 +orimportstr 1 +loads 1 +update 1 +email 1 +stdout 1 +metadata 1 +object 1 +astext 1 +softwar 1 +users 1 +intend 1 +otherwis 1 +context 1 +structur 1 +readme 1 +toprocess 1 +apart 1 +cell 1 +dirnam 1 +unittest 1 +ifempti 1 +sign 1 +isused 1 +ofcheckpoint 1 +like 1 +toopen 1 +asvers 1 +keyword 1 +ondisk 1 +ingoogl 1 +onthe 1 +isipynb 1 +disk 1 +markovtsev 1 +explicit 1 +must 1 +common 1 +osi 1 +topic 1 +whether 1 +RoleId_111 1 +code 1 +iftrue 1 +trust 1 +ipython 1 +errors 1 +ofnbformat 1 +wtf 1 +input 1 +RoleId_84 1 +ifdefin 1 +collaps 1 +tech 1 +sourc 1 +nump 1 +given 1 +account 1 +escape 1 +audienc 1 +alpha 1 \ No newline at end of file diff --git a/role2vec/utils.py b/role2vec/utils.py index 
84226e2..c71092c 100644 --- a/role2vec/utils.py +++ b/role2vec/utils.py @@ -1,6 +1,5 @@ -from collections import deque -from itertools import islice, tee -from typing import Dict, Iterable, Iterator, List, Tuple +from itertools import islice +from typing import Dict, List, Tuple import numpy as np @@ -19,36 +18,6 @@ def node_iterator(root): n_nodes += 1 -def consume(iterator: Iterator, n: int) -> None: - """ - Advance the iterator n-steps ahead. If n is none, consume entirely. - - :param iterator: Input iterator. - :param n: Number of steps. - """ - # Use functions that consume iterators at C speed. - if n is None: - # feed the entire iterator into a zero-length deque - deque(iterator, maxlen=0) - else: - # advance to the empty slice starting at position n - next(islice(iterator, n, n), None) - - -def window(iterable: Iterable, n: int=2) -> Iterator: - """ - Create consecutive windows of elements from iterable. - - :param iterable: Input iterable. - :param n: Window size. - :return: Iterator for windows from the input iterable. 
- """ - iters = tee(iterable, n) - for i, it in enumerate(iters): - consume(it, i) - return zip(*iters) - - def read_embeddings(emb_path: str) -> Tuple[Dict[str, np.array], List[str]]: emb = {} roles = [] @@ -65,7 +34,8 @@ def read_embeddings(emb_path: str) -> Tuple[Dict[str, np.array], List[str]]: def read_paths(fname: str) -> List[str]: - paths = [line.strip() for line in open(fname).readlines()] + with open(fname) as fin: + paths = [line.strip() for line in fin.readlines()] if not paths: raise ValueError("Make sure the file is not empty!") return paths diff --git a/setup.py b/setup.py index 032a857..2a179ed 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ }, keywords=["machine learning on source code", "word2vec", "id2vec", "github", "swivel", "nbow", "bblfsh", "babelfish"], - install_requires=["ast2vec[tf]>=0.3.4-alpha"] + typing, + install_requires=["ast2vec[tf]>=0.3.4-alpha", "scikit-learn>=0.19.0"] + typing, package_data={"": ["LICENSE", "README.md"]}, classifiers=[ "Development Status :: 3 - Alpha",