#!/usr/bin/env python3
# exclude-prefixes: exclude lines that are prefixes of other lines

import sys
from functools import reduce


def exclude_prefixes(lines):
    """Return *lines* without the entries that are a prefix of another entry.

    A line counts as a prefix of an identical line, so duplicates collapse to
    their first occurrence.  The surviving lines keep their original relative
    order.

    Args:
        lines: Lines to filter, all ``str`` or all ``bytes``.

    Returns:
        A new list with the surviving lines; ``[]`` for empty input.
    """
    # Pair each line with its original position so the input order can be
    # restored after sorting.  In descending order every line sits after all
    # the lines that extend it, so a single backwards-looking comparison is
    # enough.  The sort is stable (also with reverse=True), which is what makes
    # the first of several identical lines the one that survives.
    ordered = sorted(enumerate(lines), key=lambda pair: pair[1], reverse=True)

    def keep_unless_prefix(kept, candidate):
        # Only the most recently kept line needs checking: anything kept
        # earlier sorts even higher and cannot start with `candidate` unless
        # this one does too.
        if not kept or not kept[-1][1].startswith(candidate[1]):
            # `kept` is private to this call, so appending in place is safe and
            # avoids copying the whole list on every step.
            kept.append(candidate)
        return kept

    survivors = reduce(keep_unless_prefix, ordered, [])

    # Restore the original line order.
    survivors.sort(key=lambda pair: pair[0])
    return [line for _, line in survivors]


def test_exclude_prefixes():
    """Self-test covering the basic, empty, duplicate and bytes cases."""
    lines = ["hello", "hello world", "ding dong", "dong", "ding"]
    filtered = exclude_prefixes(lines)
    assert filtered == ["hello world", "ding dong", "dong"]
    assert exclude_prefixes([]) == []
    assert exclude_prefixes(["a", "a", "b"]) == ["a", "b"]
    assert exclude_prefixes([b"ab", b"abc", b"x"]) == [b"abc", b"x"]


def main():
    """Filter stdin line by line and write the surviving lines to stdout."""
    # Read as lines of bytes, not requiring utf-8.
    # NOTE(review): rstrip() without an argument removes all trailing ASCII
    # whitespace (spaces, tabs, CR), not only the newline -- confirm that
    # dropping trailing blanks from the output is intended.
    lines = [line.rstrip() for line in sys.stdin.buffer]
    filtered = exclude_prefixes(lines)
    sys.stdout.buffer.writelines(line + b'\n' for line in filtered)


if __name__ == '__main__':
    main()