A multi-label classifier, part 2: reusable functions

project
nbdev
Published

March 11, 2023

Modified

March 14, 2023

This project continues on from part 1: collecting images

Introduction

In this blog post, I collect some reusable functions that I use in my multi-label classifier project. So far, they mainly relate to image search queries. I’m exporting them to a mini library called ucm.py using nbdev.

Useful imports

import sys
import os
from fastcore.foundation import L
from pathlib import Path
from fastcore.xtras import Path

Avoid warnings from tensorflow

I also added this to my ai.env environment file.

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'

Collections

from itertools import chain, combinations

def powerset(iterable):
    """Return a lazy iterator over every subset of `iterable`, as tuples.

    Subsets come out in order of increasing size, e.g.
    powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)
    """
    # Materialise first so that one-shot iterators can be traversed once per size.
    items = list(iterable)
    sizes = range(len(items) + 1)
    per_size = (combinations(items, size) for size in sizes)
    return chain.from_iterable(per_size)
def seq_diff(s1, s2):
    """Return an `L` of the items of `s1` that are not in `s2`, in `s1`'s order."""
    kept = [item for item in s1 if item not in s2]
    return L(kept)
def test_powerset_and_seq_diff():
    """Print every subset of a small label set next to the labels it leaves out."""
    all_labels = L("bird", "cat", "dog")
    for subset in powerset(all_labels):
        remaining = seq_diff(all_labels, subset)
        print(subset, remaining)
        
test_powerset_and_seq_diff()
() ['bird', 'cat', 'dog']
('bird',) ['cat', 'dog']
('cat',) ['bird', 'dog']
('dog',) ['bird', 'cat']
('bird', 'cat') ['dog']
('bird', 'dog') ['cat']
('cat', 'dog') ['bird']
('bird', 'cat', 'dog') []

A helper for search query text

import inflect

p = inflect.engine()
def join_a_foo_and_a_bar(comb):
    """Prefix each word in `comb` with its indefinite article and join the results with " and "."""
    with_articles = map(p.a, comb)
    return " and ".join(with_articles)
join_a_foo_and_a_bar(["banana", "apple"])
'a banana and an apple'

A button to confirm deleting data

import ipywidgets as widgets
from send2trash import send2trash

def confirm_delete(del_path):
    """Display a button that moves `del_path` to the trash when it is clicked."""

    def move_to_trash(_clicked_button):
        # Trash instead of a permanent delete (e.g. shutil.rmtree), so a
        # mistaken click can still be undone.
        send2trash(del_path)

    label = f"Move data to trash: {del_path}?"
    trash_button = widgets.Button(description=label, layout=widgets.Layout(width='20em'))
    trash_button.on_click(move_to_trash)
    display(trash_button)
mkdir deleteme
confirm_delete("deleteme")

Logging

import logging

def setup_logging(args):
    """Configure the root logger from parsed command-line options.

    `args` must provide `log` (a log file path, or a falsy value) and
    `log_level` (a `logging` level), e.g. as added by `add_logging_options`.

    The log file is the first non-empty value among:

    1. `args.log`
    2. the environment variable `<PROG>_LOG`, where `<PROG>` is the
       upper-cased basename of `sys.argv[0]` exactly as invoked
    3. the same variable with the file extension dropped and every
       non-alphanumeric character replaced by `_` (so `my-tool.py` is
       looked up as `MY_TOOL_LOG`, a name that a shell can actually export)
    4. the environment variable `CHATGPT_LOG_FILE`

    If none is set, `logging.basicConfig` is called with `filename=None`.
    At DEBUG level the format also includes timestamp, level and logger name.
    """
    # Upper-cased basename of the program, exactly as invoked.
    prog_name_uc = os.path.basename(sys.argv[0]).upper()

    # The raw basename may contain '.', '-' etc., which are not valid in
    # shell variable names; derive a sanitized variant to look up as well.
    prog_stem = os.path.splitext(prog_name_uc)[0]
    prog_env_safe = ''.join(ch if ch.isalnum() else '_' for ch in prog_stem)

    # An explicit --log wins; otherwise fall back through the environment.
    log_file = args.log
    for env_var in (f'{prog_name_uc}_LOG', f'{prog_env_safe}_LOG', 'CHATGPT_LOG_FILE'):
        if log_file:
            break
        log_file = os.environ.get(env_var)

    fmt = "%(message)s"
    if args.log_level == logging.DEBUG:
        fmt = "%(asctime)s %(levelname)s %(name)s %(message)s"

    logging.basicConfig(level=args.log_level, format=fmt, filename=log_file)

def add_logging_options(parser):
    """Register the logging-related command-line options on `parser`.

    Adds a 'Logging options' group in which -d/-v/-q/-Q each store a
    `logging` level in `log_level` (default: WARNING), plus `--log` for a
    log file path (default: None).
    """
    group = parser.add_argument_group('Logging options')
    # Set the default before adding the flags so each of them inherits it.
    group.set_defaults(log_level=logging.WARNING)

    level_flags = (
        ('-d', '--debug', logging.DEBUG, "show debug messages"),
        ('-v', '--verbose', logging.INFO, "show verbose messages"),
        ('-q', '--quiet', logging.ERROR, "show only errors"),
        ('-Q', '--silent', logging.CRITICAL, "show nothing"),
    )
    for short_flag, long_flag, level, help_text in level_flags:
        group.add_argument(short_flag, long_flag, dest='log_level', action='store_const', const=level, help=help_text)

    group.add_argument('--log', default=None, help="log file")

Redirecting file handles, such as stderr to /dev/null

import sys
import os
from contextlib import contextmanager
from functools import partial

@contextmanager
def redirect(fileno, target):
    """Temporarily redirect file descriptor `fileno` to the file `target`.

    `target` is opened write-only without being created, so it must already
    exist (e.g. "/dev/null"). On exit, `fileno` is pointed back at whatever
    it referred to before, even if the body raised.

    Raises OSError if `target` cannot be opened or `fileno` is not a valid
    descriptor; no descriptors are left open in that case.
    """
    target_fd = os.open(target, os.O_WRONLY)
    try:
        # Keep a duplicate of the original destination so it can be restored.
        saved_fd = os.dup(fileno)
        try:
            os.dup2(target_fd, fileno)
            try:
                yield
            finally:
                os.dup2(saved_fd, fileno)
        finally:
            os.close(saved_fd)
    finally:
        os.close(target_fd)

# Factory for a context manager that points stderr's file descriptor at /dev/null.
# Note: sys.stderr.fileno() is evaluated once, here, when the partial is created.
redirect_stderr_to_dev_null = partial(redirect, sys.stderr.fileno(), "/dev/null")
# apparently this doesn't work in Jupyter :/
# (the output below still shows the line that should have been suppressed;
#  presumably Jupyter's sys.stderr does not write through this descriptor -- TODO confirm)
print("Hello", file=sys.stderr)
with redirect_stderr_to_dev_null():
    print("this should not appear!", file=sys.stderr)
print("world", file=sys.stderr)
Hello
this should not appear!
world

Run an async coroutine, and exit immediately when the user presses ctrl-c

import os
import asyncio

def run_async(coro):
    """Run `coro` to completion on a fresh event loop and return its result.

    If the user presses ctrl-c while it is running, the process exits
    immediately with status 130 (the conventional status for termination by
    SIGINT), without running any further cleanup.

    A new loop is created for every call, so the function can be called
    more than once; the loop is closed again before returning.
    """
    loop = asyncio.new_event_loop()
    try:
        asyncio.set_event_loop(loop)
        return loop.run_until_complete(coro)
    except KeyboardInterrupt:
        # os._exit terminates the process on the spot: no `finally` blocks,
        # no atexit handlers, no waiting for pending tasks.
        os._exit(130)
    finally:
        # Do not leave a closed loop installed as the current event loop.
        asyncio.set_event_loop(None)
        loop.close()

File-based Mutex

If lock_file is falsy (None, “”, or False), acquiring and releasing the mutex is a no-op.

import os
import fcntl

class FileMutex:
    """Mutex backed by an exclusive `flock` lock on a file.

    lock_file: path of the lock file; it is created with permissions `mode`
        if it does not exist. If falsy (None, "", False), acquire and
        release do nothing.
    mode: permission bits used when the lock file has to be created.

    Usable as a context manager: the lock is taken on entry and dropped on
    exit, and `with FileMutex(path) as m` binds the mutex itself to `m`.
    """

    def __init__(self, lock_file, mode=0o600):
        self.lock_file = lock_file
        self.fd = None  # descriptor of the lock file while the lock is held
        self.mode = mode

    def acquire(self):
        """Open the lock file and take an exclusive lock on it."""
        if not self.lock_file:
            return
        fd = os.open(self.lock_file, os.O_CREAT, self.mode)
        try:
            fcntl.flock(fd, fcntl.LOCK_EX)
        except BaseException:
            # Don't leak the descriptor if locking fails or is interrupted.
            os.close(fd)
            raise
        self.fd = fd

    def release(self):
        """Drop the lock and close the lock file; no-op if not held."""
        # Compare against None: 0 is a valid file descriptor.
        if self.fd is None:
            return
        fd, self.fd = self.fd, None
        try:
            fcntl.flock(fd, fcntl.LOCK_UN)
        finally:
            os.close(fd)

    def __enter__(self):
        self.acquire()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.release()
# not exactly a thorough test...
with FileMutex("/tmp/lock"):
    print("hi")
hi

Exporting code with nbdev

from nbdev.export import nb_export
import ipynbname
from pathlib import Path
import sh

def git_root():
    """Return the output of `git rev-parse --show-toplevel` with trailing whitespace stripped."""
    toplevel = sh.git('rev-parse', '--show-toplevel')
    return toplevel.rstrip()

def export(nb_file=None, lib_dir=None):
    """Export a notebook into a library directory using nbdev's `nb_export`.

    nb_file: notebook file to export; defaults to the name reported by
        `ipynbname.name()` plus '.ipynb'.
    lib_dir: destination directory; defaults to `lib` under `git_root()`.
    """
    notebook = ipynbname.name() + '.ipynb' if nb_file is None else nb_file
    destination = Path(git_root())/"lib" if lib_dir is None else lib_dir
    nb_export(notebook, destination)
export()