Skip to content Skip to sidebar Skip to footer

How Can We Use Spacy Minibatch And Goldparse To Train Ner Model Using Biluo Tagging Scheme?

My input data to the spacy ner model is in the BILUO tagging scheme and I wish to use the same as a part of some requirement. When I try to train the model simply without a minibatch, it works fine…

Solution 1:

You have 2 problems with your minibatch:

  1. tags should be an iterable of ner tags with offsets
  2. your `data_biluo` doesn't account for the comma (`,`) token in the middle of the sentences.

As soon as you correct those, you're good to go:

import random
from os import path

import spacy
from spacy.gold import offsets_from_biluo_tags, GoldParse
from spacy.util import minibatch, compounding
from tqdm import tqdm

def train_spacy(data, iterations, model=None):
    """Train (or continue training) a spaCy v2 NER model from BILUO-tagged data.

    Args:
        data: list of ``(text, tags)`` pairs where ``tags`` is a per-token
            list of BILUO labels aligned with spaCy's tokenization of ``text``.
        iterations: number of full passes over the training data.
        model: optional path to an existing spaCy model to resume training;
            when ``None`` (or the path does not exist) a blank 'en' model
            is created instead.

    Returns:
        The trained spaCy ``Language`` pipeline.
    """
    TRAIN_DATA = data
    print(f"downloads = {model}")
    if model is not None and path.exists(model):
        print(f"training existing model")
        nlp = spacy.load(model)
        print("Model is Loaded '%s'" % model)
    else:
        print(f"Creating new model")
        nlp = spacy.blank('en')  # create blank Language class

    # Add the NER pipe if the (possibly pre-trained) pipeline lacks one.
    if 'ner' not in nlp.pipe_names:
        ner = nlp.create_pipe('ner')
        nlp.add_pipe(ner, last=True)
    else:
        ner = nlp.get_pipe('ner')

    # Based on template, get labels and save those for further training
    LABEL = ["Name", "ORG"]
    for label in LABEL:
        ner.add_label(label)

    # get names of other pipes to disable them during training
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
    with nlp.disable_pipes(*other_pipes):  # only train NER
        # A fresh model needs begin_training(); an existing one should keep
        # its weights and only gets a new optimizer.
        if model is None:
            optimizer = nlp.begin_training()
        else:
            optimizer = nlp.entity.create_optimizer()
        for itn in range(iterations):
            print("Starting iteration " + str(itn))
            random.shuffle(TRAIN_DATA)
            losses = {}
            # Batch sizes grow from 4 toward 16 by a factor of 1.001 per batch.
            batches = minibatch(TRAIN_DATA, size=compounding(4.0, 16.0, 1.001))
            for batch in batches:
                texts, _ = zip(*batch)
                # GoldParse accepts per-token BILUO tags via `entities`.
                golds = [GoldParse(nlp.make_doc(t), entities=a) for t, a in batch]
                nlp.update(
                    texts,   # batch of texts
                    golds,   # batch of annotations
                    drop=0.4,  # dropout - make it harder to memorise data
                    losses=losses,
                    sgd=optimizer
                )
            print(losses)
    return nlp

# Training data as (text, BILUO tags) pairs. Each tag list has one label per
# spaCy token — note the comma after the name is its own token and is tagged
# 'O', which is exactly the adjustment the answer's point #2 calls for.
data_biluo = [
    ('I am Shah Khan, I work in MS Co', ['O', 'O', 'B-Name', 'L-Name', 'O', 'O', 'O', 'O', 'B-ORG', 'L-ORG']),
    ('I am Tom Tomb, I work in Telecom Networks', ['O', 'O', 'B-Name', 'L-Name', 'O', 'O', 'O', 'O', 'B-ORG', 'L-ORG'])
]


# Train for 10 iterations; the loss trace below shows 'ner' loss decreasing.
model = train_spacy(data_biluo, 10)

Starting iteration 0
{'ner': 17.999998331069946}
Starting iteration 1
{'ner': 16.6766300201416}
Starting iteration 2
{'ner': 16.997647166252136}
Starting iteration 3
{'ner': 16.486496448516846}
Starting iteration 4
{'ner': 15.695325374603271}
Starting iteration 5
{'ner': 14.312554001808167}
Starting iteration 6
{'ner': 12.099276185035706}
Starting iteration 7
{'ner': 11.473928153514862}
Starting iteration 8
{'ner': 8.814643770456314}
Starting iteration 9
{'ner': 7.233813941478729}

Post a Comment for "How Can We Use Spacy Minibatch And Goldparse To Train Ner Model Using Biluo Tagging Scheme?"