MedCAT model for SNOMED-CT

Hello,

I am facing some issues loading up the SNOMED-CT model for MedCAT. The purpose is to run some text through this model in order to get the values and SCTIDs for some pre-specified pain terms.

Versions
MedCAT: 1.7.0
SpaCy: 3.5.3
Python: 3.10.5

I have downloaded the snomed model from the https://medcat.rosalind.kcl.ac.uk/ website, as suggested on the GitHub repo.

When I try to load the model using:

MODEL_DIR = "./models/"
model_pack_path = MODEL_DIR + "/mc_modelpack_snomed_int_16_mar_2022_25be3857ba34bdd5.zip"

I get a validation error:

---------------------------------------------------------------------------
ValidationError                           Traceback (most recent call last)
Input In [15], in <cell line: 2>()
      1 model_pack_path = MODEL_DIR + "/mc_modelpack_snomed_int_16_mar_2022_25be3857ba34bdd5.zip"
----> 2 cat = CAT.load_model_pack(model_pack_path)

File /opt/conda/lib/python3.10/site-packages/medcat/cat.py:372, in CAT.load_model_pack(cls, zip_path, meta_cat_config_dict, load_meta_models, load_addl_ner)
    368 for meta_path in meta_paths:
    369     meta_cats.append(MetaCAT.load(save_dir_path=meta_path,
    370                                   config_dict=meta_cat_config_dict))
--> 372 cat = cls(cdb=cdb, config=cdb.config, vocab=vocab, meta_cats=meta_cats, addl_ner=addl_ner)
    373 logger.info(cat.get_model_card())  # Print the model card
    374 return cat

File /opt/conda/lib/python3.10/site-packages/medcat/cat.py:101, in CAT.__init__(self, cdb, vocab, config, meta_cats, addl_ner)
     99 self._meta_cats = meta_cats
    100 self._addl_ner = addl_ner if isinstance(addl_ner, list) else [addl_ner]
--> 101 self._create_pipeline(self.config)

File /opt/conda/lib/python3.10/site-packages/medcat/cat.py:109, in CAT._create_pipeline(self, config)
    107 # Build the pipeline
    108 self.pipe = Pipe(tokenizer=spacy_split_all, config=config)
--> 109 self.pipe.add_tagger(tagger=tag_skip_and_punct,
    110                      name='skip_and_punct',
    111                      additional_fields=['is_punct'])
    113 if self.vocab is not None:
    114     spell_checker = BasicSpellChecker(cdb_vocab=self.cdb.vocab, config=config, data_vocab=self.vocab)

File /opt/conda/lib/python3.10/site-packages/medcat/pipe.py:66, in Pipe.add_tagger(self, tagger, name, additional_fields)
     64 name = name if name is not None else component_factory_name
     65 Language.factory(name=component_factory_name, default_config={"config": self.config}, func=tagger)
---> 66 self._nlp.add_pipe(component_factory_name, name=name, first=True)
     68 # Add custom fields needed for this usecase
     69 Token.set_extension('to_skip', default=False, force=True)

File /opt/conda/lib/python3.10/site-packages/spacy/language.py:786, in Language.add_pipe(self, factory_name, name, before, after, first, last, source, config, raw_config, validate)
    782     pipe_component, factory_name = self.create_pipe_from_source(
    783         factory_name, source, name=name
    784     )
    785 else:
--> 786     pipe_component = self.create_pipe(
    787         factory_name,
    788         name=name,
    789         config=config,
    790         raw_config=raw_config,
    791         validate=validate,
    792     )
    793 pipe_index = self._get_pipe_index(before, after, first, last)
    794 self._pipe_meta[name] = self.get_factory_meta(factory_name)

File /opt/conda/lib/python3.10/site-packages/spacy/language.py:679, in Language.create_pipe(self, factory_name, name, config, raw_config, validate)
    676 cfg = {factory_name: config}
    677 # We're calling the internal _fill here to avoid constructing the
    678 # registered functions twice
--> 679 resolved = registry.resolve(cfg, validate=validate)
    680 filled = registry.fill({"cfg": cfg[factory_name]}, validate=validate)["cfg"]
    681 filled = Config(filled)

File /opt/conda/lib/python3.10/site-packages/confection/__init__.py:728, in registry.resolve(cls, config, schema, overrides, validate)
    719 @classmethod
    720 def resolve(
    721     cls,
   (...)
    726     validate: bool = True,
    727 ) -> Dict[str, Any]:
--> 728     resolved, _ = cls._make(
    729         config, schema=schema, overrides=overrides, validate=validate, resolve=True
    730     )
    731     return resolved

File /opt/conda/lib/python3.10/site-packages/confection/__init__.py:777, in registry._make(cls, config, schema, overrides, resolve, validate)
    775 if not is_interpolated:
    776     config = Config(orig_config).interpolate()
--> 777 filled, _, resolved = cls._fill(
    778     config, schema, validate=validate, overrides=overrides, resolve=resolve
    779 )
    780 filled = Config(filled, section_order=section_order)
    781 # Check that overrides didn't include invalid properties not in config

File /opt/conda/lib/python3.10/site-packages/confection/__init__.py:832, in registry._fill(cls, config, schema, validate, resolve, parent, overrides)
    830     schema.__fields__[key] = copy_model_field(field, Any)
    831 promise_schema = cls.make_promise_schema(value, resolve=resolve)
--> 832 filled[key], validation[v_key], final[key] = cls._fill(
    833     value,
    834     promise_schema,
    835     validate=validate,
    836     resolve=resolve,
    837     parent=key_parent,
    838     overrides=overrides,
    839 )
    840 reg_name, func_name = cls.get_constructor(final[key])
    841 args, kwargs = cls.parse_args(final[key])

File /opt/conda/lib/python3.10/site-packages/confection/__init__.py:911, in registry._fill(cls, config, schema, validate, resolve, parent, overrides)
    909 exclude_validation = set([ARGS_FIELD_ALIAS, *RESERVED_FIELDS.keys()])
    910 validation.update(result.dict(exclude=exclude_validation))
--> 911 filled, final = cls._update_from_parsed(validation, filled, final)
    912 if exclude:
    913     filled = {k: v for k, v in filled.items() if k not in exclude}

File /opt/conda/lib/python3.10/site-packages/confection/__init__.py:933, in registry._update_from_parsed(cls, validation, filled, final)
    931     final[key] = value
    932 if isinstance(value, dict):
--> 933     filled[key], final[key] = cls._update_from_parsed(
    934         value, filled[key], final[key]
    935     )
    936 # Update final config with parsed value if they're not equal (in
    937 # value and in type) but not if it's a generator because we had to
    938 # replace that to validate it correctly
    939 elif key == ARGS_FIELD:

File /opt/conda/lib/python3.10/site-packages/confection/__init__.py:929, in registry._update_from_parsed(cls, validation, filled, final)
    927     continue  # skip aliases for reserved fields
    928 if key not in filled:
--> 929     filled[key] = value
    930 if key not in final:
    931     final[key] = value

File /opt/conda/lib/python3.10/site-packages/medcat/config.py:34, in FakeDict.__setitem__(self, arg, val)
     33 def __setitem__(self, arg: str, val) -> None:
---> 34     setattr(self, arg, val)

File /opt/conda/lib/python3.10/site-packages/pydantic/main.py:384, in pydantic.main.BaseModel.__setattr__()

ValidationError: 1 validation error for Config
linking -> filters -> cuis
  value is not a valid set (type=type_error.set)

Has anyone else come across this, or know how to resolve it?

Thanks!

Jaya

That free demo model is not compatible with the latest MedCAT version.

You can downgrade your version of MedCAT:
pip install medcat==1.5
That version will work with the demo model.

1 Like

For future reference, there is also a fix for this issue in newer versions of MedCAT, as described in the following PR:

Though that’s not yet a part of any release. Hopefully we’ll have the next release up soon.

TLDR:
If you’ve got the GitHub version of MedCAT installed (or you’re reading this in a future where a version after 1.7.0 has been released), you can run:
python -m medcat.utils.versioning fix-config <model_pack_path> <new_model_pack_path>