Hello,
I am having trouble loading the SNOMED CT model pack for MedCAT. My goal is to run some text through this model to extract the values and SCTIDs (SNOMED CT identifiers) for a set of pre-specified pain terms.
Versions
MedCAT: 1.7.0
SpaCy: 3.5.3
Python: 3.10.5
I downloaded the SNOMED model pack from https://medcat.rosalind.kcl.ac.uk/, as suggested in the MedCAT GitHub repo.
When I try to load the model with `cat = CAT.load_model_pack(model_pack_path)`, using the following path:
MODEL_DIR = "./models/"
model_pack_path = MODEL_DIR + "/mc_modelpack_snomed_int_16_mar_2022_25be3857ba34bdd5.zip"
I get a validation error:
---------------------------------------------------------------------------
ValidationError Traceback (most recent call last)
Input In [15], in <cell line: 2>()
1 model_pack_path = MODEL_DIR + "/mc_modelpack_snomed_int_16_mar_2022_25be3857ba34bdd5.zip"
----> 2 cat = CAT.load_model_pack(model_pack_path)
File /opt/conda/lib/python3.10/site-packages/medcat/cat.py:372, in CAT.load_model_pack(cls, zip_path, meta_cat_config_dict, load_meta_models, load_addl_ner)
368 for meta_path in meta_paths:
369 meta_cats.append(MetaCAT.load(save_dir_path=meta_path,
370 config_dict=meta_cat_config_dict))
--> 372 cat = cls(cdb=cdb, config=cdb.config, vocab=vocab, meta_cats=meta_cats, addl_ner=addl_ner)
373 logger.info(cat.get_model_card()) # Print the model card
374 return cat
File /opt/conda/lib/python3.10/site-packages/medcat/cat.py:101, in CAT.__init__(self, cdb, vocab, config, meta_cats, addl_ner)
99 self._meta_cats = meta_cats
100 self._addl_ner = addl_ner if isinstance(addl_ner, list) else [addl_ner]
--> 101 self._create_pipeline(self.config)
File /opt/conda/lib/python3.10/site-packages/medcat/cat.py:109, in CAT._create_pipeline(self, config)
107 # Build the pipeline
108 self.pipe = Pipe(tokenizer=spacy_split_all, config=config)
--> 109 self.pipe.add_tagger(tagger=tag_skip_and_punct,
110 name='skip_and_punct',
111 additional_fields=['is_punct'])
113 if self.vocab is not None:
114 spell_checker = BasicSpellChecker(cdb_vocab=self.cdb.vocab, config=config, data_vocab=self.vocab)
File /opt/conda/lib/python3.10/site-packages/medcat/pipe.py:66, in Pipe.add_tagger(self, tagger, name, additional_fields)
64 name = name if name is not None else component_factory_name
65 Language.factory(name=component_factory_name, default_config={"config": self.config}, func=tagger)
---> 66 self._nlp.add_pipe(component_factory_name, name=name, first=True)
68 # Add custom fields needed for this usecase
69 Token.set_extension('to_skip', default=False, force=True)
File /opt/conda/lib/python3.10/site-packages/spacy/language.py:786, in Language.add_pipe(self, factory_name, name, before, after, first, last, source, config, raw_config, validate)
782 pipe_component, factory_name = self.create_pipe_from_source(
783 factory_name, source, name=name
784 )
785 else:
--> 786 pipe_component = self.create_pipe(
787 factory_name,
788 name=name,
789 config=config,
790 raw_config=raw_config,
791 validate=validate,
792 )
793 pipe_index = self._get_pipe_index(before, after, first, last)
794 self._pipe_meta[name] = self.get_factory_meta(factory_name)
File /opt/conda/lib/python3.10/site-packages/spacy/language.py:679, in Language.create_pipe(self, factory_name, name, config, raw_config, validate)
676 cfg = {factory_name: config}
677 # We're calling the internal _fill here to avoid constructing the
678 # registered functions twice
--> 679 resolved = registry.resolve(cfg, validate=validate)
680 filled = registry.fill({"cfg": cfg[factory_name]}, validate=validate)["cfg"]
681 filled = Config(filled)
File /opt/conda/lib/python3.10/site-packages/confection/__init__.py:728, in registry.resolve(cls, config, schema, overrides, validate)
719 @classmethod
720 def resolve(
721 cls,
(...)
726 validate: bool = True,
727 ) -> Dict[str, Any]:
--> 728 resolved, _ = cls._make(
729 config, schema=schema, overrides=overrides, validate=validate, resolve=True
730 )
731 return resolved
File /opt/conda/lib/python3.10/site-packages/confection/__init__.py:777, in registry._make(cls, config, schema, overrides, resolve, validate)
775 if not is_interpolated:
776 config = Config(orig_config).interpolate()
--> 777 filled, _, resolved = cls._fill(
778 config, schema, validate=validate, overrides=overrides, resolve=resolve
779 )
780 filled = Config(filled, section_order=section_order)
781 # Check that overrides didn't include invalid properties not in config
File /opt/conda/lib/python3.10/site-packages/confection/__init__.py:832, in registry._fill(cls, config, schema, validate, resolve, parent, overrides)
830 schema.__fields__[key] = copy_model_field(field, Any)
831 promise_schema = cls.make_promise_schema(value, resolve=resolve)
--> 832 filled[key], validation[v_key], final[key] = cls._fill(
833 value,
834 promise_schema,
835 validate=validate,
836 resolve=resolve,
837 parent=key_parent,
838 overrides=overrides,
839 )
840 reg_name, func_name = cls.get_constructor(final[key])
841 args, kwargs = cls.parse_args(final[key])
File /opt/conda/lib/python3.10/site-packages/confection/__init__.py:911, in registry._fill(cls, config, schema, validate, resolve, parent, overrides)
909 exclude_validation = set([ARGS_FIELD_ALIAS, *RESERVED_FIELDS.keys()])
910 validation.update(result.dict(exclude=exclude_validation))
--> 911 filled, final = cls._update_from_parsed(validation, filled, final)
912 if exclude:
913 filled = {k: v for k, v in filled.items() if k not in exclude}
File /opt/conda/lib/python3.10/site-packages/confection/__init__.py:933, in registry._update_from_parsed(cls, validation, filled, final)
931 final[key] = value
932 if isinstance(value, dict):
--> 933 filled[key], final[key] = cls._update_from_parsed(
934 value, filled[key], final[key]
935 )
936 # Update final config with parsed value if they're not equal (in
937 # value and in type) but not if it's a generator because we had to
938 # replace that to validate it correctly
939 elif key == ARGS_FIELD:
File /opt/conda/lib/python3.10/site-packages/confection/__init__.py:929, in registry._update_from_parsed(cls, validation, filled, final)
927 continue # skip aliases for reserved fields
928 if key not in filled:
--> 929 filled[key] = value
930 if key not in final:
931 final[key] = value
File /opt/conda/lib/python3.10/site-packages/medcat/config.py:34, in FakeDict.__setitem__(self, arg, val)
33 def __setitem__(self, arg: str, val) -> None:
---> 34 setattr(self, arg, val)
File /opt/conda/lib/python3.10/site-packages/pydantic/main.py:384, in pydantic.main.BaseModel.__setattr__()
ValidationError: 1 validation error for Config
linking -> filters -> cuis
value is not a valid set (type=type_error.set)
Has anyone else come across this, or does anyone know how to resolve it?
Thanks!
Jaya