From b117ac7f168a37c09bc55a2c62262336b8256e2d Mon Sep 17 00:00:00 2001 From: Arthur Zucker Date: Mon, 4 Sep 2023 19:10:22 +0000 Subject: [PATCH] updates --- bindings/python/src/tokenizer.rs | 4 ++-- bindings/python/tests/bindings/test_tokenizer.py | 3 +++ tokenizers/src/tokenizer/added_vocabulary.rs | 6 ++++++ 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/bindings/python/src/tokenizer.rs b/bindings/python/src/tokenizer.rs index 1cd43186..cfd5dbca 100644 --- a/bindings/python/src/tokenizer.rs +++ b/bindings/python/src/tokenizer.rs @@ -183,8 +183,8 @@ impl PyAddedToken { /// Set the content of this :obj:`AddedToken` #[setter] - fn set_content(&self, content: String){ - self.get_token().content = content + fn set_content(&mut self, content: String) { + self.content = content.into(); } /// Get the value of the :obj:`rstrip` option diff --git a/bindings/python/tests/bindings/test_tokenizer.py b/bindings/python/tests/bindings/test_tokenizer.py index f8e2a0b1..c47c3d73 100644 --- a/bindings/python/tests/bindings/test_tokenizer.py +++ b/bindings/python/tests/bindings/test_tokenizer.py @@ -17,7 +17,10 @@ class TestAddedToken: def test_instantiate_with_content_only(self): added_token = AddedToken("") added_token.content = "" + assert added_token.content == "" assert type(added_token) == AddedToken + added_token.content = added_token.content.lower() + assert str(added_token) == "" assert ( repr(added_token) == 'AddedToken("", rstrip=False, lstrip=False, single_word=False, normalized=True, special=False)' diff --git a/tokenizers/src/tokenizer/added_vocabulary.rs b/tokenizers/src/tokenizer/added_vocabulary.rs index cd656cdf..a633c9d3 100644 --- a/tokenizers/src/tokenizer/added_vocabulary.rs +++ b/tokenizers/src/tokenizer/added_vocabulary.rs @@ -673,6 +673,12 @@ mod tests { ); assert_eq!(vocab.len(), 5); // Token was already there assert_eq!(vocab.get_vocab()["another_two"], 4); // Token idx not changed + + // Just checking that we can set the content of the 
string in rust + let mut token:AddedToken = AddedToken::from("Hey", false); + token.content = "hey".to_string(); + assert_eq!(token.content, "hey"); // Content now holds the newly assigned value + } #[test]