diff --git a/load_data.py b/load_data.py index 1a7ed4869ddba45ec635490bdb66d1a8cbd5f2b9..24311a1e44fd2a0f4044214718623c75c8672afb 100644 --- a/load_data.py +++ b/load_data.py @@ -44,6 +44,10 @@ def load_data_finetune(args, tokenizer): dataset = load_dataset(*arg_map[args.dataset]) def tokenize_function_finetune(batch): + # GLUE MNLI (and other sentence-pair tasks) expose "premise"/"hypothesis" + # columns instead of a single "sentence" column, so tokenize as a pair there. + if "premise" in batch and "hypothesis" in batch: + return tokenizer(batch["premise"], batch["hypothesis"], truncation=True, padding="max_length", max_length=args.max_length) return tokenizer(batch["sentence"], truncation=True, padding="max_length", max_length=args.max_length) dataset = dataset.map(tokenize_function_finetune)