BART Large CNN fine-tuning
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import BartForConditionalGeneration, BartTokenizer, get_linear_schedule_with_warmup
from torch.optim import AdamW
import os

# Allow MPS to use all available memory (relevant when training on Apple Silicon)
os.environ["PYTORCH_MPS_HIGH_WATERMARK_RATIO"] = "0.0"
# Define your dataset class
class CustomDataset(Dataset):
    def __init__(self, csv_path, tokenizer):
        self.data = pd.read_csv(csv_path)
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        task = self.data.iloc[idx]['task']
        summary = self.data.iloc[idx]['summary']

        # Tokenize the input and target sequences
        tokenized_inputs = self.tokenizer.encode_plus(task, padding='max_length', truncation=True, max_length=512)
        tokenized_summary = self.tokenizer.encode_plus(summary, padding='max_length', truncation=True, max_length=512)

        # Convert the tokenized sequences to tensors
        input_ids = torch.tensor(tokenized_inputs['input_ids'])
        attention_mask = torch.tensor(tokenized_inputs['attention_mask'])
        summary_ids = torch.tensor(tokenized_summary['input_ids'])

        # Mask pad positions in the labels with -100 so the loss ignores them
        summary_ids[summary_ids == self.tokenizer.pad_token_id] = -100

        return {
            'input_ids': input_ids,
            'attention_mask': attention_mask,
            'summary_ids': summary_ids
        }
# Set the path to your CSV file
csv_path = 'yuna copy.csv'

# Set up the tokenizer
tokenizer = BartTokenizer.from_pretrained('./bart-large-cnn/')

# Create an instance of the custom dataset
dataset = CustomDataset(csv_path, tokenizer)
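
# Optional sanity check (illustrative sketch, assuming the CSV has 'task' and
# 'summary' columns): inspect one tokenized example before training
sample = dataset[0]
print({key: value.shape for key, value in sample.items()})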
# Define hyperparameters and training configurations
batch_size = 1
num_epochs = 50
learning_rate = 1e-5

# Create a data loader for the dataset
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
# Set dropout at load time; changing config.dropout after the layers are
# instantiated has no effect on the already-built modules
dropout = 0.01

# Load the pretrained BART model
model = BartForConditionalGeneration.from_pretrained('./bart-large-cnn/', dropout=dropout)

# Prefer CUDA, then Apple MPS (the watermark setting above targets MPS), else CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')
print(device)
model.to(device)

# Set the model to training mode
model.train()
# Define the optimizer and the learning rate scheduler
optimizer = AdamW(model.parameters(), lr=learning_rate)
total_steps = len(dataloader) * num_epochs
# Linear decay over all training steps with no warmup (a common default)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

# Set the directory to save the models
save_dir = 'Yuna-trained'
# Set the generation parameters for summarization
length_penalty = 1.0
max_length = 1024
min_length = 120

model.config.length_penalty = length_penalty
model.config.max_length = max_length
model.config.min_length = min_length
# Set the task-specific parameters for summarization
task_specific_params = {
    'summarization': {
        'early_stopping': True,
        'length_penalty': length_penalty,
        'max_length': max_length,
        'min_length': min_length,
        'no_repeat_ngram_size': 3,
        'num_beams': 4
    }
}

# Update the model configuration with task-specific parameters
if model.config.task_specific_params is None:
    model.config.task_specific_params = {}
for task, params in task_specific_params.items():
    if task in model.config.task_specific_params:
        model.config.task_specific_params[task].update(params)
    else:
        model.config.task_specific_params[task] = params
# Training loop
for epoch in range(num_epochs):
    total_loss = 0

    for batch in dataloader:
        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass; pad positions in the labels are already masked to -100
        outputs = model(input_ids=batch['input_ids'].to(device),
                        attention_mask=batch['attention_mask'].to(device),
                        labels=batch['summary_ids'].to(device))

        # Compute the loss
        loss = outputs.loss

        # Backpropagation
        loss.backward()

        # Update the model parameters and the learning rate
        optimizer.step()
        scheduler.step()

        total_loss += loss.item()

    # Print the average loss for the epoch
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {total_loss / len(dataloader):.4f}")

    # Save the model every 5th epoch
    if (epoch + 1) % 5 == 0:
        checkpoint_dir = os.path.join(save_dir, f'fine_tuned_model_epoch{epoch+1}')
        model.save_pretrained(checkpoint_dir)
        tokenizer.save_pretrained(checkpoint_dir)

# Save the final fine-tuned model
model.save_pretrained(os.path.join(save_dir, 'fine_tuned_model_final'))
tokenizer.save_pretrained(os.path.join(save_dir, 'fine_tuned_model_final'))
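
# Minimal inference sketch, assuming the final checkpoint saved above:
# reload the fine-tuned model and summarize a single input.
# 'sample_text' is a hypothetical placeholder; replace it with real task text.
trained_model = BartForConditionalGeneration.from_pretrained(os.path.join(save_dir, 'fine_tuned_model_final')).to(device)
trained_model.eval()

sample_text = "Replace this with an input row from the CSV."  # hypothetical example input
inputs = tokenizer(sample_text, return_tensors='pt', truncation=True, max_length=512).to(device)
with torch.no_grad():
    generated_ids = trained_model.generate(
        **inputs,
        num_beams=4,
        max_length=max_length,
        min_length=min_length,
        length_penalty=length_penalty,
        no_repeat_ngram_size=3,
        early_stopping=True
    )
print(tokenizer.decode(generated_ids[0], skip_special_tokens=True))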