# Copyright 2024-present the HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import annotations

import warnings
from typing import Any, Optional

import torch

from peft.import_utils import is_torchao_available
from peft.tuners.tuners_utils import BaseTunerLayer, check_adapters_to_merge

from .config import LoraConfig
from .layer import Linear
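
# Note: torchao is an optional dependency. Availability is checked via is_torchao_available() in
# dispatch_torchao() below, and the torchao imports themselves happen lazily inside the methods and
# functions that need them.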


class TorchaoLoraLinear(Linear):
    """LoRA layer implementation for Linear layers using torchao data"""

    def __init__(self, *args, get_apply_tensor_subclass, **kwargs):
        # this is not strictly necessary, as kwargs are stored either way, but we want to error early if
        # get_apply_tensor_subclass is missing.
        if kwargs.get("lora_bias", False):
            raise ValueError(f"{self.__class__.__name__} does not support lora_bias yet, set it to False")

        super().__init__(*args, **kwargs)
        self.get_apply_tensor_subclass = get_apply_tensor_subclass
        self._check_dtype_supported()
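
    # `get_apply_tensor_subclass` is expected to be a zero-argument callable whose return value is passed to
    # `torchao.quantize_` in merge()/unmerge() to re-quantize the base weight after it has been modified in
    # float; something like torchao's `int8_weight_only` would fit that shape (an assumption for illustration,
    # not a requirement stated in this module).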

    def _check_dtype_supported(self):
        # TODO: Not required once int4_weight_only is properly supported by torchao
        base_layer = self.get_base_layer()
        weight = base_layer.weight
        if hasattr(weight, "layout_tensor") and (weight.layout_tensor.data.dtype != torch.int8):
            raise ValueError(f"{type(self).__name__} only supports int8 weights for now.")

    def merge(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> None:
        from torchao import quantize_

        adapter_names = check_adapters_to_merge(self, adapter_names)
        if not adapter_names:
            # no adapter to merge
            return

        self._check_dtype_supported()

        for active_adapter in adapter_names:
            # fetch the (possibly re-quantized) weight anew on each iteration
            base_layer = self.get_base_layer()
            weight = base_layer.weight

            try:
                weight = weight.dequantize()
            except NotImplementedError as exc:
                msg = (
                    f"Weights of type {type(weight).__name__} do not support dequantization (yet), which is needed to "
                    "support merging."
                )
                raise NotImplementedError(msg) from exc

            weight += self.get_delta_weight(active_adapter)

            if safe_merge and not torch.isfinite(weight).all():
                raise ValueError(
                    f"NaNs detected in the merged weights. The adapter {active_adapter} seems to be broken"
                )

            # TODO: once (if) torchao supports directly mutating the data, use that instead.
            del base_layer.weight
            base_layer.weight = weight
            quantize_(base_layer, self.get_apply_tensor_subclass())
            del weight

            self.merged_adapters.append(active_adapter)
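
    # Note that merge() is a dequantize -> add LoRA delta -> re-quantize round trip, so merging and later
    # unmerging restores the quantized base weight only up to re-quantization error. At the model level this
    # is usually driven through the PEFT model API; a hedged sketch:
    #
    #     peft_model.merge_adapter()    # invokes merge() on each adapted layer, including this one
    #     peft_model.unmerge_adapter()  # invokes unmerge() to subtract the deltas again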

    def unmerge(self) -> None:
        from torchao import quantize_

        if not self.merged:
            warnings.warn("Already unmerged. Nothing to do.")
            return

        while len(self.merged_adapters) > 0:
            active_adapter = self.merged_adapters.pop()
            if active_adapter not in self.lora_A.keys():
                continue

            base_layer = self.get_base_layer()
            weight = base_layer.weight

            try:
                weight = weight.dequantize()
            except NotImplementedError as exc:
                msg = (
                    f"Weights of type {type(weight).__name__} do not support dequantization (yet), which is needed to "
                    "support unmerging."
                )
                raise NotImplementedError(msg) from exc

            weight -= self.get_delta_weight(active_adapter)

            # We go through a dummy module because overriding the weight.data does not work, the tensor retains the old
            # data. Therefore, we need to go through quantize_, which takes a module as input, and we need to delete and
            # re-assign the weight.
            # TODO: once (if) torchao supports directly mutating the data, use that instead.
            del base_layer.weight
            base_layer.weight = weight
            quantize_(base_layer, self.get_apply_tensor_subclass())
            del weight

    def __repr__(self) -> str:
        rep = super().__repr__()
        return rep.replace("lora.Linear", f"lora.{self.__class__.__name__}")


def dispatch_torchao(
    target: torch.nn.Module,
    adapter_name: str,
    lora_config: LoraConfig,
    **kwargs: Any,
) -> Optional[torch.nn.Module]:
    new_module = None

    if isinstance(target, BaseTunerLayer):
        target_base_layer = target.get_base_layer()
    else:
        target_base_layer = target

    if not hasattr(target_base_layer, "weight"):
        return new_module

    if not is_torchao_available():
        return new_module

    from torchao.dtypes import AffineQuantizedTensor
    from torchao.quantization import LinearActivationQuantizedTensor

    if isinstance(target_base_layer.weight, (AffineQuantizedTensor, LinearActivationQuantizedTensor)):
        new_module = TorchaoLoraLinear(target, adapter_name, **kwargs)

    return new_module
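
# End to end, this dispatcher is exercised when a LoRA adapter is added on top of a torchao-quantized model.
# A minimal sketch, assuming transformers' TorchAoConfig with int8 weight-only quantization (the model name
# and target modules below are illustrative placeholders, not requirements of this file):
#
#     from transformers import AutoModelForCausalLM, TorchAoConfig
#     from peft import LoraConfig, get_peft_model
#
#     base_model = AutoModelForCausalLM.from_pretrained(
#         "facebook/opt-125m", quantization_config=TorchAoConfig("int8_weight_only")
#     )
#     peft_model = get_peft_model(base_model, LoraConfig(target_modules=["q_proj", "v_proj"]))
#     # Each targeted Linear wrapping a torchao-quantized weight is replaced by a TorchaoLoraLinear here.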