Source code for flagevalmm.dataset.data_preprocessor

import argparse
from mmengine.config import Config
import importlib
import os.path as osp
from typing import Optional, Union
from flagevalmm.common import get_logger
from flagevalmm.common.const import FLAGEVALMM_DATASETS_CACHE_DIR

logger = get_logger(__name__)


def parse_args():
    """Parse the command-line options of the dataset preprocessing script.

    Options:
        -c / --config: path to the config file (required).
        -f / --force: flag; when given, ``force`` is ``True``.
        --cache-dir: cache directory, defaulting to
            ``FLAGEVALMM_DATASETS_CACHE_DIR``.

    Returns:
        argparse.Namespace: namespace with ``config``, ``force`` and
        ``cache_dir`` attributes.
    """
    parser = argparse.ArgumentParser(description="Process a dataset")
    # The config is mandatory: without it the preprocessor would receive
    # ``None`` and fail later with an unrelated attribute error, so let
    # argparse report the missing option up front instead.
    parser.add_argument("-c", "--config", required=True, help="config file path")
    parser.add_argument("-f", "--force", action="store_true", help="force overwrite")
    parser.add_argument(
        "--cache-dir",
        type=str,
        default=FLAGEVALMM_DATASETS_CACHE_DIR,
        help="cache dir",
    )
    return parser.parse_args()
class DataPreprocessor:
    """Run a dataset's processing script unless its output already exists.

    The config supplies ``processor`` (path to a Python file exposing a
    ``process(cfg)`` function), ``dataset_name``, ``split`` and optionally
    ``processed_dataset_path``. Paths are resolved once at construction time
    and written back onto the config object.
    """

    def __init__(
        self,
        config: Union[str, Config],
        config_dir: Optional[str] = None,
        force: bool = False,
        cache_dir: str = FLAGEVALMM_DATASETS_CACHE_DIR,
    ) -> None:
        """Load the config and resolve the paths it contains.

        Args:
            config: Either a path to a config file, loaded with
                ``Config.fromfile`` and reduced to its ``config`` entry, or
                an already-built config object used as-is.
            config_dir: Directory against which a relative ``processor``
                path is resolved. Ignored when ``config`` is a path: the
                directory of that file is used instead.
            force: Re-run the processor even if the output directory exists.
            cache_dir: Base directory for processed datasets; ``~`` is
                expanded.

        Raises:
            ValueError: If ``processor`` is relative and no ``config_dir``
                is available to resolve it.
        """
        if isinstance(config, str):
            self.cfg = Config.fromfile(config).config
            config_dir = osp.dirname(config)
        else:
            self.cfg = config
        self.force = force
        self.cache_dir = osp.expanduser(cache_dir)
        self.parse_config(config_dir)

    def parse_config(self, config_dir: Optional[str]) -> None:
        """Resolve ``processor`` and ``processed_dataset_path`` on the config.

        ``cfg.processor`` has ``~`` expanded and, when relative, is joined
        onto ``config_dir``. ``cfg.processed_dataset_path`` is set to the
        configured value (``~`` expanded, joined onto the cache directory
        when relative) or, when not configured, to
        ``<cache_dir>/<dataset_name>``.

        Args:
            config_dir: Base directory for a relative ``processor`` path.

        Raises:
            ValueError: If ``processor`` is relative and ``config_dir`` is
                ``None``.
        """
        cfg = self.cfg

        processor = osp.expanduser(cfg.processor)
        if not osp.isabs(processor):
            if config_dir is None:
                # Joining onto None would raise an opaque TypeError.
                raise ValueError(
                    f"Relative processor path {processor!r} requires config_dir"
                )
            processor = osp.join(config_dir, processor)
        cfg.processor = processor

        configured_path = cfg.get("processed_dataset_path")
        if configured_path:
            processed_dataset_path = osp.expanduser(configured_path)
            if not osp.isabs(processed_dataset_path):
                processed_dataset_path = osp.join(
                    self.cache_dir, processed_dataset_path
                )
        else:
            # Already rooted at the cache dir; it must not be joined onto the
            # cache dir a second time when that directory is itself relative.
            processed_dataset_path = osp.join(self.cache_dir, cfg.dataset_name)
        cfg.processed_dataset_path = processed_dataset_path

    def process(self) -> None:
        """Execute the processor script for the configured split.

        Does nothing (besides logging) when
        ``<processed_dataset_path>/<split>`` already exists and ``force`` is
        false. Otherwise the file at ``cfg.processor`` is loaded as a module
        named ``process`` and its ``process`` function is called with the
        config. This method does not itself check what the script writes.

        Raises:
            ImportError: If no import spec or loader can be created for the
                processor file.
        """
        # A bare ``import importlib`` does not guarantee that the ``util``
        # submodule is bound, so import it explicitly where it is used.
        import importlib.util

        cfg = self.cfg
        output_dir = osp.join(cfg.processed_dataset_path, cfg.split)
        if osp.exists(output_dir) and not self.force:
            logger.info(f"Processed dataset already exists at {output_dir}")
            return

        spec = importlib.util.spec_from_file_location("process", cfg.processor)
        if spec is None or spec.loader is None:
            raise ImportError(
                f"Cannot load processor script from {cfg.processor!r}"
            )
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        module.process(cfg)
        logger.info(f"Processed dataset {cfg.dataset_path} saved to {output_dir}")
if __name__ == "__main__":
    # Script entry point: read the CLI options, build the preprocessor from
    # them and run it once.
    cli_args = parse_args()
    DataPreprocessor(
        config=cli_args.config,
        force=cli_args.force,
        cache_dir=cli_args.cache_dir,
    ).process()