/**
 * Copyright (C) 2006-2025 Talend Inc. - www.talend.com
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.talend.sdk.component.tools.validator;

import static java.util.Collections.emptyList;
import static java.util.Optional.ofNullable;
import static java.util.function.Function.identity;
import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;
import static java.util.stream.Stream.empty;

import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.xbean.finder.AnnotationFinder;
import org.talend.sdk.component.api.configuration.type.DataSet;
import org.talend.sdk.component.api.input.Emitter;
import org.talend.sdk.component.api.input.PartitionMapper;
import org.talend.sdk.component.api.processor.AfterGroup;
import org.talend.sdk.component.api.processor.ElementListener;
import org.talend.sdk.component.api.processor.Output;
import org.talend.sdk.component.api.processor.Processor;
import org.talend.sdk.component.runtime.manager.ParameterMeta;
import org.talend.sdk.component.runtime.manager.reflect.ParameterModelService;
import org.talend.sdk.component.runtime.manager.reflect.parameterenricher.BaseParameterEnricher;
import org.talend.sdk.component.runtime.manager.service.LocalConfigurationService;
import org.talend.sdk.component.tools.validator.Validators.ValidatorHelper;

/**
 * Validates the {@link DataSet} related rules of a component module.
 *
 * <p>The following checks are performed, each one contributing zero or more error messages:
 * <ul>
 * <li>no two {@code @DataSet} classes declare the same value;</li>
 * <li>each dataset passes the helper's family i18n key check for
 * <code>${family}.dataset.&lt;name&gt;._displayName</code>;</li>
 * <li>sources do not all require extra mandatory parameters on top of a dataset they use;</li>
 * <li>every source/output component has a dataset (or dataset discovery) in its configuration;</li>
 * <li>every dataset configuration contains a datastore.</li>
 * </ul>
 */
public class DatasetValidator implements Validator {

    /** Metadata key holding the configuration type of a parameter ("dataset", "datastore", ...). */
    private static final String CONFIGURATION_TYPE = "tcomp::configurationtype::type";

    /** Metadata key holding the configuration name of a parameter (the {@code @DataSet} value for datasets). */
    private static final String CONFIGURATION_NAME = "tcomp::configurationtype::name";

    /** Metadata key flagging a parameter as required. */
    private static final String VALIDATION_REQUIRED = "tcomp::validation::required";

    private final Validators.ValidatorHelper helper;

    public DatasetValidator(final ValidatorHelper helper) {
        this.helper = helper;
    }

    /**
     * Runs all dataset rules and concatenates their error messages.
     *
     * @param finder used to look up the classes annotated with {@link DataSet}.
     * @param components the component classes to validate.
     * @return the error messages; each rule's messages are sorted, rules are emitted in a fixed order.
     */
    @Override
    public Stream<String> validate(final AnnotationFinder finder, final List<Class<?>> components) {
        final List<Class<?>> datasetClasses = finder.findAnnotatedClasses(DataSet.class);
        final Map<Class<?>, String> datasets =
                datasetClasses.stream().collect(toMap(identity(), d -> d.getAnnotation(DataSet.class).value()));

        final Stream<String> duplicated = duplicatedDataset(datasets.values());

        // the helper result is filtered on non-null, i.e. a null result is treated as "no error";
        // sorted like the other rules because 'datasets' has no defined iteration order
        final Stream<String> i18nError = datasets
                .entrySet()
                .stream()
                .map(entry -> this.helper
                        .validateFamilyI18nKey(entry.getKey(),
                                "${family}.dataset." + entry.getValue() + "._displayName"))
                .filter(Objects::nonNull)
                .sorted();

        // ensure there is always a source with a config matching without user entries each dataset
        final Map<Class<?>, Collection<ParameterMeta>> componentNeedingADataSet = components
                .stream()
                .filter(c -> isSource(c) || isOutput(c))
                .collect(toMap(identity(), helper::buildOrGetParameters));

        // flatten each source configuration once instead of once per dataset
        final List<List<ParameterMeta>> flattenedSources = componentNeedingADataSet
                .entrySet()
                .stream()
                .filter(it -> isSource(it.getKey()))
                .map(it -> flatten(it.getValue()).collect(toList()))
                .collect(toList());

        // allMatch is true on an empty stream: without any source every dataset is reported
        final Stream<String> sourceWithoutDataset = datasets
                .entrySet()
                .stream()
                .filter(dataset -> flattenedSources
                        .stream()
                        .allMatch(props -> needsInputBeyondDataset(props, dataset.getValue())))
                .map(dataset -> "No source instantiable without adding parameters for @DataSet(\"" + dataset.getValue()
                        + "\") (" + dataset.getKey().getName() + "), please ensure at least a source using this "
                        + "dataset can be used just filling the dataset information.")
                .sorted();

        // "cloud" rule - ensure all input/output have a dataset at least
        final Stream<String> configWithoutDataset = componentNeedingADataSet
                .entrySet()
                .stream()
                .filter(it -> flatten(it.getValue())
                        .noneMatch(prop -> hasConfigurationType(prop, "dataset")
                                || hasConfigurationType(prop, "datasetDiscovery")))
                .map(it -> "The component " + it.getKey().getName()
                        + " is missing a dataset in its configuration (see @DataSet)")
                .sorted();

        // "cloud" rule - ensure all datasets have a datastore
        final BaseParameterEnricher.Context context =
                new BaseParameterEnricher.Context(new LocalConfigurationService(emptyList(), "tools"));
        final Stream<String> withoutStore = datasetClasses
                .stream()
                .map((Class<?> ds) -> this.findDatasetWithoutDataStore(ds, context))
                .filter(Objects::nonNull)
                .sorted();

        return Stream
                .of(duplicated, i18nError, sourceWithoutDataset, configWithoutDataset, withoutStore)
                .reduce(Stream::concat)
                .orElseGet(Stream::empty);
    }

    /**
     * Tells whether a source references the given dataset and still has at least one required parameter
     * located outside of that dataset.
     *
     * <p>NOTE(review): a source which does not reference the dataset at all yields {@code false}, which
     * makes the calling {@code allMatch} fail and therefore suppresses the error for that dataset. This is
     * the historical behavior and is intentionally kept; confirm it is the desired rule.
     *
     * @param allProps the flattened parameters of one source.
     * @param datasetName the {@code @DataSet} value to look for.
     * @return true if the dataset is used and a required parameter exists outside of it.
     */
    private boolean needsInputBeyondDataset(final Collection<ParameterMeta> allProps, final String datasetName) {
        final Collection<ParameterMeta> datasetProperties =
                findNestedDataSets(allProps, datasetName).collect(toList());
        if (datasetProperties.isEmpty()) {
            return false;
        }
        return allProps
                .stream()
                // open question kept from the original code: should OBJECT and ARRAY typed
                // parameters be ignored here?
                .filter(it -> datasetProperties
                        .stream()
                        .noneMatch(dit -> it.getPath().equals(dit.getPath())
                                || it.getPath().startsWith(dit.getPath() + '.')))
                .anyMatch(this::isRequired);
    }

    /**
     * Builds the parameter model of a dataset class and checks it contains a datastore.
     *
     * @param ds the class annotated with {@link DataSet}.
     * @param context the enricher context handed to the parameter model service.
     * @return an error message if no parameter of type "datastore" is found, {@code null} otherwise.
     */
    private String findDatasetWithoutDataStore(final Class<?> ds, final BaseParameterEnricher.Context context) {
        final List<ParameterMeta> dataset = helper
                .getParameterModelService()
                .buildParameterMetas(Stream.of(new ParameterModelService.Param(ds, ds.getAnnotations(), "dataset")), ds,
                        ofNullable(ds.getPackage()).map(Package::getName).orElse(""), true, context);
        if (flatten(dataset).noneMatch(prop -> hasConfigurationType(prop, "datastore"))) {
            return "The dataset " + ds.getName()
                    + " is missing a datastore reference in its configuration (see @DataStore)";
        }
        return null;
    }

    /**
     * Reports dataset names which are declared more than once.
     *
     * @param datasets all declared dataset names, duplicates included.
     * @return a single message listing the duplicated names in alphabetical order, or an empty stream
     * when every name is unique.
     */
    protected static Stream<String> duplicatedDataset(final Collection<String> datasets) {
        final Set<String> uniqueDatasets = new HashSet<>(datasets);
        if (datasets.size() == uniqueDatasets.size()) {
            return Stream.empty();
        }
        final String duplicatedNames = datasets
                .stream()
                .collect(Collectors.groupingBy(identity()))
                .entrySet()
                .stream()
                .filter(e -> e.getValue().size() > 1)
                .map(Map.Entry::getKey)
                // the grouping map has no defined iteration order, sort for a stable message
                .sorted()
                .collect(joining(", "));
        return Stream.of("Duplicated DataSet found : " + duplicatedNames);
    }

    /**
     * Flattens a parameter tree depth-first, each parameter being emitted before its nested parameters.
     *
     * @param options the root parameters.
     * @return a stream of the given parameters and all their descendants.
     */
    protected static Stream<ParameterMeta> flatten(final Collection<ParameterMeta> options) {
        return options
                .stream()
                .flatMap(it -> Stream
                        .concat(Stream.of(it),
                                it.getNestedParameters().isEmpty() ? empty() : flatten(it.getNestedParameters())));
    }

    /**
     * @param component the component class to test.
     * @return true if the class is annotated with {@link PartitionMapper} or {@link Emitter}.
     */
    private boolean isSource(final Class<?> component) {
        return component.isAnnotationPresent(PartitionMapper.class) || component.isAnnotationPresent(Emitter.class);
    }

    /**
     * A processor is considered an output when none of its public {@link ElementListener} or
     * {@link AfterGroup} methods produces data, i.e. they all return {@code void} and none takes an
     * {@link Output} parameter. A processor without any such method matches as well.
     *
     * @param component the component class to test.
     * @return true if the class is a {@link Processor} behaving as an output.
     */
    private boolean isOutput(final Class<?> component) {
        return component.isAnnotationPresent(Processor.class) && Stream
                .of(component.getMethods())
                .filter(it -> it.isAnnotationPresent(ElementListener.class) || it.isAnnotationPresent(AfterGroup.class))
                .allMatch(it -> void.class == it.getReturnType()
                        && Stream.of(it.getParameters()).noneMatch(param -> param.isAnnotationPresent(Output.class)));
    }

    /**
     * @param options the (already flattened) parameters to scan.
     * @param name the dataset name to match.
     * @return the parameters whose configuration type is "dataset" and configuration name is {@code name}.
     */
    private Stream<ParameterMeta> findNestedDataSets(final Collection<ParameterMeta> options, final String name) {
        return options
                .stream()
                .filter(it -> hasConfigurationType(it, "dataset")
                        && name.equals(it.getMetadata().get(CONFIGURATION_NAME)));
    }

    /**
     * @param parameterMeta the parameter to test.
     * @return true if the parameter metadata flags it as required; a missing flag means not required.
     */
    private boolean isRequired(final ParameterMeta parameterMeta) {
        return Boolean.parseBoolean(parameterMeta.getMetadata().getOrDefault(VALIDATION_REQUIRED, "false"));
    }

    /**
     * @param param the parameter to test.
     * @param type the expected configuration type.
     * @return true if the parameter metadata declares exactly this configuration type.
     */
    private static boolean hasConfigurationType(final ParameterMeta param, final String type) {
        return type.equals(param.getMetadata().get(CONFIGURATION_TYPE));
    }

}