001/**
002 * Copyright (C) 2006-2024 Talend Inc. - www.talend.com
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.talend.sdk.component.tools.validator;
017
018import static java.util.Collections.emptyList;
019import static java.util.Optional.ofNullable;
020import static java.util.function.Function.identity;
021import static java.util.stream.Collectors.joining;
022import static java.util.stream.Collectors.toList;
023import static java.util.stream.Collectors.toMap;
024import static java.util.stream.Stream.empty;
025
026import java.util.Collection;
027import java.util.HashSet;
028import java.util.List;
029import java.util.Map;
030import java.util.Objects;
031import java.util.Set;
032import java.util.stream.Collectors;
033import java.util.stream.Stream;
034
035import org.apache.xbean.finder.AnnotationFinder;
036import org.talend.sdk.component.api.configuration.type.DataSet;
037import org.talend.sdk.component.api.input.Emitter;
038import org.talend.sdk.component.api.input.PartitionMapper;
039import org.talend.sdk.component.api.processor.AfterGroup;
040import org.talend.sdk.component.api.processor.ElementListener;
041import org.talend.sdk.component.api.processor.Output;
042import org.talend.sdk.component.api.processor.Processor;
043import org.talend.sdk.component.runtime.manager.ParameterMeta;
044import org.talend.sdk.component.runtime.manager.reflect.ParameterModelService;
045import org.talend.sdk.component.runtime.manager.reflect.parameterenricher.BaseParameterEnricher;
046import org.talend.sdk.component.runtime.manager.service.LocalConfigurationService;
047import org.talend.sdk.component.tools.validator.Validators.ValidatorHelper;
048
049public class DatasetValidator implements Validator {
050
051    private final Validators.ValidatorHelper helper;
052
053    public DatasetValidator(final ValidatorHelper helper) {
054        this.helper = helper;
055    }
056
057    @Override
058    public Stream<String> validate(final AnnotationFinder finder, final List<Class<?>> components) {
059        final List<Class<?>> datasetClasses = finder.findAnnotatedClasses(DataSet.class);
060        final Map<Class<?>, String> datasets =
061                datasetClasses.stream().collect(toMap(identity(), d -> d.getAnnotation(DataSet.class).value()));
062
063        final Stream<String> duplicated = this.duplicatedDataset(datasets.values());
064
065        final Stream<String> i18nError = datasets
066                .entrySet()
067                .stream()
068                .map(entry -> this.helper
069                        .validateFamilyI18nKey(entry.getKey(),
070                                "${family}.dataset." + entry.getValue() + "._displayName"))
071                .filter(Objects::nonNull);
072
073        // ensure there is always a source with a config matching without user entries each dataset
074        final Map<Class<?>, Collection<ParameterMeta>> componentNeedingADataSet = components
075                .stream()
076                .filter(c -> isSource(c) || isOutput(c))
077                .collect(toMap(identity(), helper::buildOrGetParameters));
078
079        final Map<? extends Class<?>, Collection<ParameterMeta>> inputs = componentNeedingADataSet
080                .entrySet()
081                .stream()
082                .filter(it -> isSource(it.getKey()))
083                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
084
085        final Stream<String> sourceWithoutDataset = datasets
086                .entrySet()
087                .stream()
088                .filter(dataset -> inputs.isEmpty() || inputs.entrySet().stream().allMatch(input -> {
089                    final Collection<ParameterMeta> allProps = flatten(input.getValue()).collect(toList());
090                    final Collection<ParameterMeta> datasetProperties =
091                            findNestedDataSets(allProps, dataset.getValue()).collect(toList());
092                    return !datasetProperties.isEmpty() && allProps
093                            .stream()
094                            // .filter(it -> it.getType() != OBJECT && it.getType() != ARRAY) // should it be
095                            // done?
096                            .filter(it -> datasetProperties
097                                    .stream()
098                                    .noneMatch(dit -> it.getPath().equals(dit.getPath())
099                                            || it.getPath().startsWith(dit.getPath() + '.')))
100                            .anyMatch(this::isRequired);
101                }))
102                .map(dataset -> "No source instantiable without adding parameters for @DataSet(\"" + dataset.getValue()
103                        + "\") (" + dataset.getKey().getName() + "), please ensure at least a source using this "
104                        + "dataset can be used just filling the dataset information.")
105                .sorted();
106
107        // "cloud" rule - ensure all input/output have a dataset at least
108        final Stream<String> configWithoutDataset = componentNeedingADataSet
109                .entrySet()
110                .stream()
111                .filter(it -> flatten(it.getValue())
112                        .noneMatch((ParameterMeta prop) -> "dataset"
113                                .equals(prop.getMetadata().get("tcomp::configurationtype::type"))
114                                || "datasetDiscovery".equals(prop.getMetadata().get("tcomp::configurationtype::type"))))
115                .map(it -> "The component " + it.getKey().getName()
116                        + " is missing a dataset in its configuration (see @DataSet)")
117                .sorted();
118
119        // "cloud" rule - ensure all datasets have a datastore
120        final BaseParameterEnricher.Context context =
121                new BaseParameterEnricher.Context(new LocalConfigurationService(emptyList(), "tools"));
122        final Stream<String> withoutStore = datasetClasses
123                .stream()
124                .map((Class<?> ds) -> this.findDatasetWithoutDataStore(ds, context))
125                .filter(Objects::nonNull)
126                .sorted();
127        return Stream
128                .of(duplicated, i18nError, sourceWithoutDataset, configWithoutDataset, withoutStore)
129                .reduce(Stream::concat)
130                .orElseGet(Stream::empty);
131    }
132
133    private String findDatasetWithoutDataStore(final Class<?> ds, final BaseParameterEnricher.Context context) {
134        final List<ParameterMeta> dataset = helper
135                .getParameterModelService()
136                .buildParameterMetas(Stream.of(new ParameterModelService.Param(ds, ds.getAnnotations(), "dataset")), ds,
137                        ofNullable(ds.getPackage()).map(Package::getName).orElse(""), true, context);
138        if (flatten(dataset)
139                .noneMatch(prop -> "datastore".equals(prop.getMetadata().get("tcomp::configurationtype::type")))) {
140            return "The dataset " + ds.getName()
141                    + " is missing a datastore reference in its configuration (see @DataStore)";
142        }
143        return null;
144    }
145
146    protected static Stream<String> duplicatedDataset(final Collection<String> datasets) {
147
148        final Set<String> uniqueDatasets = new HashSet<>(datasets);
149        if (datasets.size() != uniqueDatasets.size()) {
150            return Stream
151                    .of("Duplicated DataSet found : " + datasets
152                            .stream()
153                            .collect(Collectors.groupingBy(identity()))
154                            .entrySet()
155                            .stream()
156                            .filter(e -> e.getValue().size() > 1)
157                            .map(Map.Entry::getKey)
158                            .collect(joining(", ")));
159        }
160        return Stream.empty();
161    }
162
163    protected static Stream<ParameterMeta> flatten(final Collection<ParameterMeta> options) {
164        return options
165                .stream()
166                .flatMap(it -> Stream
167                        .concat(Stream.of(it),
168                                it.getNestedParameters().isEmpty() ? empty() : flatten(it.getNestedParameters())));
169    }
170
171    private boolean isSource(final Class<?> component) {
172        return component.isAnnotationPresent(PartitionMapper.class) || component.isAnnotationPresent(Emitter.class);
173    }
174
175    private boolean isOutput(final Class<?> component) {
176        return component.isAnnotationPresent(Processor.class) && Stream
177                .of(component.getMethods())
178                .filter(it -> it.isAnnotationPresent(ElementListener.class) || it.isAnnotationPresent(AfterGroup.class))
179                .allMatch(it -> void.class == it.getReturnType()
180                        && Stream.of(it.getParameters()).noneMatch(param -> param.isAnnotationPresent(Output.class)));
181    }
182
183    private Stream<ParameterMeta> findNestedDataSets(final Collection<ParameterMeta> options, final String name) {
184        return options
185                .stream()
186                .filter(it -> "dataset".equals(it.getMetadata().get("tcomp::configurationtype::type"))
187                        && name.equals(it.getMetadata().get("tcomp::configurationtype::name")));
188    }
189
190    private boolean isRequired(final ParameterMeta parameterMeta) {
191        return Boolean.parseBoolean(parameterMeta.getMetadata().getOrDefault("tcomp::validation::required", "false"));
192    }
193
194}