001/*
002 *  Licensed to the Apache Software Foundation (ASF) under one
003 *  or more contributor license agreements.  See the NOTICE file
004 *  distributed with this work for additional information
005 *  regarding copyright ownership.  The ASF licenses this file
006 *  to you under the Apache License, Version 2.0 (the
007 *  "License"); you may not use this file except in compliance
008 *  with the License.  You may obtain a copy of the License at
009 *  
010 *    http://www.apache.org/licenses/LICENSE-2.0
011 *  
012 *  Unless required by applicable law or agreed to in writing,
013 *  software distributed under the License is distributed on an
014 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
015 *  KIND, either express or implied.  See the License for the
016 *  specific language governing permissions and limitations
017 *  under the License. 
018 *  
019 */
020package org.apache.directory.server.xdbm.search.impl;
021
022
023import java.io.IOException;
024import java.util.HashSet;
025import java.util.List;
026import java.util.Set;
027
028import org.apache.directory.api.ldap.model.constants.SchemaConstants;
029import org.apache.directory.api.ldap.model.cursor.Cursor;
030import org.apache.directory.api.ldap.model.exception.LdapException;
031import org.apache.directory.api.ldap.model.exception.LdapOtherException;
032import org.apache.directory.api.ldap.model.filter.AndNode;
033import org.apache.directory.api.ldap.model.filter.ApproximateNode;
034import org.apache.directory.api.ldap.model.filter.AssertionNode;
035import org.apache.directory.api.ldap.model.filter.BranchNode;
036import org.apache.directory.api.ldap.model.filter.EqualityNode;
037import org.apache.directory.api.ldap.model.filter.ExprNode;
038import org.apache.directory.api.ldap.model.filter.ExtensibleNode;
039import org.apache.directory.api.ldap.model.filter.GreaterEqNode;
040import org.apache.directory.api.ldap.model.filter.LeafNode;
041import org.apache.directory.api.ldap.model.filter.LessEqNode;
042import org.apache.directory.api.ldap.model.filter.NotNode;
043import org.apache.directory.api.ldap.model.filter.OrNode;
044import org.apache.directory.api.ldap.model.filter.PresenceNode;
045import org.apache.directory.api.ldap.model.filter.ScopeNode;
046import org.apache.directory.api.ldap.model.filter.SimpleNode;
047import org.apache.directory.api.ldap.model.filter.SubstringNode;
048import org.apache.directory.api.util.Strings;
049import org.apache.directory.server.core.api.partition.Partition;
050import org.apache.directory.server.core.api.partition.PartitionTxn;
051import org.apache.directory.server.i18n.I18n;
052import org.apache.directory.server.xdbm.Index;
053import org.apache.directory.server.xdbm.IndexNotFoundException;
054import org.apache.directory.server.xdbm.Store;
055import org.apache.directory.server.xdbm.search.Optimizer;
056
057
058/**
059 * Optimizer that annotates the filter using scan counts.
060 * 
061 * @author <a href="mailto:dev@directory.apache.org">Apache Directory Project</a>
062 */
063public class DefaultOptimizer implements Optimizer
064{
    /** Annotation key under which {@code getEqualityScan} stores the set of candidate values it collected on a node */
    static final String CANDIDATES_ANNOTATION_KEY = "candidates";

    /** the database this optimizer operates on */
    private final Store db;

    /** Cached id of the suffix (context) entry; stays null until {@code getContextEntryId} manages to resolve it */
    private String contextEntryId;
070
071
072    /**
073     * Creates an optimizer on a database.
074     *
075     * @param db the database this optimizer works for.
076     */
077    public DefaultOptimizer( Store db )
078    {
079        this.db = db;
080    }
081
082
083    // This will suppress PMD.EmptyCatchBlock warnings in this method
084    @SuppressWarnings("PMD.EmptyCatchBlock")
085    private String getContextEntryId( PartitionTxn partitionTxn ) throws LdapException
086    {
087        if ( contextEntryId == null )
088        {
089            try
090            {
091                this.contextEntryId = db.getEntryId( partitionTxn, ( ( Partition ) db ).getSuffixDn() );
092            }
093            catch ( Exception e )
094            {
095                // might not have been created
096            }
097        }
098
099        if ( contextEntryId == null )
100        {
101            return Partition.DEFAULT_ID;
102        }
103
104        return contextEntryId;
105    }
106
107
108    /**
109     * Annotates the expression tree to determine optimal evaluation order based
110     * on the scan count for indices that exist for each expression node.  If an
111     * index on the attribute does not exist an IndexNotFoundException will be
112     * thrown.
113     *
114     * {@inheritDoc}
115     */
116    @Override
117    @SuppressWarnings("unchecked")
118    public Long annotate( PartitionTxn partitionTxn, ExprNode node ) throws LdapException
119    {
120        // Start off with the worst case unless scan count says otherwise.
121        Long count = Long.MAX_VALUE;
122
123        /* --------------------------------------------------------------------
124         *                 H A N D L E   L E A F   N O D E S          
125         * --------------------------------------------------------------------
126         * 
127         * Each leaf node is based on an attribute and it represents a condition
128         * that needs to be statisfied.  We ask the index (if one exists) for 
129         * the attribute to give us a scan count of all the candidates that 
130         * would satisfy the attribute assertion represented by the leaf node.
131         * 
132         * This is conducted differently based on the type of the leaf node.
133         * Comments on each node type explain how each scan count is arrived at.
134         */
135
136        if ( node instanceof ScopeNode )
137        {
138            count = getScopeScan( partitionTxn, ( ScopeNode ) node );
139        }
140        else if ( node instanceof AssertionNode )
141        {
142            /* 
143             * Leave it up to the assertion node to determine just how much it
144             * will cost us.  Anyway it defaults to a maximum scan count if a
145             * scan count is not specified by the implementation.
146             */
147        }
148        else if ( node.isLeaf() )
149        {
150            LeafNode leaf = ( LeafNode ) node;
151
152            try
153            {  
154                if ( node instanceof PresenceNode )
155                {
156                    count = getPresenceScan( partitionTxn, ( PresenceNode ) leaf );
157                }
158                else if ( node instanceof EqualityNode )
159                {
160                    count = getEqualityScan( partitionTxn, ( EqualityNode ) leaf );
161                }
162                else if ( node instanceof GreaterEqNode )
163                {
164                    count = getGreaterLessScan( partitionTxn, ( GreaterEqNode ) leaf, SimpleNode.EVAL_GREATER );
165                }
166                else if ( node instanceof LessEqNode )
167                {
168                    count = getGreaterLessScan( partitionTxn, ( SimpleNode ) leaf, SimpleNode.EVAL_LESSER );
169                }
170                else if ( node instanceof SubstringNode )
171                {
172                    /** Cannot really say so we presume the total index count */
173                    count = getSubstringScan( partitionTxn, ( SubstringNode ) leaf );
174                }
175                else if ( node instanceof ExtensibleNode )
176                {
177                    /** Cannot really say so we presume the total index count */
178                    count = getFullScan( partitionTxn, leaf );
179                }
180                else if ( node instanceof ApproximateNode )
181                {
182                    /** Feature not implemented so we just use equality matching */
183                    count = getEqualityScan( partitionTxn, ( ApproximateNode ) leaf );
184                }
185                else
186                {
187                    throw new IllegalArgumentException( I18n.err( I18n.ERR_711 ) );
188                }
189            }
190            catch ( IndexNotFoundException | IOException e )
191            {
192                throw new LdapOtherException( e.getMessage(), e );
193            }
194        }
195        // --------------------------------------------------------------------
196        //                 H A N D L E   B R A N C H   N O D E S       
197        // --------------------------------------------------------------------
198        else
199        {
200            if ( node instanceof AndNode )
201            {
202                count = getConjunctionScan( partitionTxn, ( AndNode ) node );
203            }
204            else if ( node instanceof OrNode )
205            {
206                count = getDisjunctionScan( partitionTxn, ( OrNode ) node );
207            }
208            else if ( node instanceof NotNode )
209            {
210                annotate( partitionTxn, ( ( NotNode ) node ).getFirstChild() );
211
212                /*
213                 * A negation filter is always worst case since we will have
214                 * to retrieve all entries from the master table then test
215                 * each one against the negated child filter.  There is no way
216                 * to use the indices.
217                 */
218                count = Long.MAX_VALUE;
219            }
220            else
221            {
222                throw new IllegalArgumentException( I18n.err( I18n.ERR_712 ) );
223            }
224        }
225
226        // Protect against overflow when counting.
227        if ( count < 0L )
228        {
229            count = Long.MAX_VALUE;
230        }
231
232        node.set( "count", count );
233
234        return count;
235    }
236
237
238    /**
239     * ANDs or Conjunctions take the count of the smallest child as their count.
240     * This is the best that a conjunction can do and should be used rather than
241     * the worst case. Notice that we annotate the child node with a recursive 
242     * call before accessing its count parameter making the chain recursion 
243     * depth first.
244     *
245     * @param node a AND (Conjunction) BranchNode
246     * @return the calculated scan count
247     * @throws Exception if there is an error
248     */
249    private long getConjunctionScan( PartitionTxn partitionTxn, BranchNode node ) throws LdapException
250    {
251        long count = Long.MAX_VALUE;
252        List<ExprNode> children = node.getChildren();
253
254        for ( ExprNode child : children )
255        {
256            if ( ( count == 1 ) && ( child instanceof ScopeNode ) )
257            {
258                // We can stop here
259                break;
260            }
261
262            annotate( partitionTxn, child );
263            count = Math.min( ( ( Long ) child.get( "count" ) ), count );
264
265            if ( count == 0 )
266            {
267                // No need to continue
268                break;
269            }
270        }
271
272        return count;
273    }
274
275
276    /**
277     * Disjunctions (OR) are the union of candidates across all subexpressions 
278     * so we add all the counts of the child nodes. Notice that we annotate the 
279     * child node with a recursive call.
280     *
281     * @param node the OR branch node
282     * @return the scan count on the OR node
283     * @throws Exception if there is an error
284     */
285    private long getDisjunctionScan( PartitionTxn partitionTxn, BranchNode node ) throws LdapException
286    {
287        List<ExprNode> children = node.getChildren();
288        long total = 0L;
289
290        for ( ExprNode child : children )
291        {
292            annotate( partitionTxn, child );
293            total += ( Long ) child.get( "count" );
294
295            if ( total == Long.MAX_VALUE )
296            {
297                // We can stop here withoit evaluating the following filters
298                break;
299            }
300        }
301
302        return total;
303    }
304
305
306    /**
307     * Gets the worst case scan count for all entries that satisfy the equality
308     * assertion in the SimpleNode argument.  
309     *
310     * @param node the node to get a scan count for 
311     * @return the worst case
312     * @throws Exception if there is an error accessing an index
313     */
314    @SuppressWarnings("unchecked")
315    private <V> long getEqualityScan( PartitionTxn partitionTxn, SimpleNode<V> node ) throws LdapException, IndexNotFoundException, IOException
316    {
317        if ( db.hasIndexOn( node.getAttributeType() ) )
318        {
319            Index<V, String> idx = ( Index<V, String> ) db.getIndex( node.getAttributeType() );
320
321            String normalizedKey;
322            
323            if ( node.getValue().isSchemaAware() )
324            {
325                normalizedKey = node.getValue().getNormalized();
326            }
327            else
328            {
329                normalizedKey = node.getAttributeType().getEquality().getNormalizer().normalize( node.getValue().getValue() );
330            }
331            
332            Cursor<String> result = idx.forwardValueCursor( partitionTxn, ( V ) normalizedKey );
333            Set<String> values = new HashSet<>();
334            int nbFound = 0;
335
336            for ( String value : result )
337            {
338                values.add( value );
339                nbFound++;
340
341                // Arbitrary stop gathering the candidates if we have more than 100
342                if ( nbFound == 100 )
343                {
344                    break;
345                }
346            }
347
348            result.close();
349
350            if ( nbFound < 100 )
351            {
352                // Store the found candidates in the node
353                node.set( CANDIDATES_ANNOTATION_KEY, values );
354
355                return values.size();
356            }
357            else
358            {
359                // Reset the candidates annotation
360                node.set( CANDIDATES_ANNOTATION_KEY, null );
361
362                return idx.count( partitionTxn, ( V ) node.getValue().getNormalized() );
363            }
364        }
365
366        // count for non-indexed attribute is unknown so we presume da worst
367        return Long.MAX_VALUE;
368    }
369
370
371    /**
372     * Gets a scan count of the nodes that satisfy the greater or less than test
373     * specified by the node.
374     *
375     * @param node the greater or less than node to get a count for 
376     * @param isGreaterThan if true test is for >=, otherwise <=
377     * @return the scan count of all nodes satisfying the Ava
378     * @throws Exception if there is an error accessing an index
379     */
380    @SuppressWarnings("unchecked")
381    private <V> long getGreaterLessScan( PartitionTxn partitionTxn, SimpleNode<V> node, boolean isGreaterThan ) throws LdapException, IndexNotFoundException
382    {
383        if ( db.hasIndexOn( node.getAttributeType() ) )
384        {
385            Index<V, String> idx = ( Index<V, String> ) db.getIndex( node.getAttributeType() );
386
387            if ( isGreaterThan )
388            {
389                return idx.greaterThanCount( partitionTxn, ( V ) node.getValue().getValue() );
390            }
391            else
392            {
393                return idx.lessThanCount( partitionTxn, ( V ) node.getValue().getValue() );
394            }
395        }
396
397        // count for non-indexed attribute is unknown so we presume da worst
398        return Long.MAX_VALUE;
399    }
400
401
402    /**
403     * Get a scan count based on a Substring node : we will count the entries that are greater
404     * than ABC where the filter is (attr=ABC*). Any other filter won't be evaluated (for instance,
405     * a filter like (attr=*ABC) will resolve to a full scan atm - we could have created a reverted
406     * index for such a case -, and filters like (attr=*ABC*) also esolve to a full scan).
407     * 
408     * @param node The substring node
409     * @return The number of candidates
410     * @throws Exception If there is an error accessing an index
411     */
412    private long getSubstringScan( PartitionTxn partitionTxn, SubstringNode node ) throws LdapException, IndexNotFoundException
413    {
414        if ( db.hasIndexOn( node.getAttributeType() ) )
415        {
416            Index<String, String> idx = ( Index<String, String> ) db.getIndex( node.getAttributeType() );
417
418            String initial = node.getInitial();
419
420            if ( Strings.isEmpty( initial ) )
421            {
422                // Not a (attr=ABC*) filter : full index scan
423                return idx.count( partitionTxn );
424            }
425            else
426            {
427                return idx.greaterThanCount( partitionTxn, initial );
428            }
429        }
430        else
431        {
432            // count for non-indexed attribute is unknown so we presume da worst
433            return Long.MAX_VALUE;
434        }
435    }
436
437
438    /**
439     * Gets the total number of entries within the database index if one is 
440     * available otherwise the count of all the entries within the database is
441     * returned.
442     *
443     * @param node the leaf node to get a full scan count for 
444     * @return the worst case full scan count
445     * @throws Exception if there is an error access database indices
446     */
447    private long getFullScan( PartitionTxn partitionTxn, LeafNode node ) throws LdapException, IndexNotFoundException
448    {
449        if ( db.hasIndexOn( node.getAttributeType() ) )
450        {
451            Index<?, ?> idx = db.getIndex( node.getAttributeType() );
452            return idx.count( partitionTxn );
453        }
454
455        return Long.MAX_VALUE;
456    }
457
458
459    /**
460     * Gets the number of entries that would be returned by a presence node
461     * assertion.  Leverages the presence system index for scan counts.
462     *
463     * @param node the presence node
464     * @return the number of entries matched for the presence of an attribute
465     * @throws Exception if errors result
466     */
467    private long getPresenceScan( PartitionTxn partitionTxn, PresenceNode node ) throws LdapException
468    {
469        if ( db.hasUserIndexOn( node.getAttributeType() )
470             || node.getAttributeType().getOid().equals( SchemaConstants.ADMINISTRATIVE_ROLE_AT_OID ) )
471        {
472            Index<String, String> presenceIndex = db.getPresenceIndex();
473
474            return presenceIndex.count( partitionTxn, node.getAttributeType().getOid() );
475        }
476        else if ( db.hasSystemIndexOn( node.getAttributeType() )
477            || ( node.getAttributeType().getOid() == SchemaConstants.ENTRY_UUID_AT_OID ) )
478        {
479            // the system indices (objectClass, entryUUID and entryCSN) are maintained for
480            // each entry, so we could just return the database count
481            return db.count( partitionTxn );
482        }
483
484        return Long.MAX_VALUE;
485    }
486
487
488    /**
489     * Gets the scan count for the scope node attached to this filter.
490     *
491     * @param node the ScopeNode
492     * @return the scan count for scope
493     * @throws Exception if any errors result
494     */
495    private long getScopeScan( PartitionTxn partitionTxn, ScopeNode node ) throws LdapException
496    {
497        String id = node.getBaseId();
498
499        switch ( node.getScope() )
500        {
501            case OBJECT:
502                return 1L;
503
504            case ONELEVEL:
505                return db.getChildCount( partitionTxn, id );
506
507            case SUBTREE:
508                if ( id == getContextEntryId( partitionTxn ) )
509                {
510                    return db.count( partitionTxn );
511                }
512                else
513                {
514                    return db.getRdnIndex().reverseLookup( partitionTxn, id ).getNbDescendants() + 1L;
515                }
516
517            default:
518                throw new IllegalArgumentException( I18n.err( I18n.ERR_713 ) );
519        }
520    }
521}