001    /**
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.camel.dataformat.bindy.csv;
018    
019    import java.io.InputStream;
020    import java.io.InputStreamReader;
021    import java.io.OutputStream;
022    import java.util.ArrayList;
023    import java.util.Arrays;
024    import java.util.HashMap;
025    import java.util.Iterator;
026    import java.util.List;
027    import java.util.Map;
028    import java.util.Scanner;
029    
030    import org.apache.camel.Exchange;
031    import org.apache.camel.dataformat.bindy.BindyAbstractDataFormat;
032    import org.apache.camel.dataformat.bindy.BindyAbstractFactory;
033    import org.apache.camel.dataformat.bindy.BindyCsvFactory;
034    import org.apache.camel.dataformat.bindy.util.Converter;
035    import org.apache.camel.spi.DataFormat;
036    import org.apache.camel.spi.PackageScanClassResolver;
037    import org.apache.camel.util.IOHelper;
038    import org.apache.camel.util.ObjectHelper;
039    import org.slf4j.Logger;
040    import org.slf4j.LoggerFactory;
041    
042    /**
043     * A <a href="http://camel.apache.org/data-format.html">data format</a> (
044     * {@link DataFormat}) using Bindy to marshal to and from CSV files
045     */
046    public class BindyCsvDataFormat extends BindyAbstractDataFormat {
047        private static final transient Logger LOG = LoggerFactory.getLogger(BindyCsvDataFormat.class);
048    
049        public BindyCsvDataFormat() {
050        }
051    
052        public BindyCsvDataFormat(String... packages) {
053            super(packages);
054        }
055    
056        @SuppressWarnings("unchecked")
057        public void marshal(Exchange exchange, Object body, OutputStream outputStream) throws Exception {
058    
059            BindyCsvFactory factory = (BindyCsvFactory)getFactory(exchange.getContext().getPackageScanClassResolver());
060            ObjectHelper.notNull(factory, "not instantiated");
061    
062            // Get CRLF
063            byte[] bytesCRLF = Converter.getByteReturn(factory.getCarriageReturn());
064    
065            if (factory.getGenerateHeaderColumnNames()) {
066    
067                String result = factory.generateHeader();
068                byte[] bytes = exchange.getContext().getTypeConverter().convertTo(byte[].class, exchange, result);
069                outputStream.write(bytes);
070    
071                // Add a carriage return
072                outputStream.write(bytesCRLF);
073            }
074    
075            List<Map<String, Object>> models;
076    
077            // the body is not a prepared list so help a bit here and create one for us
078            if (exchange.getContext().getTypeConverter().convertTo(List.class, body) == null) {
079                models = new ArrayList<Map<String, Object>>();
080                Iterator it = ObjectHelper.createIterator(body);
081                while (it.hasNext()) {
082                    Object model = it.next();
083                    String name = model.getClass().getName();
084                    Map<String, Object> row = new HashMap<String, Object>();
085                    row.put(name, body);
086                    models.add(row);
087                }
088            } else {
089                // cast to the expected type
090                models = (List<Map<String, Object>>) body;
091            }
092    
093            for (Map<String, Object> model : models) {
094    
095                String result = factory.unbind(model);
096    
097                byte[] bytes = exchange.getContext().getTypeConverter().convertTo(byte[].class, exchange, result);
098                outputStream.write(bytes);
099    
100                // Add a carriage return
101                outputStream.write(bytesCRLF);
102            }
103        }
104    
105        public Object unmarshal(Exchange exchange, InputStream inputStream) throws Exception {
106            BindyCsvFactory factory = (BindyCsvFactory)getFactory(exchange.getContext().getPackageScanClassResolver());
107            ObjectHelper.notNull(factory, "not instantiated");
108    
109            // List of Pojos
110            List<Map<String, Object>> models = new ArrayList<Map<String, Object>>();
111    
112            // Pojos of the model
113            Map<String, Object> model;
114    
115            InputStreamReader in = new InputStreamReader(inputStream);
116    
117            // Scanner is used to read big file
118            Scanner scanner = new Scanner(in);
119    
120            // Retrieve the separator defined to split the record
121            String separator = factory.getSeparator();
122            ObjectHelper.notNull(separator, "The separator has not been defined in the annotation @CsvRecord or not instantiated during initModel.");
123    
124            int count = 0;
125            try {
126                // If the first line of the CSV file contains columns name, then we
127                // skip this line
128                if (factory.getSkipFirstLine()) {
129                    // Check if scanner is empty
130                    if (scanner.hasNextLine()) {
131                        scanner.nextLine();
132                    }
133                }
134    
135                while (scanner.hasNextLine()) {
136    
137                    // Read the line
138                    String line = scanner.nextLine().trim();
139    
140                    if (ObjectHelper.isEmpty(line)) {
141                        // skip if line is empty
142                        continue;
143                    }
144    
145                    // Increment counter
146                    count++;
147    
148                    // Create POJO where CSV data will be stored
149                    model = factory.factory();
150                    
151                    // Split the CSV record according to the separator defined in
152                    // annotated class @CSVRecord
153                    String[] tokens = line.split(separator, -1);
154                    List<String> result = Arrays.asList(tokens);
155                    // must unquote tokens before use
156                    result = unquoteTokens(result, separator);
157    
158                    if (result.size() == 0 || result.isEmpty()) {
159                        throw new java.lang.IllegalArgumentException("No records have been defined in the CSV");
160                    }
161    
162                    if (result.size() > 0) {
163                        if (LOG.isDebugEnabled()) {
164                            LOG.debug("Size of the record splitted : {}", result.size());
165                        }
166    
167                        // Bind data from CSV record with model classes
168                        factory.bind(result, model, count);
169    
170                        // Link objects together
171                        factory.link(model);
172    
173                        // Add objects graph to the list
174                        models.add(model);
175    
176                        LOG.debug("Graph of objects created: {}", model);
177                    }
178                }
179    
180                // Test if models list is empty or not
181                // If this is the case (correspond to an empty stream, ...)
182                if (models.size() == 0) {
183                    throw new java.lang.IllegalArgumentException("No records have been defined in the CSV");
184                } else {
185                    return models;
186                }
187    
188            } finally {
189                scanner.close();
190                IOHelper.close(in, "in", LOG);
191            }
192    
193        }
194    
195        /**
196         * Unquote the tokens, by removing leading and trailing quote chars,
197         * as will handling fixing broken tokens which may have been split
198         * by a separator inside a quote.
199         */
200        private List<String> unquoteTokens(List<String> result, String separator) {
201            // a current quoted token which we assemble from the broken pieces
202            // we need to do this as we use the split method on the String class
203            // to split the line using regular expression, and it does not handle
204            // if the separator char is also inside a quoted token, therefore we need
205            // to fix this afterwards
206            StringBuilder current = new StringBuilder();
207    
208            List<String> answer = new ArrayList<String>();
209            for (String s : result) {
210                boolean startQuote = false;
211                boolean endQuote = false;
212                if (s.startsWith("\"") || s.startsWith("'")) {
213                    s = s.substring(1);
214                    startQuote = true;
215                }
216                if (s.endsWith("\"") || s.endsWith("'")) {
217                    s = s.substring(0, s.length() - 1);
218                    endQuote = true;
219                }
220    
221                // are we in progress of rebuilding a broken token
222                boolean currentInProgress = current.length() > 0;
223    
224                // if we hit a start token then rebuild a broken token
225                if (currentInProgress || startQuote) {
226                    // append to current if we are in the middle of a start quote
227                    if (currentInProgress) {
228                        // must append separator back as this is a quoted token that was broken
229                        // but a separator inside the quotes
230                        current.append(separator);
231                    }
232                    current.append(s);
233                }
234    
235                // are we in progress of rebuilding a broken token
236                currentInProgress = current.length() > 0;
237    
238                if (endQuote) {
239                    // we hit end quote so append current and reset it
240                    answer.add(current.toString());
241                    current.setLength(0);
242                } else if (!currentInProgress) {
243                    // not rebuilding so add directly as is
244                    answer.add(s);
245                }
246            }
247    
248            // any left over from current?
249            if (current.length() > 0) {
250                answer.add(current.toString());
251                current.setLength(0);
252            }
253    
254            return answer;
255        }
256    
257        protected BindyAbstractFactory createModelFactory(PackageScanClassResolver resolver) throws Exception {
258            return new BindyCsvFactory(resolver, getPackages());
259        }
260    }