Dealing with large content

Have you ever tried to download or upload large content (say, 2 GB or more) through DFC? I have, and it was frustrating:

  • getContent(|Ex(|2|3)) methods operate through ByteArrayInputStream, so these methods consume a lot of memory:
     ~]$ cat Test.java
    import com.documentum.com.DfClientX;
    import com.documentum.fc.client.IDfSession;
    import com.documentum.fc.client.IDfSysObject;
    import com.documentum.fc.common.DfId;
    import com.documentum.fc.common.DfLoginInfo;
    
    /**
     * Sample: fetches the whole content of a sysobject through
     * {@code getContent()}.
     * <p>
     * The returned value is deliberately ignored; the call is made only to
     * show what it costs in memory. The run shown with this sample ends in
     * {@code java.lang.OutOfMemoryError}.
     *
     * @author Andrey B. Panfilov <andrew@panfilov.tel>
     */
    public class Test {

        /**
         * Opens a session to the {@code ssc_dev} repository, looks the object
         * up by its id and requests its content.
         *
         * @param args not used
         * @throws Exception whatever the DFC calls throw
         */
        public static void main(String[] args) throws Exception {
            DfLoginInfo credentials = new DfLoginInfo("dmadmin", "dmadmin");
            IDfSession session = new DfClientX().getLocalClient().newSession(
                    "ssc_dev", credentials);

            DfId objectId = new DfId("0901ffd7803542a2");
            IDfSysObject object = (IDfSysObject) session.getObject(objectId);

            // The result is dropped on purpose: only the call itself matters.
            object.getContent();
        }

    }
     ~]$ javac Test.java
     ~]$ java Test
    Exception in thread "main" java.lang.OutOfMemoryError: Java heap space
            at java.util.Arrays.copyOf(Arrays.java:2786)
            at java.io.ByteArrayOutputStream.write(ByteArrayOutputStream.java:94)
           ...
    
  • getFile(|Ex(|2)) methods are memory efficient, but require temporary files:
     ~]$ cat Test.java
    import java.io.File;
    
    import com.documentum.com.DfClientX;
    import com.documentum.fc.client.IDfSession;
    import com.documentum.fc.client.IDfSysObject;
    import com.documentum.fc.common.DfId;
    import com.documentum.fc.common.DfLoginInfo;
    
    /**
     * Sample: exports the content of a sysobject with {@code getFile(null)}
     * and prints the size of the resulting file.
     *
     * @author Andrey B. Panfilov <andrew@panfilov.tel>
     */
    public class Test {

        /**
         * Opens a session to the {@code ssc_dev} repository, exports the
         * content of the object to a file and reports the file length.
         *
         * @param args not used
         * @throws Exception whatever the DFC calls throw
         */
        public static void main(String[] args) throws Exception {
            DfLoginInfo credentials = new DfLoginInfo("dmadmin", "dmadmin");
            IDfSession session = new DfClientX().getLocalClient().newSession(
                    "ssc_dev", credentials);

            DfId objectId = new DfId("0901ffd7803542a2");
            IDfSysObject object = (IDfSysObject) session.getObject(objectId);

            // The string returned by getFile is used as the path of the
            // exported file (presumably chosen by DFC when null is passed).
            String exportedPath = object.getFile(null);
            long size = new File(exportedPath).length();
            System.out.println("File size: " + size);
        }

    }
     ~]$ javac Test.java
     ~]$ java Test
    File size: 4294967296
    
  • DFC also has getCollectionForContent(|Ex(2|3|4)) methods, but I hate how these methods are implemented:
     ~]$ cat Test.java
    import java.io.ByteArrayInputStream;
    
    import com.documentum.com.DfClientX;
    import com.documentum.fc.client.IDfCollection;
    import com.documentum.fc.client.IDfSession;
    import com.documentum.fc.client.IDfSysObject;
    import com.documentum.fc.common.DfId;
    import com.documentum.fc.common.DfLoginInfo;
    
    /**
     * Sample: walks the content of a sysobject through the collection
     * returned by {@code getCollectionForContent} and prints the total number
     * of bytes seen.
     *
     * @author Andrey B. Panfilov <andrew@panfilov.tel>
     */
    public class Test {

        /**
         * Opens a session to the {@code ssc_dev} repository, iterates over the
         * content collection of the object and sums the size of every buffer.
         *
         * @param args not used
         * @throws Exception whatever the DFC calls throw
         */
        public static void main(String[] args) throws Exception {
            DfLoginInfo credentials = new DfLoginInfo("dmadmin", "dmadmin");
            IDfSession session = new DfClientX().getLocalClient().newSession(
                    "ssc_dev", credentials);

            DfId objectId = new DfId("0901ffd7803542a2");
            IDfSysObject object = (IDfSysObject) session.getObject(objectId);

            IDfCollection chunks = null;
            try {
                chunks = object.getCollectionForContent(null, 0);

                long total = 0;
                while (chunks.next()) {
                    // Each row hands out one in-memory buffer; only its size
                    // is of interest here.
                    ByteArrayInputStream chunk = chunks.getBytesBuffer(null,
                            null, null, 0);
                    total += chunk.available();
                }

                System.out.println("Bytes read: " + total);
            } finally {
                // Close the collection even when reading fails.
                if (chunks != null) {
                    chunks.close();
                }
            }
        }

    }
     ~]$ javac Test.java
    [dmadmin@docu70dev01 ~]$ java Test
    Bytes read: 4294967296
    

So I decided to investigate how the same task is implemented in EMC's own products. For example, I found that the REST services neither create temporary files nor consume a lot of memory:

 ~]$ wget "http://.../0901ffd7803542a2/content-media?format=crtext&modifier=&page=0"
--2014-08-12 20:23:33--  http://...
Распознаётся localhost... ::1, 127.0.0.1
Устанавливается соединение с localhost|::1|:8280... соединение установлено.
Запрос HTTP послан, ожидается ответ... 401 Unauthorized
Устанавливается соединение с localhost|::1|:8280... соединение установлено.
Запрос HTTP послан, ожидается ответ... 200 OK
Длина: 4294967296 (4,0G) [text/plain]
Saving to: «/dev/null»

100%[====================================>] 4 294 967 296 74,5M/s   в 55s

2014-08-12 20:24:27 (75,0 MB/s) - «/dev/null» saved [4294967296/4294967296]

Moreover, it executes with the following stack:

at com.documentum.fc.client.impl.connection.docbase.DocbaseConnection.getBlock(DocbaseConnection.java:1463)
- locked <0x00000007e4238e78> (a com.documentum.fc.client.impl.connection.docbase.DocbaseConnection)
at com.documentum.fc.client.impl.connection.docbase.RawPuller.getBlock(RawPuller.java:52)
- locked <0x00000007e4238e78> (a com.documentum.fc.client.impl.connection.docbase.DocbaseConnection)
at com.documentum.fc.client.content.impl.BlockPuller.nextBlock(BlockPuller.java:49)
at com.documentum.fc.client.content.impl.PullerInputStream.getNextBuffer(PullerInputStream.java:73)
at com.documentum.fc.client.content.impl.PullerInputStream.ensureBufferHasData(PullerInputStream.java:63)
at com.documentum.fc.client.content.impl.PullerInputStream.read(PullerInputStream.java:88)
at java.io.FilterInputStream.read(FilterInputStream.java:116)
at java.io.FilterInputStream.read(FilterInputStream.java:116)
at java.io.FilterInputStream.read(FilterInputStream.java:90)
at org.springframework.util.StreamUtils.copy(StreamUtils.java:124)
at org.springframework.util.FileCopyUtils.copy(FileCopyUtils.java:109)

which differs from the getCollectionForContentEx stack:

at com.documentum.fc.client.impl.connection.netwise.AbstractNetwiseRpcClient.sendMessage(AbstractNetwiseRpcClient.java:211)
at com.documentum.fc.client.impl.connection.docbase.netwise.NetwiseDocbaseRpcClient.getBlock(NetwiseDocbaseRpcClient.java:1022)
- locked <0xb3ebedd8> (a com.documentum.fc.client.impl.connection.docbase.netwise.NetwiseDocbaseRpcClient)
at com.documentum.fc.client.impl.connection.docbase.DocbaseConnection.getBlock(DocbaseConnection.java:1406)
- locked <0xb3e820c0> (a com.documentum.fc.client.impl.connection.docbase.DocbaseConnection)
at com.documentum.fc.client.impl.connection.docbase.RawPuller.getBlock(RawPuller.java:52)
- locked <0xb3e820c0> (a com.documentum.fc.client.impl.connection.docbase.DocbaseConnection)
at com.documentum.fc.client.content.impl.BlockPuller.nextBlock(BlockPuller.java:49)
at com.documentum.fc.client.content.impl.PullerInputStream.getNextBuffer(PullerInputStream.java:73)
at com.documentum.fc.client.content.impl.PullerInputStream.ensureBufferHasData(PullerInputStream.java:63)
at com.documentum.fc.client.content.impl.PullerInputStream.available(PullerInputStream.java:29)
at java.io.FilterInputStream.available(FilterInputStream.java:142)
at java.io.FilterInputStream.available(FilterInputStream.java:142)
at com.documentum.fc.client.content.impl.ContentIterator.determineNextBufferSize(ContentIterator.java:150)
at com.documentum.fc.client.content.impl.ContentIterator.hasNext(ContentIterator.java:42)
at com.documentum.fc.client.impl.collection.TypedDataCollection.next(TypedDataCollection.java:131)
- locked <0xb44f3488> (a com.documentum.fc.client.content.impl.ContentCollection)
at com.documentum.fc.client.impl.collection.CollectionHandle.next(CollectionHandle.java:47)

How do they do that? The answer is simple: DfSysObject implements the ISysObjectInternal interface, which has a lot of useful methods (in the current context the useful ones are getStream/setStream/setStreamEx):

 ~]$ cat Test.java
import java.io.InputStream;

import com.documentum.com.DfClientX;
import com.documentum.fc.client.IDfSession;
import com.documentum.fc.client.IDfSysObject;
import com.documentum.fc.client.internal.ISysObjectInternal;
import com.documentum.fc.common.DfId;
import com.documentum.fc.common.DfLoginInfo;

/**
 * Sample: reads the content of a sysobject as a plain {@link InputStream}
 * obtained from {@code ISysObjectInternal.getStream} and prints the number
 * of bytes read.
 * <p>
 * The content is consumed through a fixed 64 KiB buffer, so this sample
 * itself never holds more than one buffer of data.
 *
 * @author Andrey B. Panfilov <andrew@panfilov.tel>
 */
public class Test {

    /**
     * Opens a session to the {@code ssc_dev} repository, streams the content
     * of the object to the end and reports the byte count.
     *
     * @param args not used
     * @throws Exception whatever the DFC calls or the stream throw
     */
    public static void main(String[] args) throws Exception {
        DfLoginInfo credentials = new DfLoginInfo("dmadmin", "dmadmin");
        IDfSession session = new DfClientX().getLocalClient().newSession(
                "ssc_dev", credentials);

        DfId objectId = new DfId("0901ffd7803542a2");
        IDfSysObject object = (IDfSysObject) session.getObject(objectId);

        InputStream content = null;
        try {
            // getStream is not part of IDfSysObject, hence the cast to the
            // internal interface.
            ISysObjectInternal internal = (ISysObjectInternal) object;
            content = internal.getStream(null, 0, null, false);

            byte[] chunk = new byte[65536];
            long total = 0;
            // read() returns -1 at end of stream; any other value is the
            // number of bytes placed into the buffer.
            for (int count = content.read(chunk); count > -1;
                    count = content.read(chunk)) {
                total += count;
            }

            System.out.println("Bytes read: " + total);
        } finally {
            // Close the stream even when reading fails.
            if (content != null) {
                content.close();
            }
        }
    }
}
 ~]$ javac Test.java
 ~]$ java Test
Bytes read: 4294967296

2 thoughts on “Dealing with large content”

  1. Pingback: Q & A. XI | Documentum in a (nuts)HELL
  2. Pingback: Explanation for dfc.diagnostics.resources.enable | Documentum in a (nuts)HELL

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s