| Back | Main view

Content may not get Full Text indexed due to low memory condition

Product:IMiS/ARChive
Release:9.7.x
Date:04/04/2017

Case: - Full text searches do not produce all results that should be returned
- Detailed examination shows that content is not indexed
- Warning message "java.lang.OutOfMemoryError" appears in the server log

Description:

A "java.lang.OutOfMemoryError" error may occur during the full text indexing process. This indicates that the full text indexing service ran out of Java heap space. In order to successfully index content files for Full Text queries, additional memory should be assigned to the Java subsystem.

To increase the Java heap size, increase the -Xmx value in the JVMOptions parameter in the iarc.conf file and restart the IMiS/ARChive Server. The default heap size is 512 megabytes.

Example of default iarc.conf value:

JVMOptions=-Xmx512m

Example of increased value:

JVMOptions=-Xmx1024m


Examples of Java stack trace errors in server log:

[iarcd:31768:3289365360:FullTextIndexService.cpp:FullTextIndexService::Stage1:267] WARN[4] FullTextIndexService[QueueWorker]: Unable to parse file '102011964' ('/iarc/vol/vol20/194/0614943c'). Reason: 'Error extracting content. Reason: java.lang.OutOfMemoryError: GC overhead limit exceeded
  java.util.Arrays.copyOfRange(Arrays.java:3664)
  java.lang.String.<init>(String.java:207)
  java.lang.String.substring(String.java:1969)
  org.apache.xmlbeans.impl.store.Locale$SaxHandler.startElement(Locale.java:3267)
  org.apache.xmlbeans.impl.piccolo.xml.Piccolo.reportStartTag(Piccolo.java:1082)
  org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseAttributesNS(PiccoloLexer.java:1802)
  org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseOpenTagNS(PiccoloLexer.java:1521)
  org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseTagNS(PiccoloLexer.java:1362)
  org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseXMLNS(PiccoloLexer.java:1293)
  org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseXML(PiccoloLexer.java:1261)
  org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yylex(PiccoloLexer.java:4812)
  org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yylex(Piccolo.java:1290)
  org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yyparse(Piccolo.java:1400)
  org.apache.xmlbeans.impl.piccolo.xml.Piccolo.parse(Piccolo.java:714)
  org.apache.xmlbeans.impl.store.Locale$SaxLoader.load(Locale.java:3479)
  org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1277)
  org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1264)
 org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345)
org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument$Factory.parse(Unknown Source)
  org.apache.poi.xwpf.usermodel.XWPFDocument.onDocumentRead(XWPFDocument.java:158)
  org.apache.poi.POIXMLDocument.load(POIXMLDocument.java:177)
  org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:119)
  org.apache.poi.xwpf.extractor.XWPFWordExtractor.<init>(XWPFWordExtractor.java:58)
  org.apache.poi.extractor.ExtractorFactory.createExtractor(ExtractorFactory.java:204)
org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:86)
  org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:87)
  org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
  org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
  org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)
  com.imis.imisarc.server.parser.impl.ContentParser.extractContent(ContentParser.java:302)


[iarcd:31768:3289365360:FullTextIndexService.cpp:FullTextIndexService::Stage1:267] WARN[4] FullTextIndexService[QueueWorker]: Unable to parse file '102011964' ('/iarc/vol/vol20/194/0614943c'). Reason: 'Error extracting content. Reason: java.lang.OutOfMemoryError: Java heap space
  java.util.Arrays.copyOfRange(Arrays.java:3664)
  java.lang.String.<init>(String.java:207)
  java.lang.String.substring(String.java:1933)
  org.apache.xmlbeans.impl.store.Locale$SaxHandler.startElement(Locale.java:3267)
  org.apache.xmlbeans.impl.piccolo.xml.Piccolo.reportStartTag(Piccolo.java:1082)
  org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseAttributesNS(PiccoloLexer.java:1802)
  org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseOpenTagNS(PiccoloLexer.java:1521)
  org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseTagNS(PiccoloLexer.java:1362)
  org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseXMLNS(PiccoloLexer.java:1293)
  org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.parseXML(PiccoloLexer.java:1261)
  org.apache.xmlbeans.impl.piccolo.xml.PiccoloLexer.yylex(PiccoloLexer.java:4812)
  org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yylex(Piccolo.java:1290)
  org.apache.xmlbeans.impl.piccolo.xml.Piccolo.yyparse(Piccolo.java:1400)
  org.apache.xmlbeans.impl.piccolo.xml.Piccolo.parse(Piccolo.java:714)
  org.apache.xmlbeans.impl.store.Locale$SaxLoader.load(Locale.java:3479)
  org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1277)
  org.apache.xmlbeans.impl.store.Locale.parseToXmlObject(Locale.java:1264)
 org.apache.xmlbeans.impl.schema.SchemaTypeLoaderBase.parse(SchemaTypeLoaderBase.java:345)
org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument$Factory.parse(Unknown Source)
  org.apache.poi.xwpf.usermodel.XWPFDocument.onDocumentRead(XWPFDocument.java:158)
  org.apache.poi.POIXMLDocument.load(POIXMLDocument.java:177)
  org.apache.poi.xwpf.usermodel.XWPFDocument.<init>(XWPFDocument.java:119)
  org.apache.poi.xwpf.extractor.XWPFWordExtractor.<init>(XWPFWordExtractor.java:58)
  org.apache.poi.extractor.ExtractorFactory.createExtractor(ExtractorFactory.java:204)
org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:86)
  org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:87)
  org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
  org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
  org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)
  com.imis.imisarc.server.parser.impl.ContentParser.extractContent(ContentParser.java:302)


[iarcd:32212:3255810928:FullTextIndexService.cpp:FullTextIndexService::Stage1:267] WARN[4] FullTextIndexService[QueueWorker]: Unable to parse file '102011964' ('/iarc/vol/vol20/194/0614943c'). Reason: 'Error extracting content. Reason: java.lang.OutOfMemoryError: GC overhead limit exceeded
  org.apache.xmlbeans.impl.common.XPath$ExecutionContext.init(XPath.java:63)
  org.apache.xmlbeans.impl.store.Path$XbeanPathEngine.<init>(Path.java:452)
  org.apache.xmlbeans.impl.store.Path$XbeanPath.execute(Path.java:305)
  org.apache.xmlbeans.impl.store.Cursor._selectPath(Cursor.java:902)
  org.apache.xmlbeans.impl.store.Cursor._selectPath(Cursor.java:894)
  org.apache.xmlbeans.impl.store.Cursor.selectPath(Cursor.java:2616)
  org.apache.poi.xwpf.usermodel.XWPFRun.text(XWPFRun.java:1040)
  org.apache.poi.xwpf.usermodel.XWPFRun.toString(XWPFRun.java:1027)
org.apache.tika.parser.microsoft.ooxml.XWPFWordExtractorDecorator.processRun(XWPFWordExtractorDecorator.java:322)
org.apache.tika.parser.microsoft.ooxml.XWPFWordExtractorDecorator.extractParagraph(XWPFWordExtractorDecorator.java:225)
org.apache.tika.parser.microsoft.ooxml.XWPFWordExtractorDecorator.extractIBodyText(XWPFWordExtractorDecorator.java:107)
org.apache.tika.parser.microsoft.ooxml.XWPFWordExtractorDecorator.buildXHTML(XWPFWordExtractorDecorator.java:93)
org.apache.tika.parser.microsoft.ooxml.AbstractOOXMLExtractor.getXHTML(AbstractOOXMLExtractor.java:110)
org.apache.tika.parser.microsoft.ooxml.OOXMLExtractorFactory.parse(OOXMLExtractorFactory.java:112)
  org.apache.tika.parser.microsoft.ooxml.OOXMLParser.parse(OOXMLParser.java:87)
  org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
  org.apache.tika.parser.CompositeParser.parse(CompositeParser.java:280)
  org.apache.tika.parser.AutoDetectParser.parse(AutoDetectParser.java:120)
  com.imis.imisarc.server.parser.impl.ContentParser.extractContent(ContentParser.java:302)


Related Documents:



| Back | Main view