/*
* This file is part of Bracket Properties
* Copyright 2011 David R. Smith
*
*/
package asia.redact.bracket.properties;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import asia.redact.bracket.properties.Properties.Mode;
import asia.redact.bracket.util.AsciiToNativeFilterReader;
/**
* <pre>
* Parse a properties file conforming to the description at
* http://download.oracle.com/javase/6/docs/api/java/util/Properties.html#load(java.io.Reader)
* into tokens.
*
* There is one additional extension: a comment line which starts with #;; is treated
* as transient (not read in). This is used later to generate a transient header and footer
*
* This is an "off-line" lexer, it is backed by a String as input, which implies it is memory
* fed. That's not a problem for all but the biggest properties files on contemporary hardware.
*
* </pre>
*
* @author Dave
*
* @see PropertiesToken
* @see PropertiesTokenType
* @see InputAdapter
*/
public class PropertiesLexer {
final String input;
int index;
final List<PropertiesToken> list = new ArrayList<PropertiesToken>();
private final Lock lock = new ReentrantLock();
public PropertiesLexer(String input) {
super();
this.input = input;
}
/**
* Convenience method, swallows the input whole
*
* @param in
*/
public PropertiesLexer(Reader in){
this(new InputAdapter().asString(in));
}
/**
* Convenience method, swallows the input whole
*
* @param in
*/
public PropertiesLexer(File in){
this(new InputAdapter().asString(in));
}
/**
* Convenience method, swallows the input whole
*
* @param in
*/
public PropertiesLexer(InputStream in){
if(Properties.Factory.mode == Mode.Compatibility){
// filter for unicode escapes
AsciiToNativeFilterReader reader = new AsciiToNativeFilterReader(new InputStreamReader(in));
StringBuilder builder = new StringBuilder();
char [] array = new char[8120];
int count = 0;
try {
while((count = reader.read(array))!= -1) {
builder.append(array,0,count);
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
input = builder.toString();
}else{
// normal, String will be in the default encoding, normally UTF-8
input = new InputAdapter().asString(in);
}
}
public void lex() {
lock.lock();
try {
if(input==null)return;
PropertiesToken lastEol = null;
while(hasNext()){
StringBuffer buf = new StringBuffer();
PropertiesToken eol = null;
while(hasNext() && ((eol=scanLineBreak())==null)){
buf.append(next());
}
analyzeLine(buf.toString(),lastEol);
if(eol == null) {
return;
}
lastEol = eol;
list.add(eol);
index+=eol.text.length();
}
}finally{
list.add(PropertiesToken.eof());
lock.unlock();
}
}
private void analyzeLine(String buf, PropertiesToken tok){
lock.lock();
try {
//strip off any leading white space
int countBlank = 0;
for(int i =0;i<buf.length();i++){
if(Character.isWhitespace(buf.charAt(i))){
countBlank++;
continue;
}else{
break;
}
}
if(countBlank>0)buf=buf.substring(countBlank);
// this should handle blank lines
if(buf.isEmpty())return;
// do not keep our Last Generated header banner or our End footer banner
if((buf.length() > 3) && (buf.charAt(0)== '#' && buf.charAt(1)== ';' && buf.charAt(2)==';')){
return;
}
char ch = buf.charAt(0);
switch(ch){
case '#':
case '!': comment(buf); break;
default: {
scanKeyValue(buf,tok);
}
}
}finally{
lock.unlock();
}
}
private void scanKeyValue(String buf,PropertiesToken eol){
lock.lock();
try {
int sepIndex=-1;
char previous = '\0';
char ch = '\0';
boolean sawEscapedDelimiter=false;
for(int i=0;i<buf.length();i++){
previous=ch;
ch = buf.charAt(i);
if((ch == '='||ch==':') && previous == '\\'){
sawEscapedDelimiter=true;
}
if((ch == '='||ch==':') && previous != '\\'){
//I see an actual delimiter
sepIndex=i;
break;
}
}
if(sepIndex == -1){
//no separator found, test if continuation...
if(eol !=null && eol.type==PropertiesTokenType.LOGICAL_LINE_BREAK){
list.add(new PropertiesToken(PropertiesTokenType.VALUE,buf));
}
}else{
// key and value
//first purge escaped delimiters from the key, if needed
if(sawEscapedDelimiter){
String key = buf.substring(0, sepIndex);
previous = '\0';
ch = '\0';
StringBuilder b = new StringBuilder();
for(int i=0;i<key.length();i++){
previous=ch;
ch = buf.charAt(i);
if((ch == '='||ch==':') && previous == '\\'){
b.deleteCharAt(b.length()-1);
}
b.append(ch);
}
// use cleaned up key
// Issue #1 - need to trim key also of whitespace
list.add(new PropertiesToken(PropertiesTokenType.KEY,b.toString().trim()));
}else{
//delimiter not escaped, so just use key as is
// Issue #1 - but need to trim key of whitespace
list.add(new PropertiesToken(PropertiesTokenType.KEY,buf.substring(0, sepIndex).trim()));
}
list.add(new PropertiesToken(PropertiesTokenType.SEPARATOR,String.valueOf(buf.charAt(sepIndex))));
list.add(new PropertiesToken(PropertiesTokenType.VALUE,buf.substring(sepIndex+1, buf.length())));
}
}finally{
lock.unlock();
}
}
private void comment(String buf){
lock.lock();
try {
list.add(new PropertiesToken(PropertiesTokenType.COMMENT,buf));
}finally{
lock.unlock();
}
}
private PropertiesToken scanLineBreak(){
lock.lock();
try {
if(la(0)=='\\' && la(1) == '\r'&&la(2)=='\n'){
return new PropertiesToken(PropertiesTokenType.LOGICAL_LINE_BREAK,"\\\r\n");
}else if(la(0)=='\\' && la(1) == '\r'){
return new PropertiesToken(PropertiesTokenType.LOGICAL_LINE_BREAK,"\\\r");
}else if(la(0)=='\\' && la(1) == '\n'){
return new PropertiesToken(PropertiesTokenType.LOGICAL_LINE_BREAK,"\\\n");
}else if(la(0) == '\r'&&la(1)=='\n'){
return new PropertiesToken(PropertiesTokenType.NATURAL_LINE_BREAK,"\r\n");
}else if(la(0) == '\r'){
return new PropertiesToken(PropertiesTokenType.NATURAL_LINE_BREAK,"\r");
}else if(la(0) == '\n'){
return new PropertiesToken(PropertiesTokenType.NATURAL_LINE_BREAK,"\n");
}
return null;
}finally{
lock.unlock();
}
}
private boolean hasNext() {
lock.lock();
try {
return index < input.length();
}finally{
lock.unlock();
}
}
private char next() {
lock.lock();
try {
if(index >= input.length()) {
System.err.println("problem");
}
char ch = input.charAt(index);
index++;
return ch;
}finally{
lock.unlock();
}
}
private Character la(int count) {
lock.lock();
try {
try {
return input.charAt(index+count);
}catch(IndexOutOfBoundsException x){
return '\0';
}
}finally{
lock.unlock();
}
}
public List<PropertiesToken> getList() {
return list;
}
}