xref: /JGit/org.eclipse.jgit.lfs/src/org/eclipse/jgit/lfs/LfsPointer.java (revision 616a88895dfc36fd3c9fea7d010b03e6d2cf8c1d)
/*
 * Copyright (C) 2016, 2021 Christian Halstrick <christian.halstrick@sap.com> and others
 *
 * This program and the accompanying materials are made available under the
 * terms of the Eclipse Distribution License v. 1.0 which is available at
 * https://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */
1045ee55d0SChristian Halstrick package org.eclipse.jgit.lfs;
1145ee55d0SChristian Halstrick 
1230c6c754SDavid Pursehouse import static java.nio.charset.StandardCharsets.UTF_8;
13171f84a0SDavid Pursehouse 
14*616a8889SThomas Wolf import java.io.BufferedInputStream;
15bcb5a431SChristian Halstrick import java.io.BufferedReader;
16*616a8889SThomas Wolf import java.io.ByteArrayInputStream;
17bcb5a431SChristian Halstrick import java.io.IOException;
18bcb5a431SChristian Halstrick import java.io.InputStream;
19bcb5a431SChristian Halstrick import java.io.InputStreamReader;
2045ee55d0SChristian Halstrick import java.io.OutputStream;
2145ee55d0SChristian Halstrick import java.io.PrintStream;
221572964eSChristian Halstrick import java.io.UnsupportedEncodingException;
23a4feeb01SMatthias Sohn import java.util.Locale;
24aa052ea0SThomas Wolf import java.util.Objects;
2545ee55d0SChristian Halstrick 
26bcb5a431SChristian Halstrick import org.eclipse.jgit.annotations.Nullable;
274b7747ccSMatthias Sohn import org.eclipse.jgit.lfs.lib.AnyLongObjectId;
2845ee55d0SChristian Halstrick import org.eclipse.jgit.lfs.lib.Constants;
2945ee55d0SChristian Halstrick import org.eclipse.jgit.lfs.lib.LongObjectId;
30*616a8889SThomas Wolf import org.eclipse.jgit.util.IO;
3145ee55d0SChristian Halstrick 
3245ee55d0SChristian Halstrick /**
3345ee55d0SChristian Halstrick  * Represents an LFS pointer file
3445ee55d0SChristian Halstrick  *
3545ee55d0SChristian Halstrick  * @since 4.6
3645ee55d0SChristian Halstrick  */
37c0bb9928SMarkus Duft public class LfsPointer implements Comparable<LfsPointer> {
3845ee55d0SChristian Halstrick 	/**
3945ee55d0SChristian Halstrick 	 * The version of the LfsPointer file format
4045ee55d0SChristian Halstrick 	 */
4145ee55d0SChristian Halstrick 	public static final String VERSION = "https://git-lfs.github.com/spec/v1"; //$NON-NLS-1$
4245ee55d0SChristian Halstrick 
4345ee55d0SChristian Halstrick 	/**
440e187f14SDariusz Luksza 	 * The version of the LfsPointer file format using legacy URL
450e187f14SDariusz Luksza 	 * @since 4.7
460e187f14SDariusz Luksza 	 */
470e187f14SDariusz Luksza 	public static final String VERSION_LEGACY = "https://hawser.github.com/spec/v1"; //$NON-NLS-1$
480e187f14SDariusz Luksza 
490e187f14SDariusz Luksza 	/**
50c0bb9928SMarkus Duft 	 * Don't inspect files that are larger than this threshold to avoid
51c0bb9928SMarkus Duft 	 * excessive reading. No pointer file should be larger than this.
52c0bb9928SMarkus Duft 	 * @since 4.11
53c0bb9928SMarkus Duft 	 */
54c0bb9928SMarkus Duft 	public static final int SIZE_THRESHOLD = 200;
55c0bb9928SMarkus Duft 
56c0bb9928SMarkus Duft 	/**
5745ee55d0SChristian Halstrick 	 * The name of the hash function as used in the pointer files. This will
5845ee55d0SChristian Halstrick 	 * evaluate to "sha256"
5945ee55d0SChristian Halstrick 	 */
6045ee55d0SChristian Halstrick 	public static final String HASH_FUNCTION_NAME = Constants.LONG_HASH_FUNCTION
61a4feeb01SMatthias Sohn 			.toLowerCase(Locale.ROOT).replace("-", ""); //$NON-NLS-1$ //$NON-NLS-2$
6245ee55d0SChristian Halstrick 
63*616a8889SThomas Wolf 	/**
64*616a8889SThomas Wolf 	 * {@link #SIZE_THRESHOLD} is too low; with lfs extensions a LFS pointer can
65*616a8889SThomas Wolf 	 * be larger. But 8kB should be more than enough.
66*616a8889SThomas Wolf 	 */
67*616a8889SThomas Wolf 	static final int FULL_SIZE_THRESHOLD = 8 * 1024;
68*616a8889SThomas Wolf 
69aa052ea0SThomas Wolf 	private final AnyLongObjectId oid;
7045ee55d0SChristian Halstrick 
71aa052ea0SThomas Wolf 	private final long size;
7245ee55d0SChristian Halstrick 
7345ee55d0SChristian Halstrick 	/**
74e0332bfbSMatthias Sohn 	 * <p>Constructor for LfsPointer.</p>
75e0332bfbSMatthias Sohn 	 *
7645ee55d0SChristian Halstrick 	 * @param oid
7745ee55d0SChristian Halstrick 	 *            the id of the content
7845ee55d0SChristian Halstrick 	 * @param size
7945ee55d0SChristian Halstrick 	 *            the size of the content
8045ee55d0SChristian Halstrick 	 */
LfsPointer(AnyLongObjectId oid, long size)814b7747ccSMatthias Sohn 	public LfsPointer(AnyLongObjectId oid, long size) {
8245ee55d0SChristian Halstrick 		this.oid = oid;
8345ee55d0SChristian Halstrick 		this.size = size;
8445ee55d0SChristian Halstrick 	}
8545ee55d0SChristian Halstrick 
8645ee55d0SChristian Halstrick 	/**
87e0332bfbSMatthias Sohn 	 * <p>Getter for the field <code>oid</code>.</p>
88e0332bfbSMatthias Sohn 	 *
8945ee55d0SChristian Halstrick 	 * @return the id of the content
9045ee55d0SChristian Halstrick 	 */
getOid()914b7747ccSMatthias Sohn 	public AnyLongObjectId getOid() {
9245ee55d0SChristian Halstrick 		return oid;
9345ee55d0SChristian Halstrick 	}
9445ee55d0SChristian Halstrick 
9545ee55d0SChristian Halstrick 	/**
96e0332bfbSMatthias Sohn 	 * <p>Getter for the field <code>size</code>.</p>
97e0332bfbSMatthias Sohn 	 *
9845ee55d0SChristian Halstrick 	 * @return the size of the content
9945ee55d0SChristian Halstrick 	 */
getSize()10045ee55d0SChristian Halstrick 	public long getSize() {
10145ee55d0SChristian Halstrick 		return size;
10245ee55d0SChristian Halstrick 	}
10345ee55d0SChristian Halstrick 
10445ee55d0SChristian Halstrick 	/**
10545ee55d0SChristian Halstrick 	 * Encode this object into the LFS format defined by {@link #VERSION}
10645ee55d0SChristian Halstrick 	 *
10745ee55d0SChristian Halstrick 	 * @param out
108e0332bfbSMatthias Sohn 	 *            the {@link java.io.OutputStream} into which the encoded data should be
10945ee55d0SChristian Halstrick 	 *            written
11045ee55d0SChristian Halstrick 	 */
encode(OutputStream out)11145ee55d0SChristian Halstrick 	public void encode(OutputStream out) {
1121572964eSChristian Halstrick 		try (PrintStream ps = new PrintStream(out, false,
11330c6c754SDavid Pursehouse 				UTF_8.name())) {
11445ee55d0SChristian Halstrick 			ps.print("version "); //$NON-NLS-1$
1151572964eSChristian Halstrick 			ps.print(VERSION + "\n"); //$NON-NLS-1$
11645ee55d0SChristian Halstrick 			ps.print("oid " + HASH_FUNCTION_NAME + ":"); //$NON-NLS-1$ //$NON-NLS-2$
1171572964eSChristian Halstrick 			ps.print(oid.name() + "\n"); //$NON-NLS-1$
11845ee55d0SChristian Halstrick 			ps.print("size "); //$NON-NLS-1$
1191572964eSChristian Halstrick 			ps.print(size + "\n"); //$NON-NLS-1$
1201572964eSChristian Halstrick 		} catch (UnsupportedEncodingException e) {
1211572964eSChristian Halstrick 			// should not happen, we are using a standard charset
12245ee55d0SChristian Halstrick 		}
12345ee55d0SChristian Halstrick 	}
12445ee55d0SChristian Halstrick 
125bcb5a431SChristian Halstrick 	/**
126bcb5a431SChristian Halstrick 	 * Try to parse the data provided by an InputStream to the format defined by
127*616a8889SThomas Wolf 	 * {@link #VERSION}. If the given stream supports mark and reset as
128*616a8889SThomas Wolf 	 * indicated by {@link InputStream#markSupported()}, its input position will
129*616a8889SThomas Wolf 	 * be reset if the stream content is not actually a LFS pointer (i.e., when
130*616a8889SThomas Wolf 	 * {@code null} is returned). If the stream content is an invalid LFS
131*616a8889SThomas Wolf 	 * pointer or the given stream does not support mark/reset, the input
132*616a8889SThomas Wolf 	 * position may not be reset.
133bcb5a431SChristian Halstrick 	 *
134bcb5a431SChristian Halstrick 	 * @param in
135e0332bfbSMatthias Sohn 	 *            the {@link java.io.InputStream} from where to read the data
136*616a8889SThomas Wolf 	 * @return an {@link org.eclipse.jgit.lfs.LfsPointer} or {@code null} if the
137*616a8889SThomas Wolf 	 *         stream was not parseable as LfsPointer
138e0332bfbSMatthias Sohn 	 * @throws java.io.IOException
139bcb5a431SChristian Halstrick 	 */
140bcb5a431SChristian Halstrick 	@Nullable
parseLfsPointer(InputStream in)141bcb5a431SChristian Halstrick 	public static LfsPointer parseLfsPointer(InputStream in)
142bcb5a431SChristian Halstrick 			throws IOException {
143*616a8889SThomas Wolf 		if (in.markSupported()) {
144*616a8889SThomas Wolf 			return parse(in);
145*616a8889SThomas Wolf 		}
146*616a8889SThomas Wolf 		// Fallback; note that while parse() resets its input stream, that won't
147*616a8889SThomas Wolf 		// reset "in".
148*616a8889SThomas Wolf 		return parse(new BufferedInputStream(in));
149*616a8889SThomas Wolf 	}
150*616a8889SThomas Wolf 
151*616a8889SThomas Wolf 	@Nullable
parse(InputStream in)152*616a8889SThomas Wolf 	private static LfsPointer parse(InputStream in)
153*616a8889SThomas Wolf 			throws IOException {
154*616a8889SThomas Wolf 		if (!in.markSupported()) {
155*616a8889SThomas Wolf 			// No translation; internal error
156*616a8889SThomas Wolf 			throw new IllegalArgumentException(
157*616a8889SThomas Wolf 					"LFS pointer parsing needs InputStream.markSupported() == true"); //$NON-NLS-1$
158*616a8889SThomas Wolf 		}
159*616a8889SThomas Wolf 		// Try reading only a short block first.
160*616a8889SThomas Wolf 		in.mark(SIZE_THRESHOLD);
161*616a8889SThomas Wolf 		byte[] preamble = new byte[SIZE_THRESHOLD];
162*616a8889SThomas Wolf 		int length = IO.readFully(in, preamble, 0);
163*616a8889SThomas Wolf 		if (length < preamble.length || in.read() < 0) {
164*616a8889SThomas Wolf 			// We have the whole file. Try to parse a pointer from it.
165*616a8889SThomas Wolf 			try (BufferedReader r = new BufferedReader(new InputStreamReader(
166*616a8889SThomas Wolf 					new ByteArrayInputStream(preamble, 0, length), UTF_8))) {
167*616a8889SThomas Wolf 				LfsPointer ptr = parse(r);
168*616a8889SThomas Wolf 				if (ptr == null) {
169*616a8889SThomas Wolf 					in.reset();
170*616a8889SThomas Wolf 				}
171*616a8889SThomas Wolf 				return ptr;
172*616a8889SThomas Wolf 			}
173*616a8889SThomas Wolf 		}
174*616a8889SThomas Wolf 		// Longer than SIZE_THRESHOLD: expect "version" to be the first line.
175*616a8889SThomas Wolf 		boolean hasVersion = checkVersion(preamble);
176*616a8889SThomas Wolf 		in.reset();
177*616a8889SThomas Wolf 		if (!hasVersion) {
178*616a8889SThomas Wolf 			return null;
179*616a8889SThomas Wolf 		}
180*616a8889SThomas Wolf 		in.mark(FULL_SIZE_THRESHOLD);
181*616a8889SThomas Wolf 		byte[] fullPointer = new byte[FULL_SIZE_THRESHOLD];
182*616a8889SThomas Wolf 		length = IO.readFully(in, fullPointer, 0);
183*616a8889SThomas Wolf 		if (length == fullPointer.length && in.read() >= 0) {
184*616a8889SThomas Wolf 			in.reset();
185*616a8889SThomas Wolf 			return null; // Too long.
186*616a8889SThomas Wolf 		}
187*616a8889SThomas Wolf 		try (BufferedReader r = new BufferedReader(new InputStreamReader(
188*616a8889SThomas Wolf 				new ByteArrayInputStream(fullPointer, 0, length), UTF_8))) {
189*616a8889SThomas Wolf 			LfsPointer ptr = parse(r);
190*616a8889SThomas Wolf 			if (ptr == null) {
191*616a8889SThomas Wolf 				in.reset();
192*616a8889SThomas Wolf 			}
193*616a8889SThomas Wolf 			return ptr;
194*616a8889SThomas Wolf 		}
195*616a8889SThomas Wolf 	}
196*616a8889SThomas Wolf 
parse(BufferedReader r)197*616a8889SThomas Wolf 	private static LfsPointer parse(BufferedReader r) throws IOException {
198bcb5a431SChristian Halstrick 		boolean versionLine = false;
199bcb5a431SChristian Halstrick 		LongObjectId id = null;
200bcb5a431SChristian Halstrick 		long sz = -1;
201aa052ea0SThomas Wolf 		// This parsing is a bit too general if we go by the spec at
202aa052ea0SThomas Wolf 		// https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md
203*616a8889SThomas Wolf 		// Comment lines are not mentioned in the spec, the "version" line
204*616a8889SThomas Wolf 		// MUST be the first, and keys are ordered alphabetically.
205*616a8889SThomas Wolf 		for (String s = r.readLine(); s != null; s = r.readLine()) {
206bcb5a431SChristian Halstrick 			if (s.startsWith("#") || s.length() == 0) { //$NON-NLS-1$
207bcb5a431SChristian Halstrick 				continue;
208aa052ea0SThomas Wolf 			} else if (s.startsWith("version")) { //$NON-NLS-1$
209*616a8889SThomas Wolf 				if (versionLine || !checkVersionLine(s)) {
210aa052ea0SThomas Wolf 					return null; // Not a LFS pointer
211aa052ea0SThomas Wolf 				}
212*616a8889SThomas Wolf 				versionLine = true;
213aa052ea0SThomas Wolf 			} else {
214aa052ea0SThomas Wolf 				try {
215aa052ea0SThomas Wolf 					if (s.startsWith("oid sha256:")) { //$NON-NLS-1$
216aa052ea0SThomas Wolf 						if (id != null) {
217aa052ea0SThomas Wolf 							return null; // Not a LFS pointer
218aa052ea0SThomas Wolf 						}
219*616a8889SThomas Wolf 						id = LongObjectId.fromString(s.substring(11).trim());
220aa052ea0SThomas Wolf 					} else if (s.startsWith("size")) { //$NON-NLS-1$
221*616a8889SThomas Wolf 						if (sz > 0 || s.length() < 5 || s.charAt(4) != ' ') {
222aa052ea0SThomas Wolf 							return null; // Not a LFS pointer
223aa052ea0SThomas Wolf 						}
224bcb5a431SChristian Halstrick 						sz = Long.parseLong(s.substring(5).trim());
225bcb5a431SChristian Halstrick 					}
226aa052ea0SThomas Wolf 				} catch (RuntimeException e) {
227aa052ea0SThomas Wolf 					// We could not parse the line. If we have a version
228aa052ea0SThomas Wolf 					// already, this is a corrupt LFS pointer. Otherwise it
229aa052ea0SThomas Wolf 					// is just not an LFS pointer.
230aa052ea0SThomas Wolf 					if (versionLine) {
231aa052ea0SThomas Wolf 						throw e;
232aa052ea0SThomas Wolf 					}
233aa052ea0SThomas Wolf 					return null;
234aa052ea0SThomas Wolf 				}
235aa052ea0SThomas Wolf 			}
236bcb5a431SChristian Halstrick 			if (versionLine && id != null && sz > -1) {
237bcb5a431SChristian Halstrick 				return new LfsPointer(id, sz);
238bcb5a431SChristian Halstrick 			}
239bcb5a431SChristian Halstrick 		}
240bcb5a431SChristian Halstrick 		return null;
241bcb5a431SChristian Halstrick 	}
242bcb5a431SChristian Halstrick 
checkVersion(byte[] data)243*616a8889SThomas Wolf 	private static boolean checkVersion(byte[] data) {
244*616a8889SThomas Wolf 		// According to the spec at
245*616a8889SThomas Wolf 		// https://github.com/git-lfs/git-lfs/blob/master/docs/spec.md
246*616a8889SThomas Wolf 		// it MUST always be the first line.
247*616a8889SThomas Wolf 		try (BufferedReader r = new BufferedReader(
248*616a8889SThomas Wolf 				new InputStreamReader(new ByteArrayInputStream(data), UTF_8))) {
249*616a8889SThomas Wolf 			String s = r.readLine();
250*616a8889SThomas Wolf 			if (s != null && s.startsWith("version")) { //$NON-NLS-1$
251*616a8889SThomas Wolf 				return checkVersionLine(s);
252*616a8889SThomas Wolf 			}
253*616a8889SThomas Wolf 		} catch (IOException e) {
254*616a8889SThomas Wolf 			// Doesn't occur, we're reading from a byte array!
255*616a8889SThomas Wolf 		}
256*616a8889SThomas Wolf 		return false;
257*616a8889SThomas Wolf 	}
258*616a8889SThomas Wolf 
checkVersionLine(String s)259*616a8889SThomas Wolf 	private static boolean checkVersionLine(String s) {
260*616a8889SThomas Wolf 		if (s.length() < 8 || s.charAt(7) != ' ') {
261*616a8889SThomas Wolf 			return false; // Not a valid LFS pointer version line
262*616a8889SThomas Wolf 		}
263*616a8889SThomas Wolf 		String rest = s.substring(8).trim();
264*616a8889SThomas Wolf 		return VERSION.equals(rest) || VERSION_LEGACY.equals(rest);
265*616a8889SThomas Wolf 	}
266*616a8889SThomas Wolf 
267e0332bfbSMatthias Sohn 	/** {@inheritDoc} */
26845ee55d0SChristian Halstrick 	@Override
toString()26945ee55d0SChristian Halstrick 	public String toString() {
2704b7747ccSMatthias Sohn 		return "LfsPointer: oid=" + oid.name() + ", size=" //$NON-NLS-1$ //$NON-NLS-2$
27145ee55d0SChristian Halstrick 				+ size;
27245ee55d0SChristian Halstrick 	}
273c0bb9928SMarkus Duft 
274c0bb9928SMarkus Duft 	/**
275c0bb9928SMarkus Duft 	 * @since 4.11
276c0bb9928SMarkus Duft 	 */
277c0bb9928SMarkus Duft 	@Override
compareTo(LfsPointer o)278c0bb9928SMarkus Duft 	public int compareTo(LfsPointer o) {
279c0bb9928SMarkus Duft 		int x = getOid().compareTo(o.getOid());
280c0bb9928SMarkus Duft 		if (x != 0) {
281c0bb9928SMarkus Duft 			return x;
282c0bb9928SMarkus Duft 		}
283c0bb9928SMarkus Duft 
2843e2ec7e5SDavid Ostrovsky 		return Long.compare(getSize(), o.getSize());
285c0bb9928SMarkus Duft 	}
286aa052ea0SThomas Wolf 
287aa052ea0SThomas Wolf 	@Override
hashCode()288aa052ea0SThomas Wolf 	public int hashCode() {
289aa052ea0SThomas Wolf 		return Objects.hash(getOid()) * 31 + Long.hashCode(getSize());
29045ee55d0SChristian Halstrick 	}
291bcb5a431SChristian Halstrick 
292aa052ea0SThomas Wolf 	@Override
equals(Object obj)293aa052ea0SThomas Wolf 	public boolean equals(Object obj) {
294aa052ea0SThomas Wolf 		if (this == obj) {
295aa052ea0SThomas Wolf 			return true;
296aa052ea0SThomas Wolf 		}
297aa052ea0SThomas Wolf 		if (obj == null || getClass() != obj.getClass()) {
298aa052ea0SThomas Wolf 			return false;
299aa052ea0SThomas Wolf 		}
300aa052ea0SThomas Wolf 		LfsPointer other = (LfsPointer) obj;
301aa052ea0SThomas Wolf 		return Objects.equals(getOid(), other.getOid())
302aa052ea0SThomas Wolf 				&& getSize() == other.getSize();
303aa052ea0SThomas Wolf 	}
304aa052ea0SThomas Wolf }
305