@@ -2885,6 +2885,16 @@ class P4Sync(Command, P4UserMap):
print("\nIgnoring apple filetype file %s" % file['depotFile'])
return
+ if type_base == "utf8":
+ # The type utf8 explicitly means utf8 *with BOM*. These are
+ # streamed just like regular text files, however, without
+ # the BOM in the stream.
+ # Therefore, to accurately import these files into git, we
+ # need to explicitly re-add the BOM before writing.
+ # 'contents' is a set of bytes in this case, so create the
+ # BOM prefix as a b'' literal.
+ contents = [b'\xef\xbb\xbf' + contents[0]] + contents[1:]
+
# Note that we do not try to de-mangle keywords on utf16 files,
# even though in theory somebody may want that.
regexp = p4_keywords_regexp_for_type(type_base, type_mods)
@@ -333,4 +333,38 @@ test_expect_success SYMLINKS 'empty symlink target' '
)
'
+test_expect_success SYMLINKS 'utf-8 with and without BOM in text file' '
+ (
+ cd "$cli" &&
+
+ # some utf8 content
+ echo some tǣxt >utf8-nobom-test &&
+
+ # same utf8 content as before but with bom
+ echo some tǣxt | sed '\''s/^/\xef\xbb\xbf/'\'' >utf8-bom-test &&
+
+ # bom only
+ dd bs=1 count=3 if=utf8-bom-test of=utf8-bom-empty-test &&
+
+ p4 add utf8-nobom-test utf8-bom-test utf8-bom-empty-test &&
+ p4 submit -d "add utf8 test files"
+ ) &&
+ test_when_finished cleanup_git &&
+
+ git p4 clone --dest="$git" //depot@all &&
+ (
+ cd "$git" &&
+ git checkout refs/remotes/p4/master &&
+
+ echo some tǣxt >utf8-nobom-check &&
+ test_cmp utf8-nobom-check utf8-nobom-test &&
+
+ echo some tǣxt | sed '\''s/^/\xef\xbb\xbf/'\'' >utf8-bom-check &&
+ test_cmp utf8-bom-check utf8-bom-test &&
+
+ dd bs=1 count=3 if=utf8-bom-check of=utf8-bom-empty-check &&
+ test_cmp utf8-bom-empty-check utf8-bom-empty-test
+ )
+'
+
test_done